about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- youtube_dl/__init__.py | 4
-rw-r--r-- youtube_dl/extractor/common.py | 12
2 files changed, 16 insertions, 0 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index a33dec785..48ffcbf8e 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -316,6 +316,9 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
+ verbosity.add_option('--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded pages to files in the current directory')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -652,6 +655,7 @@ def _real_main(argv=None):
'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8b067b48d..ce349fe20 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,6 +15,7 @@ from ..utils import (
compiled_regex_type,
ExtractorError,
RegexNotFoundError,
+ sanitize_filename,
unescapeHTML,
)
@@ -182,6 +183,17 @@ class InfoExtractor(object):
self.to_screen(u'Dumping request to ' + url)
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
+ if self._downloader.params.get('write_pages', False):
+ try:
+ url = url_or_request.get_full_url()
+ except AttributeError:
+ url = url_or_request
+ raw_filename = ('%s_%s.dump' % (video_id, url))
+ filename = sanitize_filename(raw_filename, restricted=True)
+ self.to_screen(u'Saving request to ' + filename)
+ with open(filename, 'wb') as outf:
+ outf.write(webpage_bytes)
+
content = webpage_bytes.decode(encoding, 'replace')
return (content, urlh)