diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 32 | 
1 files changed, 24 insertions, 8 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2a5a85dc6..cef4dce85 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,8 @@ from ..utils import (      clean_html,      compiled_regex_type,      ExtractorError, +    RegexNotFoundError, +    sanitize_filename,      unescapeHTML,  ) @@ -61,9 +63,12 @@ class InfoExtractor(object):                      * ext       Will be calculated from url if missing                      * format    A human-readable description of the format                                  ("mp4 container with h264/opus"). -                                Calculated from width and height if missing. +                                Calculated from the format_id, width, height. +                                and format_note fields if missing.                      * format_id A short description of the format                                  ("mp4_h264_opus" or "19") +                    * format_note Additional info about the format +                                ("3D" or "DASH video")                      * width     Width of the video, if known                      * height    Height of the video, if known @@ -178,6 +183,17 @@ class InfoExtractor(object):              self.to_screen(u'Dumping request to ' + url)              dump = base64.b64encode(webpage_bytes).decode('ascii')              self._downloader.to_screen(dump) +        if self._downloader.params.get('write_pages', False): +            try: +                url = url_or_request.get_full_url() +            except AttributeError: +                url = url_or_request +            raw_filename = ('%s_%s.dump' % (video_id, url)) +            filename = sanitize_filename(raw_filename, restricted=True) +            self.to_screen(u'Saving request to ' + filename) +            with open(filename, 'wb') as outf: +                outf.write(webpage_bytes) +          content = webpage_bytes.decode(encoding, 'replace')          return (content, urlh) @@ -228,7 +244,7 @@ class InfoExtractor(object):          Perform a regex search on the given string, using a single or a list of          patterns returning the first matching group.          In case of failure return a default value or raise a WARNING or a -        ExtractorError, depending on fatal, specifying the field name. +        RegexNotFoundError, depending on fatal, specifying the field name.          """          if isinstance(pattern, (str, compat_str, compiled_regex_type)):              mobj = re.search(pattern, string, flags) @@ -248,7 +264,7 @@ class InfoExtractor(object):          elif default is not None:              return default          elif fatal: -            raise ExtractorError(u'Unable to extract %s' % _name) +            raise RegexNotFoundError(u'Unable to extract %s' % _name)          else:              self._downloader.report_warning(u'unable to extract %s; '                  u'please report this issue on http://yt-dl.org/bug' % _name) @@ -314,10 +330,10 @@ class InfoExtractor(object):      def _og_search_title(self, html, **kargs):          return self._og_search_property('title', html, **kargs) -    def _og_search_video_url(self, html, name='video url', **kargs): -        return self._html_search_regex([self._og_regex('video:secure_url'), -                                        self._og_regex('video')], -                                       html, name, **kargs) +    def _og_search_video_url(self, html, name='video url', secure=True, **kargs): +        regexes = [self._og_regex('video')] +        if secure: regexes.insert(0, self._og_regex('video:secure_url')) +        return self._html_search_regex(regexes, html, name, **kargs)      def _rta_search(self, html):          # See http://www.rtalabel.org/index.php?content=howtofaq#single @@ -365,7 +381,7 @@ class SearchInfoExtractor(InfoExtractor):      def _get_n_results(self, query, n):          """Get a specified number of results for a query""" -        raise NotImplementedError("This method must be implemented by sublclasses") +        raise NotImplementedError("This method must be implemented by subclasses")      @property      def SEARCH_KEY(self): | 
