diff options
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 35 | 
1 file changed, 26 insertions, 9 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b9014fc23..dc5080504 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,10 +14,12 @@ import xml.etree.ElementTree  from ..compat import (      compat_cookiejar, +    compat_cookies,      compat_HTTPError,      compat_http_client,      compat_urllib_error,      compat_urllib_parse_urlparse, +    compat_urllib_request,      compat_urlparse,      compat_str,  ) @@ -65,7 +67,7 @@ class InfoExtractor(object):                      Potential fields:                      * url        Mandatory. The URL of the video file -                    * ext        Will be calculated from url if missing +                    * ext        Will be calculated from URL if missing                      * format     A human-readable description of the format                                   ("mp4 container with h264/opus").                                   Calculated from the format_id, width, height. @@ -155,7 +157,7 @@ class InfoExtractor(object):                      lower to higher preference, each element is a dictionary                      with the "ext" entry and one of:                          * "data": The subtitles file contents -                        * "url": A url pointing to the subtitles file +                        * "url": A URL pointing to the subtitles file      automatic_captions: Like 'subtitles', used by the YoutubeIE for                      automatically generated captions      duration:       Length of the video in seconds, as an integer. @@ -176,13 +178,18 @@ class InfoExtractor(object):                                       Set to "root" to indicate that this is a                                       comment to the original video.      
age_limit:      Age restriction for the video, as an integer (years) -    webpage_url:    The url to the video webpage, if given to youtube-dl it +    webpage_url:    The URL to the video webpage, if given to youtube-dl it                      should allow to get the same result again. (It will be set                      by YoutubeDL if it's missing)      categories:     A list of categories that the video falls in, for example                      ["Sports", "Berlin"] +    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]      is_live:        True, False, or None (=unknown). Whether this video is a                      live stream that goes on instead of a fixed-length video. +    start_time:     Time in seconds where the reproduction should start, as +                    specified in the URL. +    end_time:       Time in seconds where the reproduction should end, as +                    specified in the URL.      Unless mentioned otherwise, the fields should be Unicode strings. 
@@ -501,7 +508,7 @@ class InfoExtractor(object):      # Methods for following #608      @staticmethod      def url_result(url, ie=None, video_id=None, video_title=None): -        """Returns a url that points to a page that should be processed""" +        """Returns a URL that points to a page that should be processed"""          # TODO: ie should be the class used for getting the info          video_info = {'_type': 'url',                        'url': url, @@ -626,6 +633,12 @@ class InfoExtractor(object):              template % (content_re, property_re),          ] +    @staticmethod +    def _meta_regex(prop): +        return r'''(?isx)<meta +                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) +                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) +      def _og_search_property(self, prop, html, name=None, **kargs):          if name is None:              name = 'OpenGraph %s' % prop @@ -635,7 +648,7 @@ class InfoExtractor(object):          return unescapeHTML(escaped)      def _og_search_thumbnail(self, html, **kargs): -        return self._og_search_property('image', html, 'thumbnail url', fatal=False, **kargs) +        return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)      def _og_search_description(self, html, **kargs):          return self._og_search_property('description', html, fatal=False, **kargs) @@ -656,9 +669,7 @@ class InfoExtractor(object):          if display_name is None:              display_name = name          return self._html_search_regex( -            r'''(?isx)<meta -                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) -                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name), +            self._meta_regex(name),              html, display_name, fatal=fatal, group='content', **kwargs)      def _dc_search_uploader(self, html): @@ -1065,6 +1076,12 @@ class InfoExtractor(object):              None, '/', True, False, 
expire_time, '', None, None, None)          self._downloader.cookiejar.set_cookie(cookie) +    def _get_cookies(self, url): +        """ Return a compat_cookies.SimpleCookie with the cookies for the url """ +        req = compat_urllib_request.Request(url) +        self._downloader.cookiejar.add_cookie_header(req) +        return compat_cookies.SimpleCookie(req.get_header('Cookie')) +      def get_testcases(self, include_onlymatching=False):          t = getattr(self, '_TEST', None)          if t: @@ -1116,7 +1133,7 @@ class InfoExtractor(object):  class SearchInfoExtractor(InfoExtractor):      """      Base class for paged search queries extractors. -    They accept urls in the format _SEARCH_KEY(|all|[0-9]):{query} +    They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}      Instances should define _SEARCH_KEY and _MAX_RESULTS.      """ | 
