diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/common.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 16 | 
2 files changed, 11 insertions, 7 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1e366a13c..db472aace 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -113,6 +113,8 @@ class InfoExtractor(object):      webpage_url:    The url to the video webpage, if given to youtube-dl it                      should allow to get the same result again. (It will be set                      by YoutubeDL if it's missing) +    categories:     A list of categories that the video falls in, for example +                    ["Sports", "Berlin"]      Unless mentioned otherwise, the fields should be Unicode strings. diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f0a92d182..3c8f7f7a2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  u"uploader": u"Philipp Hagemeister",                  u"uploader_id": u"phihag",                  u"upload_date": u"20121002", -                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." +                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .", +                u"categories": [u'Science & Technology'],              }          },          { @@ -1136,18 +1137,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # upload date          upload_date = None -        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL) +        mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)          if mobj is not None:              upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())              upload_date = unified_strdate(upload_date) - -        video_categories = [] -        # categories          m_cat_container = get_element_by_id("eow-category", video_webpage)          if m_cat_container: -            video_categories = re.findall(r'<a[^<]+>(.*?)</a>', -                                m_cat_container, re.DOTALL) +            category = self._html_search_regex( +                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'cateory', +                default=None) +            video_categories = None if category is None else [category] +        else: +            video_categories = None          # description          video_description = get_element_by_id("eow-description", video_webpage)  | 
