diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-12-14 21:31:53 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-12-14 21:31:53 +0600 | 
| commit | 0cb58b0259de0b0f44b0326d492b98a8eeb6316e (patch) | |
| tree | 5570fa8d65035c697abf4f895b33d9ad15e7deca | |
| parent | 31b2051e211f3e2691a186d16733cf91eb4ab391 (diff) | |
[youtube] Extract alt_title and creator for music videos (Closes #7862)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 22 | 
1 file changed, 22 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9b39505ba..4556a16fb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -33,6 +33,7 @@ from ..utils import (      int_or_none,      orderedSet,      parse_duration, +    remove_quotes,      remove_start,      sanitized_Request,      smuggle_url, @@ -395,12 +396,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20120506',                  'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', +                'alt_title': 'I Love It (feat. Charli XCX)',                  'description': 'md5:782e8651347686cba06e58f71ab51773',                  'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',                           'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',                           'iconic ep', 'iconic', 'love', 'it'],                  'uploader': 'Icona Pop',                  'uploader_id': 'IconaPop', +                'creator': 'Icona Pop',              }          },          { @@ -411,9 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20130703',                  'title': 'Justin Timberlake - Tunnel Vision (Explicit)', +                'alt_title': 'Tunnel Vision',                  'description': 'md5:64249768eec3bc4276236606ea996373',                  'uploader': 'justintimberlakeVEVO',                  'uploader_id': 'justintimberlakeVEVO', +                'creator': 'Justin Timberlake',                  'age_limit': 18,              }          }, @@ -492,10 +497,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'id': 'nfWlot6h_JM',                  'ext': 'm4a',                  'title': 'Taylor Swift - Shake It Off', +                'alt_title': 'Shake It Off',                  'description': 
'md5:95f66187cd7c8b2c13eb78e1223b63c3',                  'uploader': 'TaylorSwiftVEVO',                  'uploader_id': 'TaylorSwiftVEVO',                  'upload_date': '20140818', +                'creator': 'Taylor Swift',              },              'params': {                  'youtube_include_dash_manifest': True, @@ -551,9 +558,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'ext': 'mp4',                  'upload_date': '20100430',                  'uploader_id': 'deadmau5', +                'creator': 'deadmau5',                  'description': 'md5:12c56784b8032162bb936a5f76d55360',                  'uploader': 'deadmau5',                  'title': 'Deadmau5 - Some Chords (HD)', +                'alt_title': 'Some Chords',              },              'expected_warnings': [                  'DASH manifest missing', @@ -701,10 +710,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'id': 'lsguqyKfVQg',                  'ext': 'mp4',                  'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', +                'alt_title': 'Dark Walk',                  'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',                  'upload_date': '20151119',                  'uploader_id': 'IronSoulElf',                  'uploader': 'IronSoulElf', +                'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',              },              'params': {                  'skip_download': True, @@ -1308,6 +1319,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())          upload_date = unified_strdate(upload_date) +        m_music = re.search( +            r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) 
by (?P<creator>.+?)(?:\(.+?\))?</li', +            video_webpage) +        if m_music: +            video_alt_title = remove_quotes(unescapeHTML(m_music.group('title'))) +            video_creator = clean_html(m_music.group('creator')) +        else: +            video_alt_title = video_creator = None +          m_cat_container = self._search_regex(              r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',              video_webpage, 'categories', default=None) @@ -1537,7 +1557,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              'uploader': video_uploader,              'uploader_id': video_uploader_id,              'upload_date': upload_date, +            'creator': video_creator,              'title': video_title, +            'alt_title': video_alt_title,              'thumbnail': video_thumbnail,              'description': video_description,              'categories': video_categories, | 
