aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/youtube.py126
1 files changed, 99 insertions, 27 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4023a6e50..afe0a781b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -33,9 +33,11 @@ from ..utils import (
int_or_none,
orderedSet,
parse_duration,
+ smuggle_url,
str_to_int,
unescapeHTML,
unified_strdate,
+ unsmuggle_url,
uppercase_escape,
ISO3166Utils,
)
@@ -558,6 +560,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '135', # bestvideo
}
},
+ {
+ # Multifeed videos (multiple cameras), URL is for Main Camera
+ 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'title': 'teamPGP: Rocket League Noob Stream',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6h8e8xoXJzg',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'PUOgX5z9xZw',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'teuwxikvS5k',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (zim)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }
]
def __init__(self, *args, **kwargs):
@@ -889,6 +944,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return formats
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
proto = (
'http' if self._downloader.params.get('prefer_insecure', False)
else 'https')
@@ -1005,6 +1062,48 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason',
video_id=video_id)
+ # title
+ if 'title' in video_info:
+ video_title = video_info['title'][0]
+ else:
+ self._downloader.report_warning('Unable to extract video title')
+ video_title = '_'
+
+ # description
+ video_description = get_element_by_id("eow-description", video_webpage)
+ if video_description:
+ video_description = re.sub(r'''(?x)
+ <a\s+
+ (?:[a-zA-Z-]+="[^"]+"\s+)*?
+ title="([^"]+)"\s+
+ (?:[a-zA-Z-]+="[^"]+"\s+)*?
+ class="yt-uix-redirect-link"\s*>
+ [^<]+
+ </a>
+ ''', r'\1', video_description)
+ video_description = clean_html(video_description)
+ else:
+ fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
+ if fd_mobj:
+ video_description = unescapeHTML(fd_mobj.group(1))
+ else:
+ video_description = ''
+
+ if not smuggled_data.get('force_singlefeed', False) and 'multifeed_metadata_list' in video_info:
+ entries = []
+ multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
+ for feed in multifeed_metadata_list.split(','):
+ feed_data = compat_parse_qs(feed)
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+ })
+ return self.playlist_result(entries, video_id, video_title, video_description)
+
if 'view_count' in video_info:
view_count = int(video_info['view_count'][0])
else:
@@ -1030,13 +1129,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self._downloader.report_warning('unable to extract uploader nickname')
- # title
- if 'title' in video_info:
- video_title = video_info['title'][0]
- else:
- self._downloader.report_warning('Unable to extract video title')
- video_title = '_'
-
# thumbnail image
# We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1072,26 +1164,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
video_categories = None
- # description
- video_description = get_element_by_id("eow-description", video_webpage)
- if video_description:
- video_description = re.sub(r'''(?x)
- <a\s+
- (?:[a-zA-Z-]+="[^"]+"\s+)*?
- title="([^"]+)"\s+
- (?:[a-zA-Z-]+="[^"]+"\s+)*?
- class="yt-uix-redirect-link"\s*>
- [^<]+
- </a>
- ''', r'\1', video_description)
- video_description = clean_html(video_description)
- else:
- fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
- if fd_mobj:
- video_description = unescapeHTML(fd_mobj.group(1))
- else:
- video_description = ''
-
def _extract_count(count_name):
return str_to_int(self._search_regex(
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'