aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/extractor/comedycentral.py63
-rw-r--r--youtube_dl/extractor/common.py17
-rw-r--r--youtube_dl/extractor/mtv.py47
-rw-r--r--youtube_dl/extractor/ted.py22
4 files changed, 86 insertions, 63 deletions
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index bf8d711ee..69b2beece 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):
'400': 'mp4',
}
_video_dimensions = {
- '3500': '1280x720',
- '2200': '960x540',
- '1700': '768x432',
- '1200': '640x360',
- '750': '512x288',
- '400': '384x216',
+ '3500': (1280, 720),
+ '2200': (960, 540),
+ '1700': (768, 432),
+ '1200': (640, 360),
+ '750': (512, 288),
+ '400': (384, 216),
}
@classmethod
@@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
- def _print_formats(self, formats):
- print('Available formats:')
- for x in formats:
- print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
-
+ @staticmethod
+ def _transform_rtmp_url(rtmp_video_url):
+ m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+ if not m:
+ raise ExtractorError(u'Cannot transform RTMP url')
+ base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+ return base + m.group('finalid')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):
self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
continue
- if self._downloader.params.get('listformats', None):
- self._print_formats([i[0] for i in turls])
- return
-
- # For now, just pick the highest bitrate
- format,rtmp_video_url = turls[-1]
-
- # Get the format arg from the arg stream
- req_format = self._downloader.params.get('format', None)
-
- # Select format if we can find one
- for f,v in turls:
- if f == req_format:
- format, rtmp_video_url = f, v
- break
-
- m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
- if not m:
- raise ExtractorError(u'Cannot transform RTMP url')
- base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
- video_url = base + m.group('finalid')
+ formats = []
+ for format, rtmp_video_url in turls:
+ w, h = self._video_dimensions.get(format, (None, None))
+ formats.append({
+ 'url': self._transform_rtmp_url(rtmp_video_url),
+ 'ext': self._video_extensions.get(format, 'mp4'),
+ 'format_id': format,
+ 'height': h,
+ 'width': w,
+ })
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
'id': shortMediaId,
- 'url': video_url,
+ 'formats': formats,
'uploader': showId,
'upload_date': officialDate,
'title': effTitle,
- 'ext': 'mp4',
- 'format': format,
'thumbnail': None,
'description': compat_str(officialTitle),
}
+
+ # TODO: Remove when #980 has been merged
+ info.update(info['formats'][-1])
+
results.append(info)
return results
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 77726ee24..69cdcdc1b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -35,6 +35,8 @@ class InfoExtractor(object):
title: Video title, unescaped.
ext: Video filename extension.
+ Instead of url and ext, formats can also specified.
+
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
@@ -52,8 +54,19 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
-
- The fields should all be Unicode strings.
+ formats: A list of dictionaries for each format available, it must
+ be ordered from worst to best quality. Potential fields:
+ * url Mandatory. The URL of the video file
+ * ext Will be calculated from url if missing
+ * format A human-readable description of the format
+ ("mp4 container with h264/opus").
+ Calculated from width and height if missing.
+ * format_id A short description of the format
+ ("mp4_h264_opus" or "19")
+ * width Width of the video, if known
+ * height Height of the video, if known
+
+ Unless mentioned otherwise, the fields should be Unicode strings.
Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 8f956571d..001a576a8 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
- def _extract_video_url(self, metadataXml):
+ def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
renditions = mdoc.findall('.//rendition')
- # For now, always pick the highest quality.
- rendition = renditions[-1]
-
- try:
- _,_,ext = rendition.attrib['type'].partition('/')
- format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
- rtmp_video_url = rendition.find('./src').text
- except KeyError:
- raise ExtractorError('Invalid rendition field.')
- video_url = self._transform_rtmp_url(rtmp_video_url)
- return {'ext': ext, 'url': video_url, 'format': format}
+ formats = []
+ for rendition in mdoc.findall('.//rendition'):
+ try:
+ _, _, ext = rendition.attrib['type'].partition('/')
+ rtmp_video_url = rendition.find('./src').text
+ formats.append({'ext': ext,
+ 'url': self._transform_rtmp_url(rtmp_video_url),
+ 'format_id': rendition.get('bitrate'),
+ 'width': int(rendition.get('width')),
+ 'height': int(rendition.get('height')),
+ })
+ except (KeyError, TypeError):
+ raise ExtractorError('Invalid rendition field.')
+ return formats
def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text
@@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):
mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
u'Downloading video urls')
- video_info = self._extract_video_url(mediagen_page)
description_node = itemdoc.find('description')
if description_node is not None:
description = description_node.text
else:
description = None
- video_info.update({'title': itemdoc.find('title').text,
- 'id': video_id,
- 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
- 'description': description,
- })
- return video_info
+
+ info = {
+ 'title': itemdoc.find('title').text,
+ 'formats': self._extract_video_formats(mediagen_page),
+ 'id': video_id,
+ 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+ 'description': description,
+ }
+
+ # TODO: Remove when #980 has been merged
+ info.update(info['formats'][-1])
+
+ return info
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 4c11f7a03..dfa1176a3 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
webpage, 'thumbnail')
+ formats = [{
+ 'ext': 'mp4',
+ 'url': stream['file'],
+ 'format': stream['id']
+ } for stream in info['htmlStreams']]
info = {
- 'id': info['id'],
- 'url': info['htmlStreams'][-1]['file'],
- 'ext': 'mp4',
- 'title': title,
- 'thumbnail': thumbnail,
- 'description': desc,
- }
+ 'id': info['id'],
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'description': desc,
+ 'formats': formats,
+ }
+
+ # TODO: Remove when #980 has been merged
+ info.update(info['formats'][-1])
+
return info