diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/comedycentral.py | 63 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/mtv.py | 47 | ||||
| -rw-r--r-- | youtube_dl/extractor/ted.py | 22 | 
4 files changed, 86 insertions, 63 deletions
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index bf8d711ee..69b2beece 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):          '400': 'mp4',      }      _video_dimensions = { -        '3500': '1280x720', -        '2200': '960x540', -        '1700': '768x432', -        '1200': '640x360', -        '750': '512x288', -        '400': '384x216', +        '3500': (1280, 720), +        '2200': (960, 540), +        '1700': (768, 432), +        '1200': (640, 360), +        '750': (512, 288), +        '400': (384, 216),      }      @classmethod @@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):          """Receives a URL and returns True if suitable for this IE."""          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None -    def _print_formats(self, formats): -        print('Available formats:') -        for x in formats: -            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) - +    @staticmethod +    def _transform_rtmp_url(rtmp_video_url): +        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) +        if not m: +            raise ExtractorError(u'Cannot transform RTMP url') +        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' +        return base + m.group('finalid')      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):                  self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')                  continue -            if self._downloader.params.get('listformats', None): -                self._print_formats([i[0] for i in turls]) -                return - -            # For now, just 
pick the highest bitrate -            format,rtmp_video_url = turls[-1] - -            # Get the format arg from the arg stream -            req_format = self._downloader.params.get('format', None) - -            # Select format if we can find one -            for f,v in turls: -                if f == req_format: -                    format, rtmp_video_url = f, v -                    break - -            m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) -            if not m: -                raise ExtractorError(u'Cannot transform RTMP url') -            base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' -            video_url = base + m.group('finalid') +            formats = [] +            for format, rtmp_video_url in turls: +                w, h = self._video_dimensions.get(format, (None, None)) +                formats.append({ +                    'url': self._transform_rtmp_url(rtmp_video_url), +                    'ext': self._video_extensions.get(format, 'mp4'), +                    'format_id': format, +                    'height': h, +                    'width': w, +                })              effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)              info = {                  'id': shortMediaId, -                'url': video_url, +                'formats': formats,                  'uploader': showId,                  'upload_date': officialDate,                  'title': effTitle, -                'ext': 'mp4', -                'format': format,                  'thumbnail': None,                  'description': compat_str(officialTitle),              } + +            # TODO: Remove when #980 has been merged +            info.update(info['formats'][-1]) +              results.append(info)          return results diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 
77726ee24..69cdcdc1b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -35,6 +35,8 @@ class InfoExtractor(object):      title:          Video title, unescaped.      ext:            Video filename extension. +    Instead of url and ext, formats can also be specified. +      The following fields are optional:      format:         The video format, defaults to ext (used for --get-format) @@ -52,8 +54,19 @@ class InfoExtractor(object):      view_count:     How many users have watched the video on the platform.      urlhandle:      [internal] The urlHandle to be used to download the file,                      like returned by urllib.request.urlopen - -    The fields should all be Unicode strings. +    formats:        A list of dictionaries for each format available, it must +                    be ordered from worst to best quality. Potential fields: +                    * url       Mandatory. The URL of the video file +                    * ext       Will be calculated from url if missing +                    * format    A human-readable description of the format +                                ("mp4 container with h264/opus"). +                                Calculated from width and height if missing. +                    * format_id A short description of the format +                                ("mp4_h264_opus" or "19") +                    * width     Width of the video, if known +                    * height    Height of the video, if known + +    Unless mentioned otherwise, the fields should be Unicode strings.      Subclasses of this one should re-define the _real_initialize() and      _real_extract() methods and define a _VALID_URL regexp. 
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8f956571d..001a576a8 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):      def _get_thumbnail_url(self, uri, itemdoc):          return 'http://mtv.mtvnimages.com/uri/' + uri -    def _extract_video_url(self, metadataXml): +    def _extract_video_formats(self, metadataXml):          if '/error_country_block.swf' in metadataXml:              raise ExtractorError(u'This video is not available from your country.', expected=True)          mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))          renditions = mdoc.findall('.//rendition') -        # For now, always pick the highest quality. -        rendition = renditions[-1] - -        try: -            _,_,ext = rendition.attrib['type'].partition('/') -            format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] -            rtmp_video_url = rendition.find('./src').text -        except KeyError: -            raise ExtractorError('Invalid rendition field.') -        video_url = self._transform_rtmp_url(rtmp_video_url) -        return {'ext': ext, 'url': video_url, 'format': format} +        formats = [] +        for rendition in mdoc.findall('.//rendition'): +            try: +                _, _, ext = rendition.attrib['type'].partition('/') +                rtmp_video_url = rendition.find('./src').text +                formats.append({'ext': ext, +                                'url': self._transform_rtmp_url(rtmp_video_url), +                                'format_id': rendition.get('bitrate'), +                                'width': int(rendition.get('width')), +                                'height': int(rendition.get('height')), +                                }) +            except (KeyError, TypeError): +                raise ExtractorError('Invalid rendition field.') + 
       return formats      def _get_video_info(self, itemdoc):          uri = itemdoc.find('guid').text @@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):              mediagen_url += '&acceptMethods=fms'          mediagen_page = self._download_webpage(mediagen_url, video_id,                                                 u'Downloading video urls') -        video_info = self._extract_video_url(mediagen_page)          description_node = itemdoc.find('description')          if description_node is not None:              description = description_node.text          else:              description = None -        video_info.update({'title': itemdoc.find('title').text, -                           'id': video_id, -                           'thumbnail': self._get_thumbnail_url(uri, itemdoc), -                           'description': description, -                           }) -        return video_info + +        info = { +            'title': itemdoc.find('title').text, +            'formats': self._extract_video_formats(mediagen_page), +            'id': video_id, +            'thumbnail': self._get_thumbnail_url(uri, itemdoc), +            'description': description, +        } + +        # TODO: Remove when #980 has been merged +        info.update(info['formats'][-1]) + +        return info      def _get_videos_info(self, uri):          video_id = self._id_from_uri(uri) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 4c11f7a03..dfa1176a3 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):          thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',                                         webpage, 'thumbnail') +        formats = [{ +            'ext': 'mp4', +            'url': stream['file'], +            'format': stream['id'] +            } for stream in info['htmlStreams']]          info = { -                'id': info['id'], -                
'url': info['htmlStreams'][-1]['file'], -                'ext': 'mp4', -                'title': title, -                'thumbnail': thumbnail, -                'description': desc, -                } +            'id': info['id'], +            'title': title, +            'thumbnail': thumbnail, +            'description': desc, +            'formats': formats, +        } + +        # TODO: Remove when #980 has been merged +        info.update(info['formats'][-1]) +          return info  | 
