diff options
Diffstat (limited to 'youtube_dl/extractor/udn.py')
| -rw-r--r-- | youtube_dl/extractor/udn.py | 62 | 
1 files changed, 41 insertions, 21 deletions
| diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index ee35b7227..57dd73aef 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -2,10 +2,13 @@  from __future__ import unicode_literals  import json +import re +  from .common import InfoExtractor  from ..utils import ( +    determine_ext, +    int_or_none,      js_to_json, -    ExtractorError,  )  from ..compat import compat_urlparse @@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor):      _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL      _TESTS = [{          'url': 'http://video.udn.com/embed/news/300040', -        'md5': 'de06b4c90b042c128395a88f0384817e',          'info_dict': {              'id': '300040',              'ext': 'mp4',              'title': '生物老師男變女 全校挺"做自己"',              'thumbnail': 're:^https?://.*\.jpg$', -        } +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        },      }, {          'url': 'https://video.udn.com/embed/news/300040',          'only_matching': True, @@ -38,39 +44,53 @@ class UDNEmbedIE(InfoExtractor):          page = self._download_webpage(url, video_id)          options = json.loads(js_to_json(self._html_search_regex( -            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) +            r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))          video_urls = options['video']          if video_urls.get('youtube'):              return self.url_result(video_urls.get('youtube'), 'Youtube') -        try: -            del video_urls['youtube'] -        except KeyError: -            pass +        formats = [] +        for video_type, api_url in video_urls.items(): +            if not api_url: +                continue -        formats = [{ -            'url': self._download_webpage( +            video_url = self._download_webpage(                  compat_urlparse.urljoin(url, api_url), video_id, -                'retrieve url for %s video' % video_type), -            'format_id': video_type, -            'preference': 0 if video_type == 'mp4' else -1, -        } for video_type, api_url in video_urls.items() if api_url] +                note='retrieve url for %s video' % video_type) -        if not formats: -            raise ExtractorError('No videos found', expected=True) +            ext = determine_ext(video_url) +            if ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    video_url, video_id, ext='mp4', m3u8_id='hls')) +            elif ext == 'f4m': +                formats.extend(self._extract_f4m_formats( +                    video_url, video_id, f4m_id='hds')) +            else: +                mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url) +                a_format = { +                    'url': video_url, +                    # video_type may be 'mp4', which confuses YoutubeDL +                    'format_id': 'http-' + video_type, +                } +                if mobj: +                    a_format.update({ +                        'height': int_or_none(mobj.group('height')), +                        'tbr': int_or_none(mobj.group('tbr')), +                    }) +                formats.append(a_format)          self._sort_formats(formats) -        thumbnail = None - -        if options.get('gallery') and len(options['gallery']): -            thumbnail = options['gallery'][0].get('original') +        thumbnails = [{ +            'url': img_url, +            'id': img_type, +        } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]          return {              'id': video_id,              'formats': formats,              'title': options['title'], -            'thumbnail': thumbnail +            'thumbnails': thumbnails,          } | 
