diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/tbs.py | 134 | ||||
| -rw-r--r-- | youtube_dl/extractor/turner.py | 48 | 
2 files changed, 120 insertions, 62 deletions
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py
index e9474533f..460bc5d74 100644
--- a/youtube_dl/extractor/tbs.py
+++ b/youtube_dl/extractor/tbs.py
@@ -4,58 +4,110 @@ from __future__ import unicode_literals
 import re
 
 from .turner import TurnerBaseIE
-from ..utils import extract_attributes
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    strip_or_none,
+)
 
 
 class TBSIE(TurnerBaseIE):
-    # https://github.com/rg3/youtube-dl/issues/13658
-    _WORKING = False
-
-    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
     _TESTS = [{
-        'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
-        'md5': '9e61d680e2285066ade7199e6408b2ee',
+        'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
         'info_dict': {
-            'id': '2007318',
+            'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
             'ext': 'mp4',
-            'title': 'Theatrical Trailer',
-            'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
+            'title': 'Monster',
+            'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
+            'timestamp': 1508175329,
+            'upload_date': '20171016',
         },
-        'skip': 'TBS videos are deleted after a while',
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }, {
-        'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
-        'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
-        'info_dict': {
-            'id': '1538823',
-            'ext': 'mp4',
-            'title': 'You Better Run',
-            'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
-        },
-        'skip': 'TBS videos are deleted after a while',
+        'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         domain, display_id = re.match(self._VALID_URL, url).groups()
         site = domain[:3]
         webpage = self._download_webpage(url, display_id)
-        video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params'))
-        query = None
-        clip_id = video_params.get('clipid')
-        if clip_id:
-            query = 'id=' + clip_id
-        else:
-            query = 'titleId=' + video_params['titleid']
-        return self._extract_cvp_info(
-            'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, {
-                'default': {
-                    'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
-                },
-                'secure': {
-                    'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site,
-                    'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
-                },
-            }, {
-                'url': url,
-                'site_name': site.upper(),
-                'auth_required': video_params.get('isAuthRequired') != 'false',
-            })
+        video_data = self._parse_json(self._search_regex(
+            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
+            webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+
+        media_id = video_data['mediaID']
+        title = video_data['title']
+
+        streams_data = self._download_json(
+            'http://medium.ngtv.io/media/%s/tv' % media_id,
+            media_id)['media']['tv']
+        duration = None
+        chapters = []
+        formats = []
+        for supported_type in ('unprotected', 'bulkaes'):
+            stream_data = streams_data.get(supported_type, {})
+            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+            if not m3u8_url:
+                continue
+            if stream_data.get('playlistProtection') == 'spe':
+                m3u8_url = self._add_akamai_spe_token(
+                    'http://www.%s.com/service/token_spe' % site,
+                    m3u8_url, media_id, {
+                        'url': url,
+                        'site_name': site.upper(),
+                        'auth_required': video_data.get('authRequired') == '1',
+                    })
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
+
+            if not chapters:
+                for chapter in stream_data.get('contentSegments', []):
+                    start_time = float_or_none(chapter.get('start'))
+                    chapter_duration = float_or_none(chapter.get('duration'))  # own name: must not clobber the video-level `duration` returned below
+                    if start_time is None or chapter_duration is None:
+                        continue
+                    chapters.append({
+                        'start_time': start_time,
+                        'end_time': start_time + chapter_duration,
+                    })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for image_id, image in video_data.get('images', {}).items():
+            image_url = image.get('url')
+            if not image_url or image.get('type') != 'video':
+                continue
+            i = {
+                'id': image_id,
+                'url': image_url,
+            }
+            mobj = re.search(r'(\d+)x(\d+)', image_url)
+            if mobj:
+                i.update({
+                    'width': int(mobj.group(1)),
+                    'height': int(mobj.group(2)),
+                })
+            thumbnails.append(i)
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
+            'duration': duration,
+            'timestamp': int_or_none(video_data.get('created')),
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
+            'chapters': chapters,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py
index efeb677ee..e73b64aeb 100644
--- a/youtube_dl/extractor/turner.py
+++ b/youtube_dl/extractor/turner.py
@@ -18,9 +18,32 @@ from ..utils import (
 
 
 class TurnerBaseIE(AdobePassIE):
+    _AKAMAI_SPE_TOKEN_CACHE = {}  # secure path -> token; class-level dict, so shared by every instance
+
     def _extract_timestamp(self, video_data):
         return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
 
+    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+        secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
+        token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
+        if not token:
+            query = {
+                'path': secure_path,
+                'videoId': content_id,
+            }
+            if ap_data.get('auth_required'):
+                query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
+            auth = self._download_xml(
+                tokenizer_src, content_id, query=query)
+            error_msg = xpath_text(auth, 'error/msg')
+            if error_msg:
+                raise ExtractorError(error_msg, expected=True)
+            token = xpath_text(auth, 'token')
+            if not token:
+                return video_url  # tokenizer gave no token: hand the URL back unsigned
+            self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
+        return video_url + '?hdnea=' + token
+
     def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
         video_data = self._download_xml(data_src, video_id)
         video_id = video_data.attrib['id']
@@ -33,7 +56,6 @@ class TurnerBaseIE(AdobePassIE):
         #         rtmp_src = splited_rtmp_src[1]
         # aifp = xpath_text(video_data, 'akamai/aifp', default='')
 
-        tokens = {}
         urls = []
         formats = []
         rex = re.compile(
@@ -67,26 +89,10 @@ class TurnerBaseIE(AdobePassIE):
                 secure_path_data = path_data.get('secure')
                 if not secure_path_data:
                     continue
-                video_url = secure_path_data['media_src'] + video_url
-                secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
-                token = tokens.get(secure_path)
-                if not token:
-                    query = {
-                        'path': secure_path,
-                        'videoId': content_id,
-                    }
-                    if ap_data.get('auth_required'):
-                        query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name'])
-                    auth = self._download_xml(
-                        secure_path_data['tokenizer_src'], video_id, query=query)
-                    error_msg = xpath_text(auth, 'error/msg')
-                    if error_msg:
-                        raise ExtractorError(error_msg, expected=True)
-                    token = xpath_text(auth, 'token')
-                    if not token:
-                        continue
-                    tokens[secure_path] = token
-                video_url = video_url + '?hdnea=' + token
+                video_url = self._add_akamai_spe_token(
+                    secure_path_data['tokenizer_src'],
+                    secure_path_data['media_src'] + video_url,
+                    content_id, ap_data)
             elif not re.match('https?://', video_url):
                 base_path_data = path_data.get(ext, path_data.get('default', {}))
                 media_src = base_path_data.get('media_src')
