diff options
| author | remitamine <remitamine@gmail.com> | 2015-12-25 01:59:56 +0100 | 
|---|---|---|
| committer | remitamine <remitamine@gmail.com> | 2015-12-25 01:59:56 +0100 | 
| commit | fb8e402ad252bebc54c36c5acca1b7ef05416c9b (patch) | |
| tree | 719bdcb591d89c9368384d16c0410f901f47106c | |
| parent | c24044635b6e7f67274899e7c5e7a5efcce947af (diff) | |
[hotstar] Add new extractor
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/hotstar.py | 79 | 
2 files changed, 80 insertions, 0 deletions
# coding: utf-8
# Module: youtube_dl/extractor/hotstar.py
# Registered in youtube_dl/extractor/__init__.py (between the hotnewhiphop
# and howcast entries) with:
#     from .hotstar import HotStarIE
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
)


class HotStarIE(InfoExtractor):
    """Extractor for hotstar.com video pages identified by a 10-digit id."""

    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
    _TEST = {
        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
        'info_dict': {
            'id': '1000076273',
            'ext': 'mp4',
            'title': 'On Air With AIB - English',
            'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
            'timestamp': 1447227000,
            'upload_date': '20151111',
            'duration': 381,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # Content metadata endpoint; the single placeholder is the content id.
    _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
    # CDN lookup endpoint; placeholders are (channel, id, type).
    _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
        """Download a JSON API response and unwrap its ``resultObj`` payload.

        The response is treated as successful only when its ``resultCode``
        is ``'OK'``.

        Returns the ``resultObj`` value on success.  When ``fatal`` is false,
        returns None if the download yields no usable JSON object or the
        API reports a non-OK result code.

        Raises ExtractorError when ``fatal`` is true and the response is not
        a JSON object or carries a non-OK result code.
        """
        json_data = super(HotStarIE, self)._download_json(
            url_or_request, video_id, note, fatal=fatal)

        # NOTE(review): with fatal=False the base implementation presumably
        # returns a falsy value instead of raising when the download fails;
        # subscripting that value would crash the best-effort path.
        if not isinstance(json_data, dict):
            if fatal:
                raise ExtractorError('Unexpected API response')
            return None

        if json_data.get('resultCode') != 'OK':
            if fatal:
                raise ExtractorError(
                    json_data.get('errorDescription')
                    or 'API request failed')
            return None
        return json_data['resultObj']

    def _real_extract(self, url):
        """Build the info dict (metadata and formats) for a video URL."""
        video_id = self._match_id(url)
        video_data = self._download_json(
            self._GET_CONTENT_TEMPLATE % video_id,
            video_id)['contentInfo'][0]

        formats = []
        # Only the TABLET channel is queried.  Per the original author's
        # notes, PCTV is the channel for the f4m manifest, and f4m downloads
        # produce broken files.
        for channel in ('TABLET',):
            format_data = self._download_json(
                self._GET_CDN_TEMPLATE % (channel, video_id, 'VOD'),
                video_id, 'Downloading %s JSON metadata' % channel,
                fatal=False)
            if not format_data:
                continue

            # The CDN lookup is best-effort, so a response without a source
            # URL is skipped rather than allowed to raise KeyError.
            format_url = format_data.get('src')
            if not format_url:
                continue

            ext = determine_ext(format_url)
            if ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            elif ext == 'f4m':
                # produce broken files
                continue
            else:
                formats.append({
                    'url': format_url,
                    'width': int_or_none(format_data.get('width')),
                    'height': int_or_none(format_data.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_data['episodeTitle'],
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate')),
            'formats': formats,
        }
