diff options
| -rw-r--r-- | youtube_dl/extractor/dramafever.py | 174 | 
1 files changed, 102 insertions, 72 deletions
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 40787ffcd..0f33a61a3 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -1,104 +1,111 @@  # encoding: utf-8  from __future__ import unicode_literals -import re +import itertools  from .common import InfoExtractor +from ..compat import ( +    compat_HTTPError, +    compat_urlparse, +) +from ..utils import ( +    ExtractorError, +    clean_html, +    determine_ext, +    int_or_none, +    parse_iso8601, +)  class DramaFeverIE(InfoExtractor):      IE_NAME = 'dramafever' -    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)/' -    _TESTS = [{ +    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)' +    _TEST = {          'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',          'info_dict': {              'id': '4512.1',              'ext': 'flv',              'title': 'Cooking with Shin 4512.1', +            'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', +            'thumbnail': 're:^https?://.*\.jpg', +            'timestamp': 1404336058,              'upload_date': '20140702', -            'description': 'Served at all special occasions and featured in the hit drama Heirs, Shin cooks Red Bean Rice.', +            'duration': 343,          } -    }] +    }      def _real_extract(self, url): -        video_id = self._match_id(url).replace("/", ".") - -        consumer_secret = self._get_consumer_secret(video_id) +        video_id = self._match_id(url).replace('/', '.') -        ep_json = self._download_json( -            "http://www.dramafever.com/amp/episode/feed.json?guid=%s" % video_id, -            video_id, note='Downloading episode metadata', -            errnote="Video may not be available for your location")["channel"]["item"] +        try: +            feed = self._download_json( +                'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, +                video_id, 'Downloading episode JSON')['channel']['item'] +        except ExtractorError as e: +            if isinstance(e.cause, compat_HTTPError): +                raise ExtractorError( +                    'Currently unavailable in your country.', expected=True) +            raise -        title = ep_json["media-group"]["media-title"] -        description = ep_json["media-group"]["media-description"] -        thumbnail = ep_json["media-group"]["media-thumbnail"]["@attributes"]["url"] -        duration = int(ep_json["media-group"]["media-content"][0]["@attributes"]["duration"]) -        mobj = re.match(r"([0-9]{4})-([0-9]{2})-([0-9]{2})", ep_json["pubDate"]) -        upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) if mobj is not None else None +        media_group = feed.get('media-group', {})          formats = [] -        for vid_format in ep_json["media-group"]["media-content"]: -            src = vid_format["@attributes"]["url"] -            if '.f4m' in src: -                formats.extend(self._extract_f4m_formats(src, video_id)) - +        for media_content in media_group['media-content']: +            src = media_content.get('@attributes', {}).get('url') +            if not src: +                continue +            ext = determine_ext(src) +            if ext == 'f4m': +                formats.extend(self._extract_f4m_formats( +                    src, video_id, f4m_id='hds')) +            elif ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    src, video_id, 'mp4', m3u8_id='hls')) +            else: +                formats.append({ +                    'url': src, +                })          self._sort_formats(formats) -        video_subtitles = self.extract_subtitles(video_id, consumer_secret) + +        title = media_group.get('media-title') +        description = media_group.get('media-description') +        duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) +        thumbnail = self._proto_relative_url( +            media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) +        timestamp = parse_iso8601(feed.get('pubDate'), ' ') + +        subtitles = {} +        for media_subtitle in media_group.get('media-subTitle', []): +            lang = media_subtitle.get('@attributes', {}).get('lang') +            href = media_subtitle.get('@attributes', {}).get('href') +            if not lang or not href: +                continue +            subtitles[lang] = [{ +                'ext': 'ttml', +                'url': href, +            }]          return {              'id': video_id,              'title': title,              'description': description,              'thumbnail': thumbnail, -            'upload_date': upload_date, +            'timestamp': timestamp,              'duration': duration,              'formats': formats, -            'subtitles': video_subtitles, +            'subtitles': subtitles,          } -    def _get_consumer_secret(self, video_id): -        df_js = self._download_webpage( -            "http://www.dramafever.com/static/126960d/v2/js/plugins/jquery.threadedcomments.js", video_id) -        return self._search_regex(r"'cs': '([0-9a-zA-Z]+)'", df_js, "cs") - -    def _get_episodes(self, series_id, consumer_secret, episode_filter=None): -        _PAGE_SIZE = 60 - -        curr_page = 1 -        max_pages = curr_page + 1 -        results = [] -        while max_pages >= curr_page: -            page_url = "http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d" % \ -                       (consumer_secret, series_id, _PAGE_SIZE, curr_page) -            series = self._download_json( -                page_url, series_id, note="Downloading series json page #%d" % curr_page) -            max_pages = series['num_pages'] -            results.extend([ep for ep in series['value'] if episode_filter is None or episode_filter(ep)]) -            curr_page += 1 -        return results - -    def _get_subtitles(self, video_id, consumer_secret): - -        res = None -        info = self._get_episodes( -            video_id.split(".")[0], consumer_secret, -            episode_filter=lambda x: x['guid'] == video_id) - -        if len(info) == 1 and info[0]['subfile'] != '': -            res = {'en': [{'url': info[0]['subfile'], 'ext': 'srt'}]} -        return res - - -class DramaFeverSeriesIE(DramaFeverIE): + +class DramaFeverSeriesIE(InfoExtractor):      IE_NAME = 'dramafever:series' -    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)/\d*[a-zA-Z_][a-zA-Z0-9_]*/' +    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d).+)?)?$'      _TESTS = [{          'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',          'info_dict': {              'id': '4512',              'title': 'Cooking with Shin', -            'description': 'Professional chef and cooking instructor Shin Kim takes some of the delicious dishes featured in your favorite dramas and shows you how to make them right at home.', +            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',          },          'playlist_count': 4,      }, { @@ -106,25 +113,48 @@ class DramaFeverSeriesIE(DramaFeverIE):          'info_dict': {              'id': '124',              'title': 'IRIS', -            'description': 'Lee Byung Hun and Kim Tae Hee star in this powerhouse drama and ratings megahit of action, intrigue and romance.', +            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',          },          'playlist_count': 20,      }] +    _CONSUMER_SECRET = 'DA59dtVXYLxajktV' +    _PAGE_SIZE = 5  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-) + +    def _get_consumer_secret(self, video_id): +        mainjs = self._download_webpage( +            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js', +            video_id, 'Downloading main.js', fatal=False) +        if not mainjs: +            return self._CONSUMER_SECRET +        return self._search_regex( +            r"var\s+cs\s*=\s*'([^']+)'", mainjs, +            'consumer secret', default=self._CONSUMER_SECRET) +      def _real_extract(self, url):          series_id = self._match_id(url) +          consumer_secret = self._get_consumer_secret(series_id) -        series_json = self._download_json( -            "http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s" % (consumer_secret, series_id), -            series_id, note='Downloading series metadata')["series"][series_id] +        series = self._download_json( +            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s' +            % (consumer_secret, series_id), +            series_id, 'Downloading series JSON')['series'][series_id] -        title = series_json["name"] -        description = series_json["description_short"] +        title = clean_html(series['name']) +        description = clean_html(series.get('description') or series.get('description_short')) -        episodes = self._get_episodes(series_id, consumer_secret)          entries = [] -        for ep in episodes: -            entries.append(self.url_result( -                'http://www.dramafever.com%s' % ep['episode_url'], 'DramaFever', ep['guid'])) +        for page_num in itertools.count(1): +            episodes = self._download_json( +                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d' +                % (consumer_secret, series_id, self._PAGE_SIZE, page_num), +                series_id, 'Downloading episodes JSON page #%d' % page_num) +            for episode in episodes.get('value', []): +                entries.append(self.url_result( +                    compat_urlparse.urljoin(url, episode['episode_url']), +                    'DramaFever', episode.get('guid'))) +            if page_num == episodes['num_pages']: +                break +          return self.playlist_result(entries, series_id, title, description)  | 
