aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/dramafever.py
blob: 40787ffcd2118d26d7bce98076c6d3f686653001 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class DramaFeverIE(InfoExtractor):
    IE_NAME = 'dramafever'
    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)/'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512.1',
            'ext': 'flv',
            'title': 'Cooking with Shin 4512.1',
            'upload_date': '20140702',
            'description': 'Served at all special occasions and featured in the hit drama Heirs, Shin cooks Red Bean Rice.',
        }
    }]

    def _real_extract(self, url):
        """Extract a single DramaFever episode.

        The "<series>/<episode>" pair from the URL is joined with a dot to
        form the GUID used by DramaFever's AMP feed API.
        """
        video_id = self._match_id(url).replace("/", ".")

        consumer_secret = self._get_consumer_secret(video_id)

        ep_json = self._download_json(
            "http://www.dramafever.com/amp/episode/feed.json?guid=%s" % video_id,
            video_id, note='Downloading episode metadata',
            errnote="Video may not be available for your location")["channel"]["item"]

        # Hoist the repeated media-group lookup.
        media_group = ep_json["media-group"]
        title = media_group["media-title"]
        description = media_group["media-description"]
        thumbnail = media_group["media-thumbnail"]["@attributes"]["url"]

        # Degrade to None on missing/malformed duration instead of letting
        # int() abort the whole extraction — duration is optional metadata.
        try:
            duration = int(media_group["media-content"][0]["@attributes"]["duration"])
        except (KeyError, IndexError, TypeError, ValueError):
            duration = None

        # pubDate starts with an ISO date (YYYY-MM-DD...); collapse it to the
        # YYYYMMDD form the info dict expects, or None when absent/unparsable.
        upload_date = None
        mobj = re.match(
            r"([0-9]{4})-([0-9]{2})-([0-9]{2})", ep_json.get("pubDate") or '')
        if mobj is not None:
            upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)

        # Only Adobe HDS (.f4m) renditions are usable here; the feed's other
        # media-content entries are ignored.
        formats = []
        for vid_format in media_group["media-content"]:
            src = vid_format["@attributes"]["url"]
            if '.f4m' in src:
                formats.extend(self._extract_f4m_formats(src, video_id))
        self._sort_formats(formats)

        video_subtitles = self.extract_subtitles(video_id, consumer_secret)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
            'subtitles': video_subtitles,
        }

    def _get_consumer_secret(self, video_id):
        """Scrape the API consumer secret ('cs' query parameter) out of a
        site JavaScript file; raises via _search_regex if it disappears."""
        df_js = self._download_webpage(
            "http://www.dramafever.com/static/126960d/v2/js/plugins/jquery.threadedcomments.js", video_id)
        return self._search_regex(r"'cs': '([0-9a-zA-Z]+)'", df_js, "cs")

    def _get_episodes(self, series_id, consumer_secret, episode_filter=None):
        """Page through the series API and return all episode dicts,
        optionally keeping only those accepted by episode_filter."""
        _PAGE_SIZE = 60

        curr_page = 1
        max_pages = curr_page + 1
        results = []
        # num_pages is only known after the first response, so seed max_pages
        # high enough to enter the loop once.
        while max_pages >= curr_page:
            page_url = "http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d" % \
                       (consumer_secret, series_id, _PAGE_SIZE, curr_page)
            series = self._download_json(
                page_url, series_id, note="Downloading series json page #%d" % curr_page)
            max_pages = series['num_pages']
            results.extend([ep for ep in series['value'] if episode_filter is None or episode_filter(ep)])
            curr_page += 1
        return results

    def _get_subtitles(self, video_id, consumer_secret):
        """Return the subtitles dict for this episode: English .srt when the
        series API lists a subfile, otherwise an empty dict (the conventional
        "no subtitles" value, rather than None)."""
        info = self._get_episodes(
            video_id.split(".")[0], consumer_secret,
            episode_filter=lambda x: x['guid'] == video_id)

        if len(info) == 1 and info[0]['subfile'] != '':
            return {'en': [{'url': info[0]['subfile'], 'ext': 'srt'}]}
        return {}


class DramaFeverSeriesIE(DramaFeverIE):
    IE_NAME = 'dramafever:series'
    _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)/\d*[a-zA-Z_][a-zA-Z0-9_]*/'
    _TESTS = [{
        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
        'info_dict': {
            'id': '4512',
            'title': 'Cooking with Shin',
            'description': 'Professional chef and cooking instructor Shin Kim takes some of the delicious dishes featured in your favorite dramas and shows you how to make them right at home.',
        },
        'playlist_count': 4,
    }, {
        'url': 'http://www.dramafever.com/drama/124/IRIS/',
        'info_dict': {
            'id': '124',
            'title': 'IRIS',
            'description': 'Lee Byung Hun and Kim Tae Hee star in this powerhouse drama and ratings megahit of action, intrigue and romance.',
        },
        'playlist_count': 20,
    }]

    def _real_extract(self, url):
        """Build a playlist of every episode in a DramaFever series."""
        series_id = self._match_id(url)
        consumer_secret = self._get_consumer_secret(series_id)

        # Series-level metadata comes from the series query endpoint, keyed
        # by the series id itself.
        series_json = self._download_json(
            "http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s" % (consumer_secret, series_id),
            series_id, note='Downloading series metadata')["series"][series_id]
        title = series_json["name"]
        description = series_json["description_short"]

        # Delegate each episode to the single-episode extractor by URL.
        entries = [
            self.url_result(
                'http://www.dramafever.com%s' % episode['episode_url'],
                'DramaFever', episode['guid'])
            for episode in self._get_episodes(series_id, consumer_secret)
        ]
        return self.playlist_result(entries, series_id, title, description)