youtube_dl/extractor/miomio.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

# coding: utf-8
from __future__ import unicode_literals

import random

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    xpath_text,
    int_or_none,
    ExtractorError,
    sanitized_Request,
)


class MioMioIE(InfoExtractor):
    IE_NAME = 'miomio.tv'
    _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
    _TESTS = [{
        # "type=video" in flashvars
        'url': 'http://www.miomio.tv/watch/cc88912/',
        'info_dict': {
            'id': '88912',
            'ext': 'flv',
            'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
            'duration': 5923,
        },
        'skip': 'Unable to load videos',
    }, {
        'url': 'http://www.miomio.tv/watch/cc184024/',
        'info_dict': {
            'id': '43729',
            'title': '《动漫同人插画绘制》',
        },
        'playlist_mincount': 86,
        'skip': 'Unable to load videos',
    }, {
        'url': 'http://www.miomio.tv/watch/cc173113/',
        'info_dict': {
            'id': '173113',
            'title': 'The New Macbook 2015 上手试玩与简评'
        },
        'playlist_mincount': 2,
        'skip': 'Unable to load videos',
    }, {
        # new 'h5' player
        'url': 'http://www.miomio.tv/watch/cc273997/',
        'md5': '0b27a4b4495055d826813f8c3a6b2070',
        'info_dict': {
            'id': '273997',
            'ext': 'mp4',
            'title': 'マツコの知らない世界【劇的進化SP！ビニール傘＆冷凍食品2016】 1_2 - 16 05 31',
        },
        'skip': 'Unable to load videos',
    }]

    def _extract_mioplayer(self, webpage, video_id, title, http_headers):
        xml_config = self._search_regex(
            r'flashvars="type=(?:sina|video)&amp;(.+?)&amp;',
            webpage, 'xml config')

        # skipping the following page causes lags and eventually connection drop-outs
        self._request_webpage(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
            video_id)

        vid_config_request = sanitized_Request(
            'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
            headers=http_headers)

        # the following xml contains the actual configuration information on the video file(s)
        vid_config = self._download_xml(vid_config_request, video_id)

        if not int_or_none(xpath_text(vid_config, 'timelength')):
            raise ExtractorError('Unable to load videos!', expected=True)

        entries = []
        for f in vid_config.findall('./durl'):
            segment_url = xpath_text(f, 'url', 'video url')
            if not segment_url:
                continue
            order = xpath_text(f, 'order', 'order')
            segment_id = video_id
            segment_title = title
            if order:
                segment_id += '-%s' % order
                segment_title += ' part %s' % order
            entries.append({
                'id': segment_id,
                'url': segment_url,
                'title': segment_title,
                'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
                'http_headers': http_headers,
            })

        return entries

    def _download_chinese_webpage(self, *args, **kwargs):
        # Requests with English locales return garbage
        headers = {
            'Accept-Language': 'zh-TW,en-US;q=0.7,en;q=0.3',
        }
        kwargs.setdefault('headers', {}).update(headers)
        return self._download_webpage(*args, **kwargs)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_chinese_webpage(
            url, video_id)

        title = self._html_search_meta(
            'description', webpage, 'title', fatal=True)

        mioplayer_path = self._search_regex(
            r'src="(/mioplayer(?:_h5)?/[^"]+)"', webpage, 'ref_path')

        if '_h5' in mioplayer_path:
            player_url = compat_urlparse.urljoin(url, mioplayer_path)
            player_webpage = self._download_chinese_webpage(
                player_url, video_id,
                note='Downloading player webpage', headers={'Referer': url})
            entries = self._parse_html5_media_entries(player_url, player_webpage, video_id)
            http_headers = {'Referer': player_url}
        else:
            http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path}
            entries = self._extract_mioplayer(webpage, video_id, title, http_headers)

        if len(entries) == 1:
            segment = entries[0]
            segment['id'] = video_id
            segment['title'] = title
            segment['http_headers'] = http_headers
            return segment

        return {
            '_type': 'multi_video',
            'id': video_id,
            'entries': entries,
            'title': title,
            'http_headers': http_headers,
        }