youtube_dl/extractor/theplatform.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139

from __future__ import unicode_literals

import re
import json

from .subtitles import SubtitlesInfoExtractor
from ..compat import (
    compat_str,
)
from ..utils import (
    determine_ext,
    ExtractorError,
    xpath_with_ns,
)

def _x(p):
    """Return XPath *p* passed through ``xpath_with_ns`` with the ``smil``
    prefix mapped to the SMIL 2.1 Language namespace URI."""
    smil_namespaces = {'smil': 'http://www.w3.org/2005/SMIL21/Language'}
    return xpath_with_ns(p, smil_namespaces)


class ThePlatformIE(SubtitlesInfoExtractor):
    """Extractor for media addressed through link/player.theplatform.com
    URLs or the internal ``theplatform:<id>`` shorthand.

    The SMIL manifest supplies the formats; the ``format=preview`` JSON
    supplies title, description, thumbnail, duration and captions.
    """

    _VALID_URL = r'''(?x)
        (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
         |theplatform:)(?P<id>[^/\?&]+)'''

    # Account segment used when the URL does not carry one, i.e. for the
    # bare ``theplatform:<id>`` form.
    _DEFAULT_PROVIDER_ID = 'dJ5BDC'

    _TEST = {
        # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
        'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
        'info_dict': {
            'id': 'e9I_cZgTgIPd',
            'ext': 'flv',
            'title': 'Blackberry\'s big, bold Z30',
            'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
            'duration': 247,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    @staticmethod
    def _append_query(url, query):
        """Append *query* (``key=value&...``) to *url*, using ``&`` when
        *url* already contains a ``?`` and ``?`` otherwise."""
        separator = '&' if '?' in url else '?'
        return url + separator + query

    def _real_extract(self, url):
        """Build the info dict for *url*.

        Returns None after listing subtitles when the ``listsubtitles``
        option is set. Raises ExtractorError when the SMIL document reports
        a geographic restriction or contains no usable ``<switch>``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Use the account from the URL itself; only fall back to the
        # default for URLs that do not name one.
        provider_id = mobj.group('provider_id') or self._DEFAULT_PROVIDER_ID

        if mobj.group('config'):
            # The player URL may or may not already have a query string.
            config_url = self._append_query(url, 'form=json')
            config_url = config_url.replace('swf/', 'config/')
            config_url = config_url.replace('onsite/', 'onsite/config/')
            config = self._download_json(config_url, video_id, 'Downloading config')
            smil_url = self._append_query(
                config['releaseUrl'], 'format=SMIL&formats=MPEG4&manifest=f4m')
        else:
            smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
                        'format=smil&mbr=true'.format(provider_id, video_id))

        meta = self._download_xml(smil_url, video_id)
        # A geo-blocked video is reported inside the SMIL as a <ref> titled
        # 'Geographic Restriction' whose 'abstract' holds the message.
        try:
            error_msg = next(
                n.attrib['abstract']
                for n in meta.findall(_x('.//smil:ref'))
                if n.attrib.get('title') == 'Geographic Restriction')
        except StopIteration:
            pass
        else:
            raise ExtractorError(error_msg, expected=True)

        info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(
            provider_id, video_id)
        info = self._download_json(info_url, video_id)

        # Map language -> caption URL, skipping incomplete entries.
        subtitles = {}
        captions = info.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                lang, src = caption.get('lang'), caption.get('src')
                if lang and src:
                    subtitles[lang] = src

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, subtitles)
            return

        subtitles = self.extract_subtitles(video_id, subtitles)

        head = meta.find(_x('smil:head'))
        body = meta.find(_x('smil:body'))

        f4m_node = body.find(_x('smil:seq//smil:video'))
        # A <video> without a 'src' simply is not an f4m manifest.
        if f4m_node is not None and '.f4m' in f4m_node.attrib.get('src', ''):
            f4m_url = f4m_node.attrib['src']
            if 'manifest.f4m?' not in f4m_url:
                f4m_url += '?'
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
            formats = self._extract_f4m_formats(f4m_url, video_id)
        else:
            formats = []
            switch = body.find(_x('smil:switch'))
            if switch is not None:
                # RTMP: every variant shares the base URL from <head><meta>
                # and differs only in its play path.
                base_url = head.find(_x('smil:meta')).attrib['base']
                for f in switch.findall(_x('smil:video')):
                    attr = f.attrib
                    width = int(attr['width'])
                    height = int(attr['height'])
                    vbr = int(attr['system-bitrate']) // 1000
                    format_id = '%dx%d_%dk' % (width, height, vbr)
                    formats.append({
                        'format_id': format_id,
                        'url': base_url,
                        'play_path': 'mp4:' + attr['src'],
                        'ext': 'flv',
                        'width': width,
                        'height': height,
                        'vbr': vbr,
                    })
            else:
                switch = body.find(_x('smil:seq//smil:switch'))
                if switch is None:
                    # Fail with an extractor error instead of an
                    # AttributeError on None.
                    raise ExtractorError(
                        'Unable to find any video formats in the SMIL document')
                for f in switch.findall(_x('smil:video')):
                    attr = f.attrib
                    vbr = int(attr['system-bitrate']) // 1000
                    ext = determine_ext(attr['src'])
                    if ext == 'once':
                        ext = 'mp4'
                    formats.append({
                        'format_id': compat_str(vbr),
                        'url': attr['src'],
                        'vbr': vbr,
                        'ext': ext,
                    })
            self._sort_formats(formats)

        # Only the title is mandatory; the remaining preview fields are
        # optional and must not abort extraction when absent.
        duration = info.get('duration')
        if duration is not None:
            # The original divided this value by 1000 as well.
            duration = duration // 1000

        return {
            'id': video_id,
            'title': info['title'],
            'subtitles': subtitles,
            'formats': formats,
            'description': info.get('description'),
            'thumbnail': info.get('defaultThumbnailUrl'),
            'duration': duration,
        }