aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/walla.py
blob: e687c3af0a3d0e7df5f8b1f06044729631309839 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# coding: utf-8
from __future__ import unicode_literals


import re

from .common import InfoExtractor


class WallaIE(InfoExtractor):
    """Extractor for videos served from vod.walla.co.il.

    The numeric id in the page URL is used to fetch a config document from
    video2.walla.co.il; the media id, title, default quality and optional
    subtitles URL are read from that document and turned into RTMP
    download parameters.
    """

    _VALID_URL = r'http://vod\.walla\.co\.il/\w+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
        'info_dict': {
            'id': '2642630',
            'ext': 'flv',
            'title': 'וואן דיירקשן: ההיסטריה',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (RTMP parameters, title, subtitles) for *url*.

        Raises whatever the regex helpers raise when the mandatory
        ``<media_id>`` or ``<title>`` fields are missing from the config
        document, instead of silently continuing with placeholder values.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        config_url = 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id
        config = self._download_webpage(
            config_url, video_id, 'Downloading video config')

        # Mandatory fields: pass a descriptive field name and no default so
        # that a missing tag is reported as an extraction error.
        media_id = self._html_search_regex(
            r'<media_id>(\d+)</media_id>', config, 'media id')
        title = self._html_search_regex(
            r'<title>(.*)</title>', config, 'title')

        # Optional field: fall back to quality '40' when the config does
        # not announce a default quality.
        quality = self._html_search_regex(
            r'<qualities defaultType="(\d+)">', config, 'default quality',
            default='40')

        # The play path is derived from slices of the media id; 7-character
        # ids get a leading '0' on the first path component.
        # NOTE(review): this layout is inferred from the ids seen so far --
        # confirm against the server for ids of other lengths.
        prefix = '0' if len(media_id) == 7 else ''
        series = '%s%s' % (prefix, media_id[0:2])
        session = media_id[2:5]
        playpath = 'mp4:media/%s/%s/%s-%s' % (series, session, media_id, quality)

        subtitles = {}
        subtitle_url = self._html_search_regex(
            r'<subtitles.*<src>(.*)</src>.*</subtitle>', config,
            'subtitles url', default=None)
        if subtitle_url:
            # The downloaded subtitle document is stored as-is under 'heb'.
            subtitles['heb'] = self._download_webpage(
                subtitle_url, video_id, 'Downloading subtitles')

        return {
            'id': video_id,
            'title': title,
            'url': 'rtmp://wafla.walla.co.il:1935/vod',
            'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
            'page_url': url,
            'app': "vod",
            'play_path': playpath,
            'tc_url': 'rtmp://wafla.walla.co.il:1935/vod',
            'rtmp_protocol': 'rtmp',
            'ext': 'flv',
            'subtitles': subtitles,
        }