diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-22 18:40:26 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-22 18:40:28 +0200 | 
| commit | 2b9faf55427b73cb978ddd661c32a5cb313f9331 (patch) | |
| tree | 03798e77b93eff75191db8740a36ffda21081034 | |
| parent | ed2d6a19607e965602d66ea90ac716118b739a16 (diff) | |
[rtve] Add support for live stream
At the moment, only RTVE-1 seems to work flawlessly.
RTVE-2 seems geoblocked right now.
RTVE-TDP doesn't seem to be available outside of Spain.
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtve.py | 152 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 6 | 
3 files changed, 113 insertions, 47 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index da9606f24..3753fa291 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -264,7 +264,7 @@ from .rtbf import RTBFIE  from .rtlnl import RtlXlIE  from .rtlnow import RTLnowIE  from .rts import RTSIE -from .rtve import RTVEALaCartaIE +from .rtve import RTVEALaCartaIE, RTVELiveIE  from .ruhd import RUHDIE  from .rutube import (      RutubeIE, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index c2228b2f0..8cf753902 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -1,21 +1,66 @@  # encoding: utf-8  from __future__ import unicode_literals -import re  import base64 +import re +import time  from .common import InfoExtractor  from ..utils import (      struct_unpack, +    remove_end,  ) +def _decrypt_url(png): +    encrypted_data = base64.b64decode(png) +    text_index = encrypted_data.find(b'tEXt') +    text_chunk = encrypted_data[text_index - 4:] +    length = struct_unpack('!I', text_chunk[:4])[0] +    # Use bytearray to get integers when iterating in both python 2.x and 3.x +    data = bytearray(text_chunk[8:8 + length]) +    data = [chr(b) for b in data if b != 0] +    hash_index = data.index('#') +    alphabet_data = data[:hash_index] +    url_data = data[hash_index + 1:] + +    alphabet = [] +    e = 0 +    d = 0 +    for l in alphabet_data: +        if d == 0: +            alphabet.append(l) +            d = e = (e + 1) % 4 +        else: +            d -= 1 +    url = '' +    f = 0 +    e = 3 +    b = 1 +    for letter in url_data: +        if f == 0: +            l = int(letter) * 10 +            f = 1 +        else: +            if e == 0: +                l += int(letter) +                url += alphabet[l] +                e = (b + 3) % 4 +                f = 0 +                b += 1 +            else: +                e -= 1 + +    return url + + +  class 
RTVEALaCartaIE(InfoExtractor):      IE_NAME = 'rtve.es:alacarta'      IE_DESC = 'RTVE a la carta'      _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' -    _TEST = { +    _TESTS = [{          'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',          'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',          'info_dict': { @@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):              'ext': 'mp4',              'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',          }, -    } - -    def _decrypt_url(self, png): -        encrypted_data = base64.b64decode(png) -        text_index = encrypted_data.find(b'tEXt') -        text_chunk = encrypted_data[text_index-4:] -        length = struct_unpack('!I', text_chunk[:4])[0] -        # Use bytearray to get integers when iterating in both python 2.x and 3.x -        data = bytearray(text_chunk[8:8+length]) -        data = [chr(b) for b in data if b != 0] -        hash_index = data.index('#') -        alphabet_data = data[:hash_index] -        url_data = data[hash_index+1:] - -        alphabet = [] -        e = 0 -        d = 0 -        for l in alphabet_data: -            if d == 0: -                alphabet.append(l) -                d = e = (e + 1) % 4 -            else: -                d -= 1 -        url = '' -        f = 0 -        e = 3 -        b = 1 -        for letter in url_data: -            if f == 0: -                l = int(letter)*10 -                f = 1 -            else: -                if e == 0: -                    l += int(letter) -                    url += alphabet[l] -                    e = (b + 3) % 4 -                    f = 0 -                    b += 1 -                else: -                    e -= 1 - -        return url +    }, { +        'note': 'Live stream', +        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', +        'info_dict': { +            'id': 
'1694255', +            'ext': 'flv', +            'title': 'TODO', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -74,11 +86,59 @@ class RTVEALaCartaIE(InfoExtractor):              video_id)['page']['items'][0]          png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id          png = self._download_webpage(png_url, video_id, 'Downloading url information') -        video_url = self._decrypt_url(png) +        video_url = _decrypt_url(png)          return {              'id': video_id,              'title': info['title'],              'url': video_url, -            'thumbnail': info['image'], +            'thumbnail': info.get('image'), +            'page_url': url, +        } + + +class RTVELiveIE(InfoExtractor): +    IE_NAME = 'rtve.es:live' +    IE_DESC = 'RTVE.es live streams' +    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias(?=/directo-la-1)|television)/(?P<id>[a-zA-Z0-9-]+)' + +    _TESTS = [{ +        'url': 'http://www.rtve.es/noticias/directo-la-1/', +        'info_dict': { +            'id': 'directo-la-1', +            'ext': 'flv', +            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', +        }, +        'params': { +            'skip_download': 'live stream', +        } +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        start_time = time.gmtime() +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) +        player_url = self._search_regex( +            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL') +        title = remove_end(self._og_search_title(webpage), ' en directo') +        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time) + +        vidplayer_id = self._search_regex( +            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID') +        png_url = 
'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id +        png = self._download_webpage(png_url, video_id, 'Downloading url information') +        video_url = _decrypt_url(png) + +        print(video_url) + +        return { +            'id': video_id, +            'ext': 'flv', +            'title': title, +            'url': video_url, +            'app': 'rtve-live-live?ovpfv=2.1.2', +            'player_url': player_url, +            'rtmp_live': True,          } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 42ad520f9..ac8a78adf 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1285,6 +1285,12 @@ def remove_start(s, start):      return s +def remove_end(s, end): +    if s.endswith(end): +        return s[:-len(end)] +    return s + +  def url_basename(url):      path = compat_urlparse.urlparse(url).path      return path.strip(u'/').split(u'/')[-1]  | 
