diff options
| author | Naglis Jonaitis <njonaitis@gmail.com> | 2014-11-24 23:15:33 +0200 | 
|---|---|---|
| committer | Naglis Jonaitis <njonaitis@gmail.com> | 2014-11-24 23:15:33 +0200 | 
| commit | 2c25a2bd29236ef744e3b9032230dc74c1029df5 (patch) | |
| tree | 287142077a0d9713715849583f210f86fddd8673 | |
| parent | 00e9d396ab4f772c7c76752e7ab096076dfbfd2a (diff) | |
[tunein] Add new extractor (Closes #4097)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/tunein.py | 101 | 
2 files changed, 102 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb5e6ac77..cdcda1fa9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -405,6 +405,7 @@ from .trutube import TruTubeIE  from .tube8 import Tube8IE  from .tudou import TudouIE  from .tumblr import TumblrIE +from .tunein import TuneInIE  from .turbo import TurboIE  from .tutv import TutvIE  from .tvigle import TvigleIE diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py new file mode 100644 index 000000000..8c29f1634 --- /dev/null +++ b/youtube_dl/extractor/tunein.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class TuneInIE(InfoExtractor): +    _VALID_URL = r'''(?x)https?://(?:www\.)? +    (?: +        tunein\.com/ +        (?: +            radio/.*?-s| +            station/.*?StationId\= +        )(?P<id>[0-9]+) +        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+) +    ) +    ''' + +    _INFO_DICT = { +        'id': '34682', +        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', +        'ext': 'AAC', +        'thumbnail': 're:^https?://.*\.png$', +        'location': 'Tacoma, WA', +    } +    _TESTS = [ +        { +            'url': 'http://tunein.com/radio/Jazz24-885-s34682/', +            'info_dict': _INFO_DICT, +            'params': { +                'skip_download': True,  # live stream +            }, +        }, +        {  # test redirection +            'url': 'http://tun.in/ser7s', +            'info_dict': _INFO_DICT, +            'params': { +                'skip_download': True,  # live stream +            }, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        redirect_id = mobj.group('redirect_id') +        if redirect_id: +            # The server doesn't support HEAD requests +            urlh = self._request_webpage( +                url, redirect_id, note='Downloading redirect page') +            url = urlh.geturl() +            self.to_screen('Following redirect: %s' % url) +            mobj = re.match(self._VALID_URL, url) +        station_id = mobj.group('id') + +        webpage = self._download_webpage( +            url, station_id, note='Downloading station webpage') + +        payload = self._html_search_regex( +            r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data') +        json_data = json.loads(payload) +        station_info = json_data['Station']['broadcast'] +        title = station_info['Title'] +        thumbnail = station_info.get('Logo') +        location = station_info.get('Location') +        streams_url = station_info.get('StreamUrl') +        if not streams_url: +            raise ExtractorError('No downloadable streams found', +                                 expected=True) +        stream_data = self._download_webpage( +            streams_url, station_id, note='Downloading stream data') +        streams = json.loads(self._search_regex( +            r'\((.*)\);', stream_data, 'stream info'))['Streams'] + +        is_live = None +        formats = [] +        for stream in streams: +            if stream.get('Type') == 'Live': +                is_live = True +            formats.append({ +                'abr': stream.get('Bandwidth'), +                'ext': stream.get('MediaType'), +                'acodec': stream.get('MediaType'), +                'vcodec': 'none', +                'url': stream.get('Url'), +                # Sometimes streams with the highest quality do not exist +                'preference': stream.get('Reliability'), +            }) +        self._sort_formats(formats) + +        return { +            'id': station_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +            'location': location, +            'is_live': is_live, +        } | 
