diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 15 | ||||
| -rw-r--r-- | youtube_dl/extractor/abc7news.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/cspan.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/dcn.py | 178 | ||||
| -rw-r--r-- | youtube_dl/extractor/esri.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 20 | ||||
| -rw-r--r-- | youtube_dl/extractor/iqiyi.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/jwplatform.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/livestream.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/phoenix.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/rai.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/tunein.py | 191 | ||||
| -rw-r--r-- | youtube_dl/extractor/vgtv.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/viki.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/zdf.py | 253 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | 
16 files changed, 503 insertions, 221 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 165835f63..971047ad4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -135,7 +135,12 @@ from .dailymotion import (  )  from .daum import DaumIE  from .dbtv import DBTVIE -from .dcn import DCNIE +from .dcn import ( +    DCNIE, +    DCNVideoIE, +    DCNLiveIE, +    DCNSeasonIE, +)  from .dctp import DctpTvIE  from .deezer import DeezerPlaylistIE  from .democracynow import DemocracynowIE @@ -703,7 +708,13 @@ from .tube8 import Tube8IE  from .tubitv import TubiTvIE  from .tudou import TudouIE  from .tumblr import TumblrIE -from .tunein import TuneInIE +from .tunein import ( +    TuneInClipIE, +    TuneInStationIE, +    TuneInProgramIE, +    TuneInTopicIE, +    TuneInShortenerIE, +)  from .turbo import TurboIE  from .tutv import TutvIE  from .tv2 import ( diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py index c04949c21..122dc9099 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abc7news.py @@ -44,7 +44,6 @@ class Abc7NewsIE(InfoExtractor):              'contentURL', webpage, 'm3u8 url', fatal=True)          formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') -        self._sort_formats(formats)          title = self._og_search_title(webpage).strip()          description = self._og_search_description(webpage).strip() diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7b685d157..b3ee67018 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -58,18 +58,23 @@ class CSpanIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) +        video_type = None          webpage = self._download_webpage(url, video_id) -        matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage) -        if matches: +        # We first look for clipid, because clipprog always appears before +        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] +        results = list(filter(None, (re.search(p, webpage) for p in patterns))) +        if results: +            matches = results[0]              video_type, video_id = matches.groups() -            if video_type == 'prog': -                video_type = 'program' +            video_type = 'clip' if video_type == 'id' else 'program'          else:              senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)              if senate_isvp_url:                  title = self._og_search_title(webpage)                  surl = smuggle_url(senate_isvp_url, {'force_title': title})                  return self.url_result(surl, 'SenateISVP', video_id, title) +        if video_type is None or video_id is None: +            raise ExtractorError('unable to find video id and type')          def get_text_attr(d, attr):              return d.get(attr, {}).get('#text') diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 9737cff14..0d140f12f 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -1,26 +1,89 @@  # coding: utf-8  from __future__ import unicode_literals +import re +import base64 +  from .common import InfoExtractor  from ..compat import compat_urllib_parse  from ..utils import (      int_or_none,      parse_iso8601,      sanitized_Request, +    smuggle_url, +    unsmuggle_url,  )  class DCNIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)' +    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' + +    def _real_extract(self, url): +        show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() +        if video_id and int(video_id) > 0: +            return self.url_result( +                'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') +        elif season_id and int(season_id) > 0: +            return self.url_result(smuggle_url( +                'http://www.dcndigital.ae/program/season/%s' % season_id, +                {'show_id': show_id}), 'DCNSeason') +        else: +            return self.url_result( +                'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') + + +class DCNBaseIE(InfoExtractor): +    def _extract_video_info(self, video_data, video_id, is_live): +        title = video_data.get('title_en') or video_data['title_ar'] +        img = video_data.get('img') +        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None +        duration = int_or_none(video_data.get('duration')) +        description = video_data.get('description_en') or video_data.get('description_ar') +        timestamp = parse_iso8601(video_data.get('create_time'), ' ') + +        return { +            'id': video_id, +            'title': self._live_title(title) if is_live else title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'timestamp': timestamp, +            'is_live': is_live, +        } + +    def _extract_video_formats(self, webpage, video_id, entry_protocol): +        formats = [] +        m3u8_url = self._html_search_regex( +            r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) +        if m3u8_url: +            m3u8_formats = self._extract_m3u8_formats( +                m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None) +            if m3u8_formats: +                formats.extend(m3u8_formats) + +        rtsp_url = self._search_regex( +            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) +        if rtsp_url: +            formats.append({ +                'url': rtsp_url, +                'format_id': 'rtsp', +            }) + +        self._sort_formats(formats) +        return formats + + +class DCNVideoIE(DCNBaseIE): +    IE_NAME = 'dcn:video' +    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'      _TEST = { -        'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', +        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',          'info_dict':          {              'id': '17375',              'ext': 'mp4',              'title': 'رحلة العمر : الحلقة 1',              'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', -            'thumbnail': 're:^https?://.*\.jpg$',              'duration': 2041,              'timestamp': 1227504126,              'upload_date': '20081124', @@ -37,46 +100,95 @@ class DCNIE(InfoExtractor):          request = sanitized_Request(              'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,              headers={'Origin': 'http://www.dcndigital.ae'}) - -        video = self._download_json(request, video_id) -        title = video.get('title_en') or video['title_ar'] +        video_data = self._download_json(request, video_id) +        info = self._extract_video_info(video_data, video_id, False)          webpage = self._download_webpage(              'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +              compat_urllib_parse.urlencode({ -                'id': video['id'], -                'user_id': video['user_id'], -                'signature': video['signature'], +                'id': video_data['id'], +                'user_id': video_data['user_id'], +                'signature': video_data['signature'],                  'countries': 'Q0M=',                  'filter': 'DENY',              }), video_id) +        info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') +        return info -        m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') -        formats = self._extract_m3u8_formats( -            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') -        rtsp_url = self._search_regex( -            r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) -        if rtsp_url: -            formats.append({ -                'url': rtsp_url, -                'format_id': 'rtsp', +class DCNLiveIE(DCNBaseIE): +    IE_NAME = 'dcn:live' +    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)' + +    def _real_extract(self, url): +        channel_id = self._match_id(url) + +        request = sanitized_Request( +            'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, +            headers={'Origin': 'http://www.dcndigital.ae'}) + +        channel_data = self._download_json(request, channel_id) +        info = self._extract_video_info(channel_data, channel_id, True) + +        webpage = self._download_webpage( +            'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + +            compat_urllib_parse.urlencode({ +                'id': base64.b64encode(channel_data['user_id'].encode()).decode(), +                'channelid': base64.b64encode(channel_data['id'].encode()).decode(), +                'signature': channel_data['signature'], +                'countries': 'Q0M=', +                'filter': 'DENY', +            }), channel_id) +        info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') +        return info + + +class DCNSeasonIE(InfoExtractor): +    IE_NAME = 'dcn:season' +    _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' +    _TEST = { +        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', +        'info_dict': +        { +            'id': '7910', +            'title': 'محاضرات الشيخ الشعراوي', +        }, +        'playlist_mincount': 27, +    } + +    def _real_extract(self, url): +        url, smuggled_data = unsmuggle_url(url, {}) +        show_id, season_id = re.match(self._VALID_URL, url).groups() + +        data = {} +        if season_id: +            data['season'] = season_id +            show_id = smuggled_data.get('show_id') +            if show_id is None: +                request = sanitized_Request( +                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, +                    headers={'Origin': 'http://www.dcndigital.ae'}) +                season = self._download_json(request, season_id) +                show_id = season['id'] +        data['show_id'] = show_id +        request = sanitized_Request( +            'http://admin.mangomolo.com/analytics/index.php/plus/show', +            compat_urllib_parse.urlencode(data), +            { +                'Origin': 'http://www.dcndigital.ae', +                'Content-Type': 'application/x-www-form-urlencoded'              }) -        self._sort_formats(formats) +        show = self._download_json(request, show_id) +        if not season_id: +            season_id = show['default_season'] +        for season in show['seasons']: +            if season['id'] == season_id: +                title = season.get('title_en') or season['title_ar'] -        img = video.get('img') -        thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None -        duration = int_or_none(video.get('duration')) -        description = video.get('description_en') or video.get('description_ar') -        timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ') +                entries = [] +                for video in show['videos']: +                    entries.append(self.url_result( +                        'http://www.dcndigital.ae/media/%s' % video['id'], 'DCNVideo')) -        return { -            'id': video_id, -            'title': title, -            'description': description, -            'thumbnail': thumbnail, -            'duration': duration, -            'timestamp': timestamp, -            'formats': formats, -        } +                return self.playlist_result(entries, season_id, title) diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py index bf5d2019f..d4205d7fb 100644 --- a/youtube_dl/extractor/esri.py +++ b/youtube_dl/extractor/esri.py @@ -61,7 +61,7 @@ class EsriVideoIE(InfoExtractor):              webpage, 'duration', fatal=False))          upload_date = unified_strdate(self._html_search_meta( -            'last-modified', webpage, 'upload date', fatal=None)) +            'last-modified', webpage, 'upload date', fatal=False))          return {              'id': video_id, diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 39c481068..5e43f2359 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -74,7 +74,7 @@ class FacebookIE(InfoExtractor):              return          login_page_req = sanitized_Request(self._LOGIN_URL) -        login_page_req.add_header('Cookie', 'locale=en_US') +        self._set_cookie('facebook.com', 'locale', 'en_US')          login_page = self._download_webpage(login_page_req, None,                                              note='Downloading login page',                                              errnote='Unable to download login page') @@ -100,13 +100,25 @@ class FacebookIE(InfoExtractor):              login_results = self._download_webpage(request, None,                                                     note='Logging in', errnote='unable to fetch login page')              if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: +                error = self._html_search_regex( +                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>', +                    login_results, 'login error', default=None, group='error') +                if error: +                    raise ExtractorError('Unable to login: %s' % error, expected=True)                  self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')                  return +            fb_dtsg = self._search_regex( +                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None) +            h = self._search_regex( +                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None) + +            if not fb_dtsg or not h: +                return +              check_form = { -                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), -                'h': self._search_regex( -                    r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'), +                'fb_dtsg': fb_dtsg, +                'h': h,                  'name_action_selected': 'dont_save',              }              check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index c3731a110..66a70a181 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -16,7 +16,7 @@ class IqiyiIE(InfoExtractor):      IE_NAME = 'iqiyi'      IE_DESC = '爱奇艺' -    _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' +    _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'      _TESTS = [{          'url': 'http://www.iqiyi.com/v_19rrojlavg.html', @@ -84,6 +84,15 @@ class IqiyiIE(InfoExtractor):          'params': {              'skip_download': True,          }, +    }, { +        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', +        'only_matching': True, +    }, { +        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', +        'only_matching': True, +    }, { +        'url': 'http://yule.iqiyi.com/pcb.html', +        'only_matching': True,      }]      _FORMATS_MAP = [ diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index cdc095a79..a92adf2b3 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -44,7 +44,8 @@ class JWPlatformIE(InfoExtractor):              source_url = self._proto_relative_url(source['file'])              source_type = source.get('type') or ''              if source_type == 'application/vnd.apple.mpegurl': -                m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None) +                m3u8_formats = self._extract_m3u8_formats( +                    source_url, video_id, 'mp4', 'm3u8_native', fatal=False)                  if m3u8_formats:                      formats.extend(m3u8_formats)              elif source_type.startswith('audio'): diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 9c8d826c4..688eb2308 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -254,7 +254,7 @@ class LivestreamOriginalIE(InfoExtractor):          'playlist_mincount': 4,      }, {          # live stream -        'url': 'http://www.livestream.com/znsbahamas', +        'url': 'http://original.livestream.com/znsbahamas',          'only_matching': True,      }] diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index 46cebc0d7..6ce2ec19d 100644 --- a/youtube_dl/extractor/phoenix.py +++ b/youtube_dl/extractor/phoenix.py @@ -1,10 +1,9 @@  from __future__ import unicode_literals -from .common import InfoExtractor -from .zdf import extract_from_xml_url +from .zdf import ZDFIE -class PhoenixIE(InfoExtractor): +class PhoenixIE(ZDFIE):      _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/          (?:              phoenix/die_sendungen/(?:[^/]+/)? @@ -41,5 +40,5 @@ class PhoenixIE(InfoExtractor):              r'<div class="phx_vod" id="phx_vod_([0-9]+)"',              webpage, 'internal video ID') -        api_url = 'http://www.phoenix.de/php/zdfplayer-v1.3/data/beitragsDetails.php?ak=web&id=%s' % internal_id -        return extract_from_xml_url(self, video_id, api_url) +        api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id +        return self.extract_from_xml_url(video_id, api_url) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 14f1ccbb4..278b1d2bf 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -118,13 +118,13 @@ class RaiTVIE(InfoExtractor):                  if ext == 'm3u8':                      m3u8_formats = self._extract_m3u8_formats(                          media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', -                        fatal=None) +                        fatal=False)                      if m3u8_formats:                          formats.extend(m3u8_formats)                  elif ext == 'f4m':                      f4m_formats = self._extract_f4m_formats(                          media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', -                        video_id, f4m_id='hds', fatal=None) +                        video_id, f4m_id='hds', fatal=False)                      if f4m_formats:                          formats.extend(f4m_formats)                  elif ext == 'stl': diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index b6b1f2568..8322cc14d 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -2,74 +2,33 @@  from __future__ import unicode_literals  import json -import re  from .common import InfoExtractor  from ..utils import ExtractorError +from ..compat import compat_urlparse -class TuneInIE(InfoExtractor): -    _VALID_URL = r'''(?x)https?://(?:www\.)? -    (?: -        tunein\.com/ -        (?: -            radio/.*?-s| -            station/.*?StationId\= -        )(?P<id>[0-9]+) -        |tun\.in/(?P<redirect_id>[A-Za-z0-9]+) -    ) -    ''' -    _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station' - -    _INFO_DICT = { -        'id': '34682', -        'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', -        'ext': 'aac', -        'thumbnail': 're:^https?://.*\.png$', -        'location': 'Tacoma, WA', -    } -    _TESTS = [ -        { -            'url': 'http://tunein.com/radio/Jazz24-885-s34682/', -            'info_dict': _INFO_DICT, -            'params': { -                'skip_download': True,  # live stream -            }, -        }, -        {  # test redirection -            'url': 'http://tun.in/ser7s', -            'info_dict': _INFO_DICT, -            'params': { -                'skip_download': True,  # live stream -            }, -        }, -    ] +class TuneInBaseIE(InfoExtractor): +    _API_BASE_URL = 'http://tunein.com/tuner/tune/'      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        redirect_id = mobj.group('redirect_id') -        if redirect_id: -            # The server doesn't support HEAD requests -            urlh = self._request_webpage( -                url, redirect_id, note='Downloading redirect page') -            url = urlh.geturl() -            self.to_screen('Following redirect: %s' % url) -            mobj = re.match(self._VALID_URL, url) -        station_id = mobj.group('id') - -        station_info = self._download_json( -            self._API_URL_TEMPLATE.format(station_id), -            station_id, note='Downloading station JSON') - -        title = station_info['Title'] -        thumbnail = station_info.get('Logo') -        location = station_info.get('Location') -        streams_url = station_info.get('StreamUrl') +        content_id = self._match_id(url) + +        content_info = self._download_json( +            self._API_BASE_URL + self._API_URL_QUERY % content_id, +            content_id, note='Downloading JSON metadata') + +        title = content_info['Title'] +        thumbnail = content_info.get('Logo') +        location = content_info.get('Location') +        streams_url = content_info.get('StreamUrl')          if not streams_url: -            raise ExtractorError('No downloadable streams found', -                                 expected=True) +            raise ExtractorError('No downloadable streams found', expected=True) +        if not streams_url.startswith('http://'): +            streams_url = compat_urlparse.urljoin(url, streams_url) +          stream_data = self._download_webpage( -            streams_url, station_id, note='Downloading stream data') +            streams_url, content_id, note='Downloading stream data')          streams = json.loads(self._search_regex(              r'\((.*)\);', stream_data, 'stream info'))['Streams'] @@ -97,10 +56,122 @@ class TuneInIE(InfoExtractor):          self._sort_formats(formats)          return { -            'id': station_id, +            'id': content_id,              'title': title,              'formats': formats,              'thumbnail': thumbnail,              'location': location,              'is_live': is_live,          } + + +class TuneInClipIE(TuneInBaseIE): +    IE_NAME = 'tunein:clip' +    _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P<id>\d+)' +    _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s' + +    _TESTS = [ +        { +            'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816', +            'md5': '99f00d772db70efc804385c6b47f4e77', +            'info_dict': { +                'id': '816', +                'title': '32m', +                'ext': 'mp3', +            }, +        }, +    ] + + +class TuneInStationIE(TuneInBaseIE): +    IE_NAME = 'tunein:station' +    _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId\=)(?P<id>\d+)' +    _API_URL_QUERY = '?tuneType=Station&stationId=%s' + +    @classmethod +    def suitable(cls, url): +        return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url) + +    _TESTS = [ +        { +            'url': 'http://tunein.com/radio/Jazz24-885-s34682/', +            'info_dict': { +                'id': '34682', +                'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', +                'ext': 'mp3', +                'location': 'Tacoma, WA', +            }, +            'params': { +                'skip_download': True,  # live stream +            }, +        }, +    ] + + +class TuneInProgramIE(TuneInBaseIE): +    IE_NAME = 'tunein:program' +    _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId\=)(?P<id>\d+)' +    _API_URL_QUERY = '?tuneType=Program&programId=%s' + +    _TESTS = [ +        { +            'url': 'http://tunein.com/radio/Jazz-24-p2506/', +            'info_dict': { +                'id': '2506', +                'title': 'Jazz 24 on 91.3 WUKY-HD3', +                'ext': 'mp3', +                'location': 'Lexington, KY', +            }, +            'params': { +                'skip_download': True,  # live stream +            }, +        }, +    ] + + +class TuneInTopicIE(TuneInBaseIE): +    IE_NAME = 'tunein:topic' +    _VALID_URL = r'https?://(?:www\.)?tunein\.com/topic/.*?TopicId\=(?P<id>\d+)' +    _API_URL_QUERY = '?tuneType=Topic&topicId=%s' + +    _TESTS = [ +        { +            'url': 'http://tunein.com/topic/?TopicId=101830576', +            'md5': 'c31a39e6f988d188252eae7af0ef09c9', +            'info_dict': { +                'id': '101830576', +                'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)', +                'ext': 'mp3', +                'location': 'Belgium', +            }, +        }, +    ] + + +class TuneInShortenerIE(InfoExtractor): +    IE_NAME = 'tunein:shortener' +    IE_DESC = False  # Do not list +    _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)' + +    _TEST = { +        # test redirection +        'url': 'http://tun.in/ser7s', +        'info_dict': { +            'id': '34682', +            'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', +            'ext': 'mp3', +            'location': 'Tacoma, WA', +        }, +        'params': { +            'skip_download': True,  # live stream +        }, +    } + +    def _real_extract(self, url): +        redirect_id = self._match_id(url) +        # The server doesn't support HEAD requests +        urlh = self._request_webpage( +            url, redirect_id, note='Downloading redirect page') +        url = urlh.geturl() +        self.to_screen('Following redirect: %s' % url) +        return self.url_result(url) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 811ee197d..129668a99 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -86,9 +86,10 @@ class VGTVIE(XstreamIE):          {              # streamType: wasLive              'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', +            'md5': '458f4841239dab414343b50e5af8869c',              'info_dict': {                  'id': '113063', -                'ext': 'mp4', +                'ext': 'flv',                  'title': 'V75 fra Solvalla 30.05.15',                  'description': 'md5:b3743425765355855f88e096acc93231',                  'thumbnail': 're:^https?://.*\.jpg', @@ -97,10 +98,6 @@ class VGTVIE(XstreamIE):                  'upload_date': '20150530',                  'view_count': int,              }, -            'params': { -                # m3u8 download -                'skip_download': True, -            },          },          {              'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', @@ -160,12 +157,15 @@ class VGTVIE(XstreamIE):                  formats.extend(m3u8_formats)          hds_url = streams.get('hds') -        # wasLive hds are always 404 -        if hds_url and stream_type != 'wasLive': +        if hds_url: +            hdcore_sign = 'hdcore=3.7.0'              f4m_formats = self._extract_f4m_formats( -                hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False) +                hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False)              if f4m_formats: -                formats.extend(f4m_formats) +                for entry in f4m_formats: +                    # URLs without the extra param induce an 404 error +                    entry.update({'extra_param_to_segment_url': hdcore_sign}) +                    formats.append(entry)          mp4_urls = streams.get('pseudostreaming') or []          mp4_url = streams.get('mp4') diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index ca3f20a3d..9a1c377a4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -279,7 +279,7 @@ class VikiIE(VikiBaseIE):                  if format_id == 'm3u8':                      m3u8_formats = self._extract_m3u8_formats(                          format_dict['url'], video_id, 'mp4', 'm3u8_native', -                        m3u8_id='m3u8-%s' % protocol, fatal=None) +                        m3u8_id='m3u8-%s' % protocol, fatal=False)                      if m3u8_formats:                          formats.extend(m3u8_formats)                  else: diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 9a3331a69..92c12bac6 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -10,106 +10,16 @@ from ..utils import (      unified_strdate,      OnDemandPagedList,      xpath_text, +    determine_ext, +    qualities, +    float_or_none,  ) -def extract_from_xml_url(ie, video_id, xml_url): -    doc = ie._download_xml( -        xml_url, video_id, -        note='Downloading video info', -        errnote='Failed to download video info') - -    title = doc.find('.//information/title').text -    description = xpath_text(doc, './/information/detail', 'description') -    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) -    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') -    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') -    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) - -    def xml_to_format(fnode): -        video_url = fnode.find('url').text -        is_available = 'http://www.metafilegenerator' not in video_url - -        format_id = fnode.attrib['basetype'] -        format_m = re.match(r'''(?x) -            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ -            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) -        ''', format_id) - -        ext = format_m.group('container') -        proto = format_m.group('proto').lower() - -        quality = xpath_text(fnode, './quality', 'quality') -        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) -        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) - -        width = int_or_none(xpath_text(fnode, './width', 'width')) -        height = int_or_none(xpath_text(fnode, './height', 'height')) - -        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) - -        format_note = '' -        if not format_note: -            format_note = None - -        return { -            'format_id': format_id + '-' + quality, -            'url': video_url, -            'ext': ext, -            'acodec': format_m.group('acodec'), -            'vcodec': format_m.group('vcodec'), -            'abr': abr, -            'vbr': vbr, -            'width': width, -            'height': height, -            'filesize': filesize, -            'format_note': format_note, -            'protocol': proto, -            '_available': is_available, -        } - -    def xml_to_thumbnails(fnode): -        thumbnails = [] -        for node in fnode: -            thumbnail_url = node.text -            if not thumbnail_url: -                continue -            thumbnail = { -                'url': thumbnail_url, -            } -            if 'key' in node.attrib: -                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) -                if m: -                    thumbnail['width'] = int(m.group(1)) -                    thumbnail['height'] = int(m.group(2)) -            thumbnails.append(thumbnail) -        return thumbnails - -    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) - -    format_nodes = doc.findall('.//formitaeten/formitaet') -    formats = list(filter( -        lambda f: f['_available'], -        map(xml_to_format, format_nodes))) -    ie._sort_formats(formats) - -    return { -        'id': video_id, -        'title': title, -        'description': description, -        'duration': duration, -        'thumbnails': thumbnails, -        'uploader': uploader, -        'uploader_id': uploader_id, -        'upload_date': upload_date, -        'formats': formats, -    } - -  class ZDFIE(InfoExtractor):      _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' -    _TEST = { +    _TESTS = [{          'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',          'info_dict': {              'id': '2037704', @@ -122,12 +32,163 @@ class ZDFIE(InfoExtractor):              'upload_date': '20131127',          },          'skip': 'Videos on ZDF.de are depublicised in short order', -    } +    }] + +    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): +        param_groups = {} +        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)): +            group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')) +            params = {} +            for param in param_group: +                params[param.get('name')] = param.get('value') +            param_groups[group_id] = params + +        formats = [] +        for video in smil.findall(self._xpath_ns('.//video', namespace)): +            src = video.get('src') +            if not src: +                continue +            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) +            group_id = video.get('paramGroup') +            param_group = param_groups[group_id] +            for proto in param_group['protocols'].split(','): +                formats.append({ +                    'url': '%s://%s' % (proto, param_group['host']), +                    'app': param_group['app'], +                    'play_path': src, +                    'ext': 'flv', +                    'format_id': '%s-%d' % (proto, bitrate), +                    'tbr': bitrate, +                    'protocol': proto, +                }) +        self._sort_formats(formats) +        return formats + +    def extract_from_xml_url(self, video_id, xml_url): +        doc = self._download_xml( +            xml_url, video_id, +            note='Downloading video info', +            errnote='Failed to download video info') + +        title = doc.find('.//information/title').text +        description = xpath_text(doc, './/information/detail', 'description') +        duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) +        uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') +        uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') +        upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) + +        def xml_to_thumbnails(fnode): +            thumbnails = [] +            for node in fnode: +                thumbnail_url = node.text +                if not thumbnail_url: +                    continue +                thumbnail = { +                    'url': thumbnail_url, +                } +                if 'key' in node.attrib: +                    m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) +                    if m: +                        thumbnail['width'] = int(m.group(1)) +                        thumbnail['height'] = int(m.group(2)) +                thumbnails.append(thumbnail) +            return thumbnails + +        thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) + +        format_nodes = doc.findall('.//formitaeten/formitaet') +        quality = qualities(['veryhigh', 'high', 'med', 'low']) + +        def get_quality(elem): +            return quality(xpath_text(elem, 'quality')) +        format_nodes.sort(key=get_quality) +        format_ids = [] +        formats = [] +        for fnode in format_nodes: +            video_url = fnode.find('url').text +            is_available = 'http://www.metafilegenerator' not in video_url +            if not is_available: +                continue +            format_id = fnode.attrib['basetype'] +            quality = xpath_text(fnode, './quality', 'quality') +            format_m = re.match(r'''(?x) +                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ +                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) +            ''', format_id) + +            ext = determine_ext(video_url, None) or format_m.group('container') +            if ext not in ('smil', 'f4m', 'm3u8'): +                format_id = format_id + '-' + quality +            if format_id in format_ids: +                continue + +            if ext == 'meta': +                continue +            elif ext == 'smil': +                smil_formats = self._extract_smil_formats( +                    video_url, video_id, fatal=False) +                if smil_formats: +                    formats.extend(smil_formats) +            elif ext == 'm3u8': +                m3u8_formats = self._extract_m3u8_formats( +                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats) +            elif ext == 'f4m': +                f4m_formats = self._extract_f4m_formats( +                    video_url, video_id, f4m_id='hds', fatal=False) +                if f4m_formats: +                    formats.extend(f4m_formats) +            else: +                proto = format_m.group('proto').lower() + +                abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) +                vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) + +                width = int_or_none(xpath_text(fnode, './width', 'width')) +                height = int_or_none(xpath_text(fnode, './height', 'height')) + +                filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) + +                format_note = '' +                if not format_note: +                    format_note = None + +                formats.append({ +                    'format_id': format_id, +                    'url': video_url, +                    'ext': ext, +                    'acodec': format_m.group('acodec'), +                    'vcodec': format_m.group('vcodec'), +                    'abr': abr, +                    'vbr': vbr, +                    'width': width, +                    'height': height, +                    'filesize': filesize, +                    'format_note': format_note, +                    'protocol': proto, +                    '_available': is_available, +                }) +            format_ids.append(format_id) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'thumbnails': thumbnails, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'upload_date': upload_date, +            'formats': formats, +        }      def _real_extract(self, url):          video_id = self._match_id(url)          xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id -        return extract_from_xml_url(self, video_id, xml_url) +        return self.extract_from_xml_url(video_id, xml_url)  class ZDFChannelIE(InfoExtractor): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1737ac5f6..0ed6c45c8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -773,11 +773,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):                      raise original_ioerror              resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)              resp.msg = old_resp.msg +            del resp.headers['Content-encoding']          # deflate          if resp.headers.get('Content-encoding', '') == 'deflate':              gz = io.BytesIO(self.deflate(resp.read()))              resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)              resp.msg = old_resp.msg +            del resp.headers['Content-encoding']          # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see          # https://github.com/rg3/youtube-dl/issues/6457).          if 300 <= resp.code < 400: | 
