about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/__init__.py | 15
-rw-r--r-- youtube_dl/extractor/abc7news.py | 1
-rw-r--r-- youtube_dl/extractor/cspan.py | 13
-rw-r--r-- youtube_dl/extractor/dcn.py | 178
-rw-r--r-- youtube_dl/extractor/esri.py | 2
-rw-r--r-- youtube_dl/extractor/facebook.py | 20
-rw-r--r-- youtube_dl/extractor/iqiyi.py | 11
-rw-r--r-- youtube_dl/extractor/jwplatform.py | 3
-rw-r--r-- youtube_dl/extractor/livestream.py | 2
-rw-r--r-- youtube_dl/extractor/phoenix.py | 9
-rw-r--r-- youtube_dl/extractor/rai.py | 4
-rw-r--r-- youtube_dl/extractor/tunein.py | 191
-rw-r--r-- youtube_dl/extractor/vgtv.py | 18
-rw-r--r-- youtube_dl/extractor/viki.py | 2
-rw-r--r-- youtube_dl/extractor/zdf.py | 253
15 files changed, 501 insertions, 221 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 165835f63..971047ad4 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -135,7 +135,12 @@ from .dailymotion import (
)
from .daum import DaumIE
from .dbtv import DBTVIE
-from .dcn import DCNIE
+from .dcn import (
+ DCNIE,
+ DCNVideoIE,
+ DCNLiveIE,
+ DCNSeasonIE,
+)
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .democracynow import DemocracynowIE
@@ -703,7 +708,13 @@ from .tube8 import Tube8IE
from .tubitv import TubiTvIE
from .tudou import TudouIE
from .tumblr import TumblrIE
-from .tunein import TuneInIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
from .turbo import TurboIE
from .tutv import TutvIE
from .tv2 import (
diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py
index c04949c21..122dc9099 100644
--- a/youtube_dl/extractor/abc7news.py
+++ b/youtube_dl/extractor/abc7news.py
@@ -44,7 +44,6 @@ class Abc7NewsIE(InfoExtractor):
'contentURL', webpage, 'm3u8 url', fatal=True)
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
- self._sort_formats(formats)
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index 7b685d157..b3ee67018 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -58,18 +58,23 @@ class CSpanIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ video_type = None
webpage = self._download_webpage(url, video_id)
- matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
- if matches:
+ # Look for clipid first, because clipprog always appears before it
+ patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
+ results = list(filter(None, (re.search(p, webpage) for p in patterns)))
+ if results:
+ matches = results[0]
video_type, video_id = matches.groups()
- if video_type == 'prog':
- video_type = 'program'
+ video_type = 'clip' if video_type == 'id' else 'program'
else:
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
if senate_isvp_url:
title = self._og_search_title(webpage)
surl = smuggle_url(senate_isvp_url, {'force_title': title})
return self.url_result(surl, 'SenateISVP', video_id, title)
+ if video_type is None or video_id is None:
+ raise ExtractorError('unable to find video id and type')
def get_text_attr(d, attr):
return d.get(attr, {}).get('#text')
diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py
index 9737cff14..0d140f12f 100644
--- a/youtube_dl/extractor/dcn.py
+++ b/youtube_dl/extractor/dcn.py
@@ -1,26 +1,89 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+import base64
+
from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
+ smuggle_url,
+ unsmuggle_url,
)
class DCNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'
+
+ def _real_extract(self, url):
+ show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
+ if video_id and int(video_id) > 0:
+ return self.url_result(
+ 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo')
+ elif season_id and int(season_id) > 0:
+ return self.url_result(smuggle_url(
+ 'http://www.dcndigital.ae/program/season/%s' % season_id,
+ {'show_id': show_id}), 'DCNSeason')
+ else:
+ return self.url_result(
+ 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason')
+
+
+class DCNBaseIE(InfoExtractor):
+ def _extract_video_info(self, video_data, video_id, is_live):
+ title = video_data.get('title_en') or video_data['title_ar']
+ img = video_data.get('img')
+ thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
+ duration = int_or_none(video_data.get('duration'))
+ description = video_data.get('description_en') or video_data.get('description_ar')
+ timestamp = parse_iso8601(video_data.get('create_time'), ' ')
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'is_live': is_live,
+ }
+
+ def _extract_video_formats(self, webpage, video_id, entry_protocol):
+ formats = []
+ m3u8_url = self._html_search_regex(
+ r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False)
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+
+ rtsp_url = self._search_regex(
+ r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
+ if rtsp_url:
+ formats.append({
+ 'url': rtsp_url,
+ 'format_id': 'rtsp',
+ })
+
+ self._sort_formats(formats)
+ return formats
+
+
+class DCNVideoIE(DCNBaseIE):
+ IE_NAME = 'dcn:video'
+ _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
+ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
'info_dict':
{
'id': '17375',
'ext': 'mp4',
'title': 'رحلة العمر : الحلقة 1',
'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
- 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 2041,
'timestamp': 1227504126,
'upload_date': '20081124',
@@ -37,46 +100,95 @@ class DCNIE(InfoExtractor):
request = sanitized_Request(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
headers={'Origin': 'http://www.dcndigital.ae'})
-
- video = self._download_json(request, video_id)
- title = video.get('title_en') or video['title_ar']
+ video_data = self._download_json(request, video_id)
+ info = self._extract_video_info(video_data, video_id, False)
webpage = self._download_webpage(
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
compat_urllib_parse.urlencode({
- 'id': video['id'],
- 'user_id': video['user_id'],
- 'signature': video['signature'],
+ 'id': video_data['id'],
+ 'user_id': video_data['user_id'],
+ 'signature': video_data['signature'],
'countries': 'Q0M=',
'filter': 'DENY',
}), video_id)
+ info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
+ return info
- m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
- rtsp_url = self._search_regex(
- r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
- if rtsp_url:
- formats.append({
- 'url': rtsp_url,
- 'format_id': 'rtsp',
+class DCNLiveIE(DCNBaseIE):
+ IE_NAME = 'dcn:live'
+ _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ request = sanitized_Request(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
+ headers={'Origin': 'http://www.dcndigital.ae'})
+
+ channel_data = self._download_json(request, channel_id)
+ info = self._extract_video_info(channel_data, channel_id, True)
+
+ webpage = self._download_webpage(
+ 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
+ compat_urllib_parse.urlencode({
+ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
+ 'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
+ 'signature': channel_data['signature'],
+ 'countries': 'Q0M=',
+ 'filter': 'DENY',
+ }), channel_id)
+ info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
+ return info
+
+
+class DCNSeasonIE(InfoExtractor):
+ IE_NAME = 'dcn:season'
+ _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
+ _TEST = {
+ 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
+ 'info_dict':
+ {
+ 'id': '7910',
+ 'title': 'محاضرات الشيخ الشعراوي',
+ },
+ 'playlist_mincount': 27,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ show_id, season_id = re.match(self._VALID_URL, url).groups()
+
+ data = {}
+ if season_id:
+ data['season'] = season_id
+ show_id = smuggled_data.get('show_id')
+ if show_id is None:
+ request = sanitized_Request(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
+ headers={'Origin': 'http://www.dcndigital.ae'})
+ season = self._download_json(request, season_id)
+ show_id = season['id']
+ data['show_id'] = show_id
+ request = sanitized_Request(
+ 'http://admin.mangomolo.com/analytics/index.php/plus/show',
+ compat_urllib_parse.urlencode(data),
+ {
+ 'Origin': 'http://www.dcndigital.ae',
+ 'Content-Type': 'application/x-www-form-urlencoded'
})
- self._sort_formats(formats)
+ show = self._download_json(request, show_id)
+ if not season_id:
+ season_id = show['default_season']
+ for season in show['seasons']:
+ if season['id'] == season_id:
+ title = season.get('title_en') or season['title_ar']
- img = video.get('img')
- thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
- duration = int_or_none(video.get('duration'))
- description = video.get('description_en') or video.get('description_ar')
- timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')
+ entries = []
+ for video in show['videos']:
+ entries.append(self.url_result(
+ 'http://www.dcndigital.ae/media/%s' % video['id'], 'DCNVideo'))
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- }
+ return self.playlist_result(entries, season_id, title)
diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py
index bf5d2019f..d4205d7fb 100644
--- a/youtube_dl/extractor/esri.py
+++ b/youtube_dl/extractor/esri.py
@@ -61,7 +61,7 @@ class EsriVideoIE(InfoExtractor):
webpage, 'duration', fatal=False))
upload_date = unified_strdate(self._html_search_meta(
- 'last-modified', webpage, 'upload date', fatal=None))
+ 'last-modified', webpage, 'upload date', fatal=False))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 39c481068..5e43f2359 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -74,7 +74,7 @@ class FacebookIE(InfoExtractor):
return
login_page_req = sanitized_Request(self._LOGIN_URL)
- login_page_req.add_header('Cookie', 'locale=en_US')
+ self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None,
note='Downloading login page',
errnote='Unable to download login page')
@@ -100,13 +100,25 @@ class FacebookIE(InfoExtractor):
login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+ login_results, 'login error', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
+ fb_dtsg = self._search_regex(
+ r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+ h = self._search_regex(
+ r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+ if not fb_dtsg or not h:
+ return
+
check_form = {
- 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
- 'h': self._search_regex(
- r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
+ 'fb_dtsg': fb_dtsg,
+ 'h': h,
'name_action_selected': 'dont_save',
}
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index c3731a110..66a70a181 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -16,7 +16,7 @@ class IqiyiIE(InfoExtractor):
IE_NAME = 'iqiyi'
IE_DESC = '爱奇艺'
- _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html'
+ _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
_TESTS = [{
'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
@@ -84,6 +84,15 @@ class IqiyiIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://yule.iqiyi.com/pcb.html',
+ 'only_matching': True,
}]
_FORMATS_MAP = [
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index cdc095a79..a92adf2b3 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -44,7 +44,8 @@ class JWPlatformIE(InfoExtractor):
source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl':
- m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+ m3u8_formats = self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', 'm3u8_native', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif source_type.startswith('audio'):
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 9c8d826c4..688eb2308 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -254,7 +254,7 @@ class LivestreamOriginalIE(InfoExtractor):
'playlist_mincount': 4,
}, {
# live stream
- 'url': 'http://www.livestream.com/znsbahamas',
+ 'url': 'http://original.livestream.com/znsbahamas',
'only_matching': True,
}]
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
index 46cebc0d7..6ce2ec19d 100644
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -1,10 +1,9 @@
from __future__ import unicode_literals
-from .common import InfoExtractor
-from .zdf import extract_from_xml_url
+from .zdf import ZDFIE
-class PhoenixIE(InfoExtractor):
+class PhoenixIE(ZDFIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
(?:
phoenix/die_sendungen/(?:[^/]+/)?
@@ -41,5 +40,5 @@ class PhoenixIE(InfoExtractor):
r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
webpage, 'internal video ID')
- api_url = 'http://www.phoenix.de/php/zdfplayer-v1.3/data/beitragsDetails.php?ak=web&id=%s' % internal_id
- return extract_from_xml_url(self, video_id, api_url)
+ api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id
+ return self.extract_from_xml_url(video_id, api_url)
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 14f1ccbb4..278b1d2bf 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -118,13 +118,13 @@ class RaiTVIE(InfoExtractor):
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
- fatal=None)
+ fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
elif ext == 'f4m':
f4m_formats = self._extract_f4m_formats(
media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
- video_id, f4m_id='hds', fatal=None)
+ video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
elif ext == 'stl':
diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py
index b6b1f2568..8322cc14d 100644
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dl/extractor/tunein.py
@@ -2,74 +2,33 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import ExtractorError
+from ..compat import compat_urlparse
-class TuneInIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?
- (?:
- tunein\.com/
- (?:
- radio/.*?-s|
- station/.*?StationId\=
- )(?P<id>[0-9]+)
- |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
- )
- '''
- _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
-
- _INFO_DICT = {
- 'id': '34682',
- 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
- 'ext': 'aac',
- 'thumbnail': 're:^https?://.*\.png$',
- 'location': 'Tacoma, WA',
- }
- _TESTS = [
- {
- 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
- 'info_dict': _INFO_DICT,
- 'params': {
- 'skip_download': True, # live stream
- },
- },
- { # test redirection
- 'url': 'http://tun.in/ser7s',
- 'info_dict': _INFO_DICT,
- 'params': {
- 'skip_download': True, # live stream
- },
- },
- ]
+class TuneInBaseIE(InfoExtractor):
+ _API_BASE_URL = 'http://tunein.com/tuner/tune/'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- redirect_id = mobj.group('redirect_id')
- if redirect_id:
- # The server doesn't support HEAD requests
- urlh = self._request_webpage(
- url, redirect_id, note='Downloading redirect page')
- url = urlh.geturl()
- self.to_screen('Following redirect: %s' % url)
- mobj = re.match(self._VALID_URL, url)
- station_id = mobj.group('id')
-
- station_info = self._download_json(
- self._API_URL_TEMPLATE.format(station_id),
- station_id, note='Downloading station JSON')
-
- title = station_info['Title']
- thumbnail = station_info.get('Logo')
- location = station_info.get('Location')
- streams_url = station_info.get('StreamUrl')
+ content_id = self._match_id(url)
+
+ content_info = self._download_json(
+ self._API_BASE_URL + self._API_URL_QUERY % content_id,
+ content_id, note='Downloading JSON metadata')
+
+ title = content_info['Title']
+ thumbnail = content_info.get('Logo')
+ location = content_info.get('Location')
+ streams_url = content_info.get('StreamUrl')
if not streams_url:
- raise ExtractorError('No downloadable streams found',
- expected=True)
+ raise ExtractorError('No downloadable streams found', expected=True)
+ if not streams_url.startswith('http://'):
+ streams_url = compat_urlparse.urljoin(url, streams_url)
+
stream_data = self._download_webpage(
- streams_url, station_id, note='Downloading stream data')
+ streams_url, content_id, note='Downloading stream data')
streams = json.loads(self._search_regex(
r'\((.*)\);', stream_data, 'stream info'))['Streams']
@@ -97,10 +56,122 @@ class TuneInIE(InfoExtractor):
self._sort_formats(formats)
return {
- 'id': station_id,
+ 'id': content_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'location': location,
'is_live': is_live,
}
+
+
+class TuneInClipIE(TuneInBaseIE):
+ IE_NAME = 'tunein:clip'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s'
+
+ _TESTS = [
+ {
+ 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816',
+ 'md5': '99f00d772db70efc804385c6b47f4e77',
+ 'info_dict': {
+ 'id': '816',
+ 'title': '32m',
+ 'ext': 'mp3',
+ },
+ },
+ ]
+
+
+class TuneInStationIE(TuneInBaseIE):
+ IE_NAME = 'tunein:station'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId\=)(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Station&stationId=%s'
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url)
+
+ _TESTS = [
+ {
+ 'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
+ 'info_dict': {
+ 'id': '34682',
+ 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+ 'ext': 'mp3',
+ 'location': 'Tacoma, WA',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ },
+ ]
+
+
+class TuneInProgramIE(TuneInBaseIE):
+ IE_NAME = 'tunein:program'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId\=)(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Program&programId=%s'
+
+ _TESTS = [
+ {
+ 'url': 'http://tunein.com/radio/Jazz-24-p2506/',
+ 'info_dict': {
+ 'id': '2506',
+ 'title': 'Jazz 24 on 91.3 WUKY-HD3',
+ 'ext': 'mp3',
+ 'location': 'Lexington, KY',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ },
+ ]
+
+
+class TuneInTopicIE(TuneInBaseIE):
+ IE_NAME = 'tunein:topic'
+ _VALID_URL = r'https?://(?:www\.)?tunein\.com/topic/.*?TopicId\=(?P<id>\d+)'
+ _API_URL_QUERY = '?tuneType=Topic&topicId=%s'
+
+ _TESTS = [
+ {
+ 'url': 'http://tunein.com/topic/?TopicId=101830576',
+ 'md5': 'c31a39e6f988d188252eae7af0ef09c9',
+ 'info_dict': {
+ 'id': '101830576',
+ 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)',
+ 'ext': 'mp3',
+ 'location': 'Belgium',
+ },
+ },
+ ]
+
+
+class TuneInShortenerIE(InfoExtractor):
+ IE_NAME = 'tunein:shortener'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'
+
+ _TEST = {
+ # test redirection
+ 'url': 'http://tun.in/ser7s',
+ 'info_dict': {
+ 'id': '34682',
+ 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
+ 'ext': 'mp3',
+ 'location': 'Tacoma, WA',
+ },
+ 'params': {
+ 'skip_download': True, # live stream
+ },
+ }
+
+ def _real_extract(self, url):
+ redirect_id = self._match_id(url)
+ # The server doesn't support HEAD requests
+ urlh = self._request_webpage(
+ url, redirect_id, note='Downloading redirect page')
+ url = urlh.geturl()
+ self.to_screen('Following redirect: %s' % url)
+ return self.url_result(url)
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 811ee197d..129668a99 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -86,9 +86,10 @@ class VGTVIE(XstreamIE):
{
# streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
+ 'md5': '458f4841239dab414343b50e5af8869c',
'info_dict': {
'id': '113063',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg',
@@ -97,10 +98,6 @@ class VGTVIE(XstreamIE):
'upload_date': '20150530',
'view_count': int,
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
},
{
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
@@ -160,12 +157,15 @@ class VGTVIE(XstreamIE):
formats.extend(m3u8_formats)
hds_url = streams.get('hds')
- # wasLive hds are always 404
- if hds_url and stream_type != 'wasLive':
+ if hds_url:
+ hdcore_sign = 'hdcore=3.7.0'
f4m_formats = self._extract_f4m_formats(
- hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
+ hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
- formats.extend(f4m_formats)
+ for entry in f4m_formats:
# URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.append(entry)
mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4')
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index ca3f20a3d..9a1c377a4 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -279,7 +279,7 @@ class VikiIE(VikiBaseIE):
if format_id == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_dict['url'], video_id, 'mp4', 'm3u8_native',
- m3u8_id='m3u8-%s' % protocol, fatal=None)
+ m3u8_id='m3u8-%s' % protocol, fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 9a3331a69..92c12bac6 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -10,106 +10,16 @@ from ..utils import (
unified_strdate,
OnDemandPagedList,
xpath_text,
+ determine_ext,
+ qualities,
+ float_or_none,
)
-def extract_from_xml_url(ie, video_id, xml_url):
- doc = ie._download_xml(
- xml_url, video_id,
- note='Downloading video info',
- errnote='Failed to download video info')
-
- title = doc.find('.//information/title').text
- description = xpath_text(doc, './/information/detail', 'description')
- duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
- uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
- uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
- upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
-
- def xml_to_format(fnode):
- video_url = fnode.find('url').text
- is_available = 'http://www.metafilegenerator' not in video_url
-
- format_id = fnode.attrib['basetype']
- format_m = re.match(r'''(?x)
- (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
- (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
- ''', format_id)
-
- ext = format_m.group('container')
- proto = format_m.group('proto').lower()
-
- quality = xpath_text(fnode, './quality', 'quality')
- abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
- vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
-
- width = int_or_none(xpath_text(fnode, './width', 'width'))
- height = int_or_none(xpath_text(fnode, './height', 'height'))
-
- filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
-
- format_note = ''
- if not format_note:
- format_note = None
-
- return {
- 'format_id': format_id + '-' + quality,
- 'url': video_url,
- 'ext': ext,
- 'acodec': format_m.group('acodec'),
- 'vcodec': format_m.group('vcodec'),
- 'abr': abr,
- 'vbr': vbr,
- 'width': width,
- 'height': height,
- 'filesize': filesize,
- 'format_note': format_note,
- 'protocol': proto,
- '_available': is_available,
- }
-
- def xml_to_thumbnails(fnode):
- thumbnails = []
- for node in fnode:
- thumbnail_url = node.text
- if not thumbnail_url:
- continue
- thumbnail = {
- 'url': thumbnail_url,
- }
- if 'key' in node.attrib:
- m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
- if m:
- thumbnail['width'] = int(m.group(1))
- thumbnail['height'] = int(m.group(2))
- thumbnails.append(thumbnail)
- return thumbnails
-
- thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
-
- format_nodes = doc.findall('.//formitaeten/formitaet')
- formats = list(filter(
- lambda f: f['_available'],
- map(xml_to_format, format_nodes)))
- ie._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
- 'formats': formats,
- }
-
-
class ZDFIE(InfoExtractor):
_VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
'info_dict': {
'id': '2037704',
@@ -122,12 +32,163 @@ class ZDFIE(InfoExtractor):
'upload_date': '20131127',
},
'skip': 'Videos on ZDF.de are depublicised in short order',
- }
+ }]
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ param_groups = {}
+ for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
+ group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
+ params = {}
+ for param in param_group:
+ params[param.get('name')] = param.get('value')
+ param_groups[group_id] = params
+
+ formats = []
+ for video in smil.findall(self._xpath_ns('.//video', namespace)):
+ src = video.get('src')
+ if not src:
+ continue
+ bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ group_id = video.get('paramGroup')
+ param_group = param_groups[group_id]
+ for proto in param_group['protocols'].split(','):
+ formats.append({
+ 'url': '%s://%s' % (proto, param_group['host']),
+ 'app': param_group['app'],
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': '%s-%d' % (proto, bitrate),
+ 'tbr': bitrate,
+ 'protocol': proto,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def extract_from_xml_url(self, video_id, xml_url):
+ doc = self._download_xml(
+ xml_url, video_id,
+ note='Downloading video info',
+ errnote='Failed to download video info')
+
+ title = doc.find('.//information/title').text
+ description = xpath_text(doc, './/information/detail', 'description')
+ duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
+ uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
+ uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
+ upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
+
+ def xml_to_thumbnails(fnode):
+ thumbnails = []
+ for node in fnode:
+ thumbnail_url = node.text
+ if not thumbnail_url:
+ continue
+ thumbnail = {
+ 'url': thumbnail_url,
+ }
+ if 'key' in node.attrib:
+ m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
+ if m:
+ thumbnail['width'] = int(m.group(1))
+ thumbnail['height'] = int(m.group(2))
+ thumbnails.append(thumbnail)
+ return thumbnails
+
+ thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
+
+ format_nodes = doc.findall('.//formitaeten/formitaet')
+ quality = qualities(['veryhigh', 'high', 'med', 'low'])
+
+ def get_quality(elem):
+ return quality(xpath_text(elem, 'quality'))
+ format_nodes.sort(key=get_quality)
+ format_ids = []
+ formats = []
+ for fnode in format_nodes:
+ video_url = fnode.find('url').text
+ is_available = 'http://www.metafilegenerator' not in video_url
+ if not is_available:
+ continue
+ format_id = fnode.attrib['basetype']
+ quality = xpath_text(fnode, './quality', 'quality')
+ format_m = re.match(r'''(?x)
+ (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
+ (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
+ ''', format_id)
+
+ ext = determine_ext(video_url, None) or format_m.group('container')
+ if ext not in ('smil', 'f4m', 'm3u8'):
+ format_id = format_id + '-' + quality
+ if format_id in format_ids:
+ continue
+
+ if ext == 'meta':
+ continue
+ elif ext == 'smil':
+ smil_formats = self._extract_smil_formats(
+ video_url, video_id, fatal=False)
+ if smil_formats:
+ formats.extend(smil_formats)
+ elif ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ video_url, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ else:
+ proto = format_m.group('proto').lower()
+
+ abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
+ vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
+
+ width = int_or_none(xpath_text(fnode, './width', 'width'))
+ height = int_or_none(xpath_text(fnode, './height', 'height'))
+
+ filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
+
+ format_note = ''
+ if not format_note:
+ format_note = None
+
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'acodec': format_m.group('acodec'),
+ 'vcodec': format_m.group('vcodec'),
+ 'abr': abr,
+ 'vbr': vbr,
+ 'width': width,
+ 'height': height,
+ 'filesize': filesize,
+ 'format_note': format_note,
+ 'protocol': proto,
+ '_available': is_available,
+ })
+ format_ids.append(format_id)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
- return extract_from_xml_url(self, video_id, xml_url)
+ return self.extract_from_xml_url(video_id, xml_url)
class ZDFChannelIE(InfoExtractor):