diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/abcotvs.py (renamed from youtube_dl/extractor/abc7news.py) | 53 | ||||
-rw-r--r-- | youtube_dl/extractor/bilibili.py | 33 | ||||
-rw-r--r-- | youtube_dl/extractor/cartoonnetwork.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/cctv.py | 53 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/gamestar.py | 51 | ||||
-rw-r--r-- | youtube_dl/extractor/lci.py | 24 | ||||
-rw-r--r-- | youtube_dl/extractor/moevideo.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/nick.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/prosiebensat1.py | 218 | ||||
-rw-r--r-- | youtube_dl/extractor/puls4.py | 109 | ||||
-rw-r--r-- | youtube_dl/extractor/tbs.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/trutv.py | 35 | ||||
-rw-r--r-- | youtube_dl/extractor/turner.py | 26 | ||||
-rw-r--r-- | youtube_dl/extractor/tvnoe.py | 49 | ||||
-rw-r--r-- | youtube_dl/extractor/tvplay.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/wat.py | 70 |
17 files changed, 498 insertions, 278 deletions
diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abcotvs.py index c04949c21..53a900e50 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abcotvs.py @@ -1,13 +1,18 @@ +# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + int_or_none, + parse_iso8601, +) -class Abc7NewsIE(InfoExtractor): - _VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' +class ABCOTVSIE(InfoExtractor): + IE_NAME = 'abcotvs' + _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)' _TESTS = [ { 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/', @@ -15,7 +20,7 @@ class Abc7NewsIE(InfoExtractor): 'id': '472581', 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers', 'ext': 'mp4', - 'title': 'East Bay museum celebrates history of synthesized music', + 'title': 'East Bay museum celebrates vintage synthesizers', 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1421123075, @@ -41,7 +46,7 @@ class Abc7NewsIE(InfoExtractor): webpage = self._download_webpage(url, display_id) m3u8 = self._html_search_meta( - 'contentURL', webpage, 'm3u8 url', fatal=True) + 'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0] formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') self._sort_formats(formats) @@ -66,3 +71,41 @@ class Abc7NewsIE(InfoExtractor): 'uploader': uploader, 'formats': formats, } + + +class ABCOTVSClipsIE(InfoExtractor): + IE_NAME = 'abcotvs:clips' + _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)' + _TEST = { + 'url': 'https://clips.abcotvs.com/kabc/video/214814', + 'info_dict': { + 'id': '214814', + 'ext': 'mp4', + 'title': 'SpaceX launch pad explosion destroys rocket, satellite', + 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b', + 'upload_date': '20160901', + 'timestamp': 1472756695, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0] + title = video_data['title'] + formats = self._extract_m3u8_formats( + video_data['videoURL'].split('?')[0], video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailURL'), + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('pubDate')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a332fbb69..8fa96d3a0 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -10,11 +10,12 @@ from ..utils import ( int_or_none, float_or_none, unified_timestamp, + urlencode_postdata, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -77,6 +78,17 @@ class BiliBiliIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': ['upload time'], + }, { + 'url': 'http://bangumi.bilibili.com/anime/v/40068', + 'md5': '08d539a0884f3deb7b698fb13ba69696', + 'info_dict': { + 'id': '40068', + 'ext': 'mp4', + 'duration': 1402.357, + 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + 'thumbnail': 're:^http?://.+\.jpg', + }, }] _APP_KEY = '6f90a59ac58a4123' @@ -84,13 +96,19 @@ class BiliBiliIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - cid = compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] + if 'anime/v' not in url: + cid = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters'))['cid'][0] + else: + js = self._download_json( + 'http://bangumi.bilibili.com/web_api/get_source', video_id, + data=urlencode_postdata({'episode_id': video_id}), + headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}) + cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() @@ -125,6 +143,7 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) # TODO 'view_count' requires deobfuscating Javascript info = { @@ -132,7 +151,7 @@ class BiliBiliIE(InfoExtractor): 'title': title, 'description': description, 'timestamp': timestamp, - 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'thumbnail': thumbnail, 'duration': float_or_none(video_info.get('timelength'), scale=1000), } diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index b3f30b1ca..688a6375e 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -30,7 +30,7 @@ class CartoonNetworkIE(TurnerBaseIE): return self._extract_cvp_info( 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { - 'media_src': 'http://apple-secure.cdn.turner.com/toon/big', + 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', }, }) diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py new file mode 100644 index 000000000..72a72cb73 --- /dev/null +++ b/youtube_dl/extractor/cctv.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CCTVIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:.+?\.)? + (?: + cctv\.(?:com|cn)| + cntv\.cn + )/ + (?: + video/[^/]+/(?P<id>[0-9a-f]{32})| + \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+) + )''' + _TESTS = [{ + 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', + 'md5': '819c7b49fc3927d529fb4cd555621823', + 'info_dict': { + 'id': '454368eb19ad44a1925bf1eb96140a61', + 'ext': 'mp4', + 'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1', + } + }, { + 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})', + webpage, 'video_id') + api_data = self._download_json( + 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id) + m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url']) + + return { + 'id': video_id, + 'title': api_data['title'], + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False), + 'duration': float_or_none(api_data.get('video', {}).get('totalLength')), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d511b04bc..522ef7d8b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -5,11 +5,14 @@ from .abc import ( ABCIE, ABCIViewIE, ) -from .abc7news import Abc7NewsIE from .abcnews import ( AbcNewsIE, AbcNewsVideoIE, ) +from .abcotvs import ( + ABCOTVSIE, + ABCOTVSClipsIE, +) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastIE, @@ -143,6 +146,7 @@ from .cbsnews import ( ) from .cbssports import CBSSportsIE from .ccc import CCCIE +from .cctv import CCTVIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE @@ -421,6 +425,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .lci import LCIIE from .lcp import ( LcpPlayIE, LcpIE, @@ -888,6 +893,7 @@ from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trollvids import TrollvidsIE +from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE from .tudou import ( @@ -917,6 +923,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, TVPIE, diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 69058a583..341e72733 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -1,14 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, - parse_duration, - str_to_int, - unified_strdate, + remove_end, ) @@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor): 'id': '76110', 'ext': 'mp4', 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil', - 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.', - 'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg', + 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1406542020, 'upload_date': '20140728', 'duration': 17 } @@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_title = self._og_search_title(webpage) - title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title) - url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id - description = self._og_search_description(webpage).strip() - - thumbnail = self._proto_relative_url( - self._og_search_thumbnail(webpage), scheme='http:') - - upload_date = unified_strdate(self._html_search_regex( - r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ', - webpage, 'upload_date', fatal=False)) - - duration = parse_duration(self._html_search_regex( - r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration', - fatal=False)) - - view_count = str_to_int(self._html_search_regex( - r' Zuschauer: ([0-9\.]+) ', webpage, - 'view_count', fatal=False)) + # TODO: there are multiple ld+json objects in the webpage, + # while _search_json_ld finds only the first one + json_ld = self._parse_json(self._search_regex( + r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>', + webpage, 'JSON-LD', group='json_ld'), video_id) + info_dict = self._json_ld(json_ld, video_id) + info_dict['title'] = remove_end(info_dict['title'], ' - GameStar') + view_count = json_ld.get('interactionCount') comment_count = int_or_none(self._html_search_regex( - r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count', + r'([0-9]+) Kommentare</span>', webpage, 'comment_count', fatal=False)) - return { + info_dict.update({ 'id': video_id, - 'title': title, 'url': url, 'ext': 'mp4', - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date, - 'duration': duration, 'view_count': view_count, 'comment_count': comment_count - } + }) + + return info_dict diff --git a/youtube_dl/extractor/lci.py b/youtube_dl/extractor/lci.py new file mode 100644 index 000000000..af34829e7 --- /dev/null +++ b/youtube_dl/extractor/lci.py @@ -0,0 +1,24 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LCIIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _TEST = { + 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html', + 'md5': '2fdb2538b884d4d695f9bd2bde137e6c', + 'info_dict': { + 'id': '13244802', + 'ext': 'mp4', + 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting', + 'description': 'md5:a4363e3a960860132f8124b62f4a01c9', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id') + return self.url_result('wat:' + wat_id, 'Wat', wat_id) diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 978d5d5bf..91ee9c4e9 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -35,7 +35,8 @@ class MoeVideoIE(InfoExtractor): 'height': 360, 'duration': 179, 'filesize': 17822500, - } + }, + 'skip': 'Video has been removed', }, { 'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a', diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 64730a624..57cf1ce8e 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -69,13 +69,16 @@ class NickIE(MTVServicesInfoExtractor): class NickDeIE(MTVServicesInfoExtractor): IE_NAME = 'nick.de' - _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', 'only_matching': True, }, { 'url': 'http://www.nick.de/shows/342-icarly', 'only_matching': True, + }, { + 'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index c6eee3b72..7335dc2af 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -15,7 +15,111 @@ from ..utils import ( ) -class ProSiebenSat1IE(InfoExtractor): +class ProSiebenSat1BaseIE(InfoExtractor): + def _extract_video_info(self, url, clip_id): + client_location = url + + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': self._TOKEN, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'ids': clip_id, + })[0] + + if video.get('is_protected') is True: + raise ExtractorError('This video is DRM protected.', expected=True) + + duration = float_or_none(video.get('duration')) + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + }) + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate + + formats = [] + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: + continue + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) + self._sort_formats(formats) + + return { + 'duration': duration, + 'formats': formats, + } + + +class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' @@ -188,6 +292,9 @@ class ProSiebenSat1IE(InfoExtractor): }, ] + _TOKEN = 'prosieben' + _SALT = '01!8d8F_)r9]4s[qeuXfP%' + _CLIENT_NAME = 'kolibri-2.0.19-splec4' _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', @@ -234,123 +341,22 @@ class ProSiebenSat1IE(InfoExtractor): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex( self._CLIPID_REGEXES, webpage, 'clip id') - - access_token = 'prosieben' - client_name = 'kolibri-2.0.19-splec4' - client_location = url - - video = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos', - clip_id, 'Downloading videos JSON', query={ - 'access_token': access_token, - 'client_location': client_location, - 'client_name': client_name, - 'ids': clip_id, - })[0] - - if video.get('is_protected') is True: - raise ExtractorError('This video is DRM protected.', expected=True) - - duration = float_or_none(video.get('duration')) - source_ids = [compat_str(source['id']) for source in video['sources']] - - g = '01!8d8F_)r9]4s[qeuXfP%' - client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest() - - sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, - clip_id, 'Downloading sources JSON', query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - }) - server_id = sources['server_id'] - title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title') - - def fix_bitrate(bitrate): - bitrate = int_or_none(bitrate) - if not bitrate: - return None - return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate - - formats = [] - for source_id in source_ids: - client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest() - urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, - clip_id, 'Downloading urls JSON', fatal=False, query={ - 'access_token': access_token, - 'client_id': client_id, - 'client_location': client_location, - 'client_name': client_name, - 'server_id': server_id, - 'source_ids': source_id, - }) - if not urls: - continue - if urls.get('status_code') != 0: - raise ExtractorError('This video is unavailable', expected=True) - urls_sources = urls['sources'] - if isinstance(urls_sources, dict): - urls_sources = urls_sources.values() - for source in urls_sources: - source_url = source.get('url') - if not source_url: - continue - protocol = source.get('protocol') - mimetype = source.get('mimetype') - if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': - formats.extend(self._extract_f4m_formats( - source_url, clip_id, f4m_id='hds', fatal=False)) - elif mimetype == 'application/x-mpegURL': - formats.extend(self._extract_m3u8_formats( - source_url, clip_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - tbr = fix_bitrate(source['bitrate']) - if protocol in ('rtmp', 'rtmpe'): - mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) - if not mobj: - continue - path = mobj.group('path') - mp4colon_index = path.rfind('mp4:') - app = path[:mp4colon_index] - play_path = path[mp4colon_index:] - formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), - 'app': app, - 'play_path': play_path, - 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', - 'page_url': 'http://www.prosieben.de', - 'tbr': tbr, - 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), - }) - else: - formats.append({ - 'url': source_url, - 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), - }) - self._sort_formats(formats) - + info = self._extract_video_info(url, clip_id) description = self._html_search_regex( self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._html_search_regex( self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) - return { + info.update({ 'id': clip_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - } + }) + return info def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index fca30e1aa..9c2ccbe2d 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,88 +1,51 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from .common import InfoExtractor +from .prosiebensat1 import ProSiebenSat1BaseIE from ..utils import ( - ExtractorError, unified_strdate, - int_or_none, + parse_duration, + compat_str, ) -class Puls4IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)' +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)' _TESTS = [{ - 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816', - 'md5': '49f6a6629747eeec43cef6a46b5df81d', + 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', 'info_dict': { - 'id': '2716816', - 'ext': 'mp4', - 'title': 'Pro und Contra vom 23.02.2015', - 'description': 'md5:293e44634d9477a67122489994675db6', - 'duration': 2989, - 'upload_date': '20150224', + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', 'uploader': 'PULS_4', }, - 'skip': 'Only works from Germany', - }, { - 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106', - 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec', - 'info_dict': { - 'id': '1298106', - 'ext': 'mp4', - 'title': 'Lucky Fritz', - }, - 'skip': 'Only works from Germany', }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - error_message = self._html_search_regex( - r'<div[^>]+class="message-error"[^>]*>(.+?)</div>', - webpage, 'error message', default=None) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - - real_url = self._html_search_regex( - r'\"fsk-button\".+?href=\"([^"]+)', - webpage, 'fsk_button', default=None) - if real_url: - webpage = self._download_webpage(real_url, video_id) - - player = self._search_regex( - r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}', - webpage, 'player') - - player_json = self._parse_json( - '[%s]' % player, video_id, - transform_source=lambda s: s.replace('undefined,', '')) - - formats = None - result = None - - for v in player_json: - if isinstance(v, list) and not formats: - formats = [{ - 'url': f['url'], - 'format': 'hd' if f.get('hd') else 'sd', - 'width': int_or_none(f.get('size_x')), - 'height': int_or_none(f.get('size_y')), - 'tbr': int_or_none(f.get('bitrate')), - } for f in v] - self._sort_formats(formats) - elif isinstance(v, dict) and not result: - result = { - 'id': video_id, - 'title': v['videopartname'].strip(), - 'description': v.get('videotitle'), - 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')), - 'upload_date': unified_strdate(v.get('clipreleasetime')), - 'uploader': v.get('channel'), - } - - result['formats'] = formats - - return result + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode': player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': player_content.get('sourceCompany'), + }) + return info diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index 79b00e376..0c351e045 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -53,7 +53,7 @@ class TBSIE(TurnerBaseIE): 'media_src': 'http://ht.cdn.turner.com/%s/big' % site, }, 'secure': { - 'media_src': 'http://apple-secure.cdn.turner.com/%s/big' % site, + 'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, }, }) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py new file mode 100644 index 000000000..e60d8a181 --- /dev/null +++ b/youtube_dl/extractor/trutv.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .turner import TurnerBaseIE + + +class TruTVIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))' + _TEST = { + 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html', + 'md5': '2cdc844f317579fed1a7251b087ff417', + 'info_dict': { + 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets', + 'ext': 'mp4', + 'title': 'You Won\'t Believe These Sports Bets', + 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.', + 'upload_date': '20130305', + } + } + + def _real_extract(self, url): + path, video_id = re.match(self._VALID_URL, url).groups() + if path: + data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path + else: + data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id + return self._extract_cvp_info( + data_src, path, { + 'secure': { + 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', + 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', + }, + }) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index b59dafda6..4228c1ccc 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -12,7 +12,7 @@ from ..utils import ( parse_duration, xpath_attr, update_url_query, - compat_urlparse, + ExtractorError, ) @@ -24,6 +24,7 @@ class TurnerBaseIE(InfoExtractor): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) + content_id = xpath_text(video_data, 'contentId') or video_id # rtmp_src = xpath_text(video_data, 'akamai/src') # if rtmp_src: # splited_rtmp_src = rtmp_src.split(',') @@ -54,7 +55,7 @@ class TurnerBaseIE(InfoExtractor): # auth = self._download_webpage( # protected_path_data['tokenizer_src'], query={ # 'path': protected_path, - # 'videoId': video_id, + # 'videoId': content_id, # 'aifp': aifp, # }) # token = xpath_text(auth, 'token') @@ -72,8 +73,11 @@ class TurnerBaseIE(InfoExtractor): auth = self._download_xml( secure_path_data['tokenizer_src'], video_id, query={ 'path': secure_path, - 'videoId': video_id, + 'videoId': content_id, }) + error_msg = xpath_text(auth, 'error/msg') + if error_msg: + raise ExtractorError(error_msg, expected=True) token = xpath_text(auth, 'token') if not token: continue @@ -93,19 +97,9 @@ class TurnerBaseIE(InfoExtractor): formats.extend(self._extract_smil_formats( video_url, video_id, fatal=False)) elif ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id or 'hls', - fatal=False) - if m3u8_formats: - # Sometimes final URLs inside m3u8 are unsigned, let's fix this - # ourselves - qs = compat_urlparse.urlparse(video_url).query - if qs: - query = compat_urlparse.parse_qs(qs) - for m3u8_format in m3u8_formats: - m3u8_format['url'] = update_url_query(m3u8_format['url'], query) - m3u8_format['extra_param_to_segment_url'] = qs - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', + m3u8_id=format_id or 'hls', fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py new file mode 100644 index 000000000..1cd3e6a58 --- /dev/null +++ b/youtube_dl/extractor/tvnoe.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + clean_html, + get_element_by_class, + js_to_json, +) + + +class TVNoeIE(JWPlatformBaseIE): + _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.tvnoe.cz/video/10362', + 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', + 'info_dict': { + 'id': '10362', + 'ext': 'mp4', + 'series': 'Noční univerzita', + 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', + 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex( + r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL') + + ifs_page = self._download_webpage(iframe_url, video_id) + jwplayer_data = self._parse_json( + self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=iframe_url) + + info_dict.update({ + 'id': video_id, + 'title': clean_html(get_element_by_class( + 'field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class( + 'field-name-body', webpage)), + 'series': clean_html(get_element_by_class('title', webpage)) + }) + + return info_dict diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 4186e82db..c2a6e4e39 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -349,6 +349,25 @@ class ViafreeIE(InfoExtractor): }, 'add_ie': [TVPlayIE.ie_key()], }, { + # with relatedClips + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1', + 'info_dict': { + 'id': '758770', + 'ext': 'mp4', + 'title': 'Sommaren med YouTube-stjärnorna S01E01', + 'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f', + 'series': 'Sommaren med YouTube-stjärnorna', + 'season': 'Säsong 1', + 'season_number': 1, + 'duration': 1326, + 'timestamp': 1470905572, + 'upload_date': '20160811', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], + }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, }, { @@ -365,8 +384,17 @@ class ViafreeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_id = self._search_regex( - r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P<id>\d{6,})', - webpage, 'video id') + video_id = None + + thumbnail = self._og_search_thumbnail(webpage, default=None) + if thumbnail: + video_id = self._search_regex( + r'https?://[^/]+/imagecache/(?:[^/]+/)+seasons/\d+/(\d{6,})/', + thumbnail, 'video id', default=None) + + if not video_id: + video_id = self._search_regex( + r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})', + webpage, 'video id') return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 9f1b8b4b5..20fef1f04 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -86,38 +86,50 @@ class WatIE(InfoExtractor): def extract_url(path_template, url_type): req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) - head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type) - red_url = head.geturl() - if req_url == red_url: - raise ExtractorError( - '%s said: Sorry, this video is not available from your country.' % self.IE_NAME, - expected=True) - return red_url + head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False) + if head: + red_url = head.geturl() + if req_url != red_url: + return red_url + return None + + def remove_bitrate_limit(manifest_url): + return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url) formats = [] try: - http_url = extract_url('android5/%s.mp4', 'http') - m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - formats.extend(m3u8_formats) - formats.extend(self._extract_f4m_formats( - m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - for m3u8_format in m3u8_formats: - vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') - if not vbr or not abr: - continue - format_id = m3u8_format['format_id'].replace('hls', 'http') - fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) - if self._is_valid_url(fmt_url, video_id, format_id): - f = m3u8_format.copy() - f.update({ - 'url': fmt_url, - 'format_id': format_id, - 'protocol': 'http', - }) - formats.append(f) + manifest_urls = self._download_json( + 'http://www.wat.tv/get/webhtml/' + video_id, video_id) + m3u8_url = manifest_urls.get('hls') + if m3u8_url: + m3u8_url = remove_bitrate_limit(m3u8_url) + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + http_url = extract_url('android5/%s.mp4', 'http') + if http_url: + for m3u8_format in m3u8_formats: + vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') + if not vbr or not abr: + continue + format_id = m3u8_format['format_id'].replace('hls', 'http') + fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) + if self._is_valid_url(fmt_url, video_id, format_id): + f = m3u8_format.copy() + f.update({ + 'url': fmt_url, + 'format_id': format_id, + 'protocol': 'http', + }) + formats.append(f) + mpd_url = manifest_urls.get('mpd') + if mpd_url: + formats.extend(self._extract_mpd_formats(remove_bitrate_limit( + mpd_url), video_id, mpd_id='dash', fatal=False)) self._sort_formats(formats) except ExtractorError: abr = 64 |