diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/adobetv.py | 70 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/dbtv.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/dvtv.py | 116 | ||||
-rw-r--r-- | youtube_dl/extractor/gameone.py | 60 | ||||
-rw-r--r-- | youtube_dl/extractor/internetvideoarchive.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/nerdcubed.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/rtp.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/smotri.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/sohu.py | 96 | ||||
-rw-r--r-- | youtube_dl/extractor/sportdeutschland.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/telecinco.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/tmz.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/yahoo.py | 53 |
16 files changed, 305 insertions, 123 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 073ac1fab..fd0ebffe3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .abc import ABCIE from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE +from .adobetv import AdobeTVIE from .adultswim import AdultSwimIE from .aftonbladet import AftonbladetIE from .aljazeera import AlJazeeraIE diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py new file mode 100644 index 000000000..28e07f8b0 --- /dev/null +++ b/youtube_dl/extractor/adobetv.py @@ -0,0 +1,70 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, + str_to_int, +) + + +class AdobeTVIE(InfoExtractor): + _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)' + + _TEST = { + 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', + 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', + 'info_dict': { + 'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop', + 'ext': 'mp4', + 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', + 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', + 'thumbnail': 're:https?://.*\.jpg$', + 'upload_date': '20110914', + 'duration': 60, + 'view_count': int, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + player = self._parse_json( + self._search_regex(r'html5player:\s*({.+?})\s*\n', webpage, 'player'), + video_id) + + title = player.get('title') or self._search_regex( + r'data-title="([^"]+)"', webpage, 'title') + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + upload_date = unified_strdate( + self._html_search_meta('datepublished', webpage, 'upload date')) + + duration = parse_duration( + self._html_search_meta('duration', webpage, 'duration') + or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) + + view_count = str_to_int(self._search_regex( + r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', + webpage, 'view count')) + + formats = [{ + 'url': source['src'], + 'format_id': source.get('quality') or source['src'].split('-')[-1].split('.')[0] or None, + 'tbr': source.get('bitrate'), + } for source in player['sources']] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d302fe45f..4b950e485 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -40,7 +40,7 @@ class InfoExtractor(object): information about the video (or videos) the URL refers to. This information includes the real video URL, the video title, author and others. The information is stored in a dictionary which is then - passed to the FileDownloader. The FileDownloader processes this + passed to the YoutubeDL. The YoutubeDL processes this information possibly downloading the video to the file system, among other possible outcomes. diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 2c7756eb6..354046a9e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -29,8 +29,8 @@ from .common import InfoExtractor class CrunchyrollIE(SubtitlesInfoExtractor): - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' - _TEST = { + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { 'id': '645513', @@ -46,7 +46,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor): # rtmp 'skip_download': True, }, - } + }, { + 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', + 'only_matching': True, + }] _FORMAT_IDS = { '360': ('60', '106'), diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 1d3e2ff08..212217625 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( float_or_none, int_or_none, @@ -61,7 +62,7 @@ class DBTVIE(InfoExtractor): self._sort_formats(formats) return { - 'id': video['id'], + 'id': compat_str(video['id']), 'display_id': display_id, 'title': video['title'], 'description': clean_html(video['desc']), diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index af552831c..c1a4bc757 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -1,63 +1,125 @@ # coding: utf-8 - from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( js_to_json, - unescapeHTML + unescapeHTML, + ExtractorError, ) class DVTVIE(InfoExtractor): IE_NAME = 'dvtv' - IE_DESC = 'http://video.aktualne.cz/dvtv/' + IE_DESC = 'http://video.aktualne.cz/' - _VALID_URL = r'http://video\.aktualne\.cz/dvtv/(?P<id>[a-z0-9-]+/r~[0-9a-f]{32})/?' + _VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' _TESTS = [{ 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', - 'md5': '75800f964fa0f82939a2914563301f72', + 'md5': '67cb83e4a955d36e1b5d31993134a0c2', 'info_dict': { - 'id': 'e5efe9ca855511e4833a0025900fea04', - 'ext': 'webm', - 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně' + 'id': 'dc0768de855511e49e4b0025900fea04', + 'ext': 'mp4', + 'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně', } }, { 'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/', 'md5': '6388f1941b48537dbd28791f712af8bf', 'info_dict': { - 'id': '82ed4322849211e4a10c0025900fea04', + 'id': '72c02230849211e49f60002590604f2e', 'ext': 'mp4', - 'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala' + 'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala', } + }, { + 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', + 'info_dict': { + 'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'id': '973eb3bc854e11e498be002590604f2e', + }, + 'playlist': [{ + 'md5': 'da7ca6be4935532241fa9520b3ad91e4', + 'info_dict': { + 'id': 'b0b40906854d11e4bdad0025900fea04', + 'ext': 'mp4', + 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne' + } + }, { + 'md5': '5f7652a08b05009c1292317b449ffea2', + 'info_dict': { + 'id': '420ad9ec854a11e4bdad0025900fea04', + 'ext': 'mp4', + 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka' + } + }, { + 'md5': '498eb9dfa97169f409126c617e2a3d64', + 'info_dict': { + 'id': '95d35580846a11e4b6d20025900fea04', + 'ext': 'mp4', + 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?' + } + }, { + 'md5': 'b8dc6b744844032dab6ba3781a7274b9', + 'info_dict': { + 'id': '6fe14d66853511e4833a0025900fea04', + 'ext': 'mp4', + 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády' + } + }], + }, { + 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', + 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + def _parse_video_metadata(self, js, video_id): + metadata = self._parse_json(js, video_id, transform_source=js_to_json) - code = self._search_regex( - r'(?s)embedData[0-9a-f]{32}\[\'asset\'\] = (\{.+?\});', - webpage, 'video JSON') - payload = self._parse_json(code, video_id, transform_source=js_to_json) formats = [] - for source in payload['sources']: - ext = source['type'][6:] + for video in metadata['sources']: + ext = video['type'][6:] formats.append({ - 'url': source['file'], + 'url': video['file'], 'ext': ext, - 'format': '%s %s' % (ext, source['label']), - 'format_id': '%s-%s' % (ext, source['label']), - 'height': int(source['label'].rstrip('p')), + 'format_id': '%s-%s' % (ext, video['label']), + 'height': int(video['label'].rstrip('p')), 'fps': 25, }) + self._sort_formats(formats) return { - 'id': video_id[-32:], - 'display_id': video_id[:-35], - 'title': unescapeHTML(payload['title']), - 'thumbnail': 'http:%s' % payload['image'], + 'id': metadata['mediaid'], + 'title': unescapeHTML(metadata['title']), + 'thumbnail': self._proto_relative_url(metadata['image'], 'http:'), 'formats': formats } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + # single video + item = self._search_regex( + r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});", + webpage, 'video', default=None, fatal=False) + + if item: + return self._parse_video_metadata(item, video_id) + + # playlist + items = re.findall( + r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);", + webpage) + + if items: + return { + '_type': 'playlist', + 'id': video_id, + 'title': self._og_search_title(webpage), + 'entries': [self._parse_video_metadata(i, video_id) for i in items] + } + + raise ExtractorError('Could not find neither video nor playlist') diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 3022f539d..75f180928 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -6,7 +6,9 @@ import re from .common import InfoExtractor from ..utils import ( xpath_with_ns, - parse_iso8601 + parse_iso8601, + float_or_none, + int_or_none, ) NAMESPACE_MAP = { @@ -21,21 +23,38 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.gameone.de/tv/288', - 'md5': '136656b7fb4c9cb4a8e2d500651c499b', - 'info_dict': { - 'id': '288', - 'ext': 'mp4', - 'title': 'Game One - Folge 288', - 'duration': 1238, - 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16, - 'upload_date': '20140513', - 'timestamp': 1399980122, + _TESTS = [ + { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, + } + }, + { + 'url': 'http://gameone.de/tv/220', + 'md5': '5227ca74c4ae6b5f74c0510a7c48839e', + 'info_dict': { + 'id': '220', + 'ext': 'mp4', + 'upload_date': '20120918', + 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker', + 'timestamp': 1347971451, + 'title': 'Game One - Folge 220', + 'duration': 896.62, + 'age_limit': 16, + } } - } + + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -66,13 +85,13 @@ class GameOneIE(InfoExtractor): video_id, 'Downloading media:content') rendition_items = content.findall('.//rendition') - duration = int(rendition_items[0].get('duration')) + duration = float_or_none(rendition_items[0].get('duration')) formats = [ { 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int(r.get('width')), - 'height': int(r.get('height')), - 'tbr': int(r.get('bitrate')), + 'width': int_or_none(r.get('width')), + 'height': int_or_none(r.get('height')), + 'tbr': int_or_none(r.get('bitrate')), } for r in rendition_items ] @@ -105,7 +124,8 @@ class GameOnePlaylistIE(InfoExtractor): webpage = self._download_webpage('http://www.gameone.de/tv', 'TV') max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage))) entries = [ - self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne') + self.url_result('http://www.gameone.de/tv/%d' % + video_id, 'GameOne') for video_id in range(max_id, 0, -1)] return { diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index c813d4b82..483cc6f9e 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -22,7 +22,7 @@ class InternetVideoArchiveIE(InfoExtractor): 'ext': 'mp4', 'title': 'SKYFALL', 'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', - 'duration': 149, + 'duration': 152, }, } diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dl/extractor/nerdcubed.py index 9f2e678e0..efc903afa 100644 --- a/youtube_dl/extractor/nerdcubed.py +++ b/youtube_dl/extractor/nerdcubed.py @@ -18,7 +18,7 @@ class NerdCubedFeedIE(InfoExtractor): def _real_extract(self, url): feed = self._download_json(url, url, "Downloading NerdCubed JSON feed") - + entries = [{ '_type': 'url', 'title': feed_entry['title'], @@ -33,4 +33,3 @@ class NerdCubedFeedIE(InfoExtractor): 'id': 'nerdcubed-feed', 'entries': entries, } - diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py index 54f67a52f..7736cabba 100644 --- a/youtube_dl/extractor/rtp.py +++ b/youtube_dl/extractor/rtp.py @@ -12,7 +12,7 @@ class RTPIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'info_dict': { - 'id': '174042', + 'id': 'e174042', 'ext': 'mp3', 'title': 'Paixões Cruzadas', 'description': 'As paixões musicais de António Cartaxo e António Macedo', diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index d031fe401..baef3daa0 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -69,6 +69,7 @@ class SmotriIE(InfoExtractor): 'params': { 'videopassword': 'qwerty', }, + 'skip': 'Video is not approved by moderator', }, # age limit + video-password { @@ -86,7 +87,8 @@ class SmotriIE(InfoExtractor): }, 'params': { 'videopassword': '333' - } + }, + 'skip': 'Video is not approved by moderator', }, # swf player { diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 07f514a46..c04791997 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,11 +1,10 @@ # encoding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor -from ..utils import ExtractorError +from .common import compat_str class SohuIE(InfoExtractor): @@ -29,60 +28,73 @@ class SohuIE(InfoExtractor): base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' else: base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' - data_url = base_data_url + str(vid_id) - data_json = self._download_webpage( - data_url, video_id, - note='Downloading JSON data for ' + str(vid_id)) - return json.loads(data_json) + + return self._download_json( + base_data_url + vid_id, video_id, + 'Downloading JSON data for %s' % vid_id) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') mytv = mobj.group('mytv') is not None webpage = self._download_webpage(url, video_id) - raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>', - webpage, 'video title') + raw_title = self._html_search_regex( + r'(?s)<title>(.+?)</title>', + webpage, 'video title') title = raw_title.partition('-')[0].strip() - vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage, - 'video path') - data = _fetch_data(vid, mytv) - - QUALITIES = ('ori', 'super', 'high', 'nor') - vid_ids = [data['data'][q + 'Vid'] - for q in QUALITIES - if data['data'][q + 'Vid'] != 0] - if not vid_ids: - raise ExtractorError('No formats available for this video') + vid = self._html_search_regex( + r'var vid ?= ?["\'](\d+)["\']', + webpage, 'video path') + vid_data = _fetch_data(vid, mytv) - # For now, we just pick the highest available quality - vid_id = vid_ids[-1] + formats_json = {} + for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): + vid_id = vid_data['data'].get('%sVid' % format_id) + if not vid_id: + continue + vid_id = compat_str(vid_id) + formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv) - format_data = data if vid == vid_id else _fetch_data(vid_id, mytv) - part_count = format_data['data']['totalBlocks'] - allot = format_data['allot'] - prot = format_data['prot'] - clipsURL = format_data['data']['clipsURL'] - su = format_data['data']['su'] + part_count = vid_data['data']['totalBlocks'] playlist = [] for i in range(part_count): - part_url = ('http://%s/?prot=%s&file=%s&new=%s' % - (allot, prot, clipsURL[i], su[i])) - part_str = self._download_webpage( - part_url, video_id, - note='Downloading part %d of %d' % (i + 1, part_count)) - - part_info = part_str.split('|') - video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) - - video_info = { - 'id': '%s_part%02d' % (video_id, i + 1), + formats = [] + for format_id, format_data in formats_json.items(): + allot = format_data['allot'] + prot = format_data['prot'] + + data = format_data['data'] + clips_url = data['clipsURL'] + su = data['su'] + + part_str = self._download_webpage( + 'http://%s/?prot=%s&file=%s&new=%s' % + (allot, prot, clips_url[i], su[i]), + video_id, + 'Downloading %s video URL part %d of %d' + % (format_id, i + 1, part_count)) + + part_info = part_str.split('|') + video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) + + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'filesize': data['clipsBytes'][i], + 'width': data['width'], + 'height': data['height'], + 'fps': data['fps'], + }) + self._sort_formats(formats) + + playlist.append({ + 'id': '%s_part%d' % (video_id, i + 1), 'title': title, - 'url': video_url, - 'ext': 'mp4', - } - playlist.append(video_info) + 'duration': vid_data['data']['clipsDuration'][i], + 'formats': formats, + }) if len(playlist) == 1: info = playlist[0] diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index 2f57f5b7c..1a57aebf1 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -60,9 +60,10 @@ class SportDeutschlandIE(InfoExtractor): categories = list(data.get('section', {}).get('tags', {}).values()) asset = data['asset'] + assets_info = self._download_json(asset['url'], video_id) formats = [] - smil_url = asset['video'] + smil_url = assets_info['video'] if '.smil' in smil_url: m3u8_url = smil_url.replace('.smil', '.m3u8') formats.extend( diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2a2fff5e1..be3f72df7 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleIE class TelecincoIE(MiTeleIE): IE_NAME = 'telecinco.es' - _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html' + _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html' _TEST = { 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', diff --git a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index 827aa08a4..c5c6fdc51 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -15,7 +15,7 @@ class TMZIE(InfoExtractor): 'ext': 'mp4', 'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!', 'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?', - 'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640', + 'thumbnail': r're:http://cdnbakmi\.kaltura\.com/.*thumbnail.*', } } diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 031226f27..f8e7041a0 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -12,6 +12,7 @@ from ..compat import ( ) from ..utils import ( clean_html, + unescapeHTML, ExtractorError, int_or_none, ) @@ -55,14 +56,14 @@ class YahooIE(InfoExtractor): } }, { - 'url': 'https://tw.screen.yahoo.com/taipei-opinion-poll/選情站報-街頭民調-台北市篇-102823042.html', - 'md5': '92a7fdd8a08783c68a174d7aa067dde8', + 'url': 'https://tw.screen.yahoo.com/election-2014-askmayor/敢問市長-黃秀霜批賴清德-非常高傲-033009720.html', + 'md5': '3a09cf59349cfaddae1797acc3c087fc', 'info_dict': { - 'id': '7a23b569-7bea-36cb-85b9-bd5301a0a1fb', + 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f', 'ext': 'mp4', - 'title': '選情站報 街頭民調 台北市篇', - 'description': '選情站報 街頭民調 台北市篇', - 'duration': 429, + 'title': '敢問市長/黃秀霜批賴清德「非常高傲」', + 'description': '直言台南沒捷運 交通居五都之末', + 'duration': 396, } }, { @@ -87,14 +88,14 @@ class YahooIE(InfoExtractor): 'duration': 121, } }, { - 'url': 'https://ca.finance.yahoo.com/news/20-most-valuable-brands-world-112600775.html', - 'md5': '3e401e4eed6325aa29d9b96125fd5b4f', + 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html', + 'md5': '226a895aae7e21b0129e2a2006fe9690', 'info_dict': { - 'id': 'c1b4c09c-8ed8-3b65-8b05-169c55358a83', + 'id': 'e624c4bc-3389-34de-9dfc-025f74943409', 'ext': 'mp4', - 'title': "Apple Is The World's Most Valuable Brand", - 'description': 'md5:73eabc1a11c6f59752593b2ceefa1262', - 'duration': 21, + 'title': '\'The Interview\' TV Spot: War', + 'description': 'The Interview', + 'duration': 30, } }, { 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', @@ -117,6 +118,16 @@ class YahooIE(InfoExtractor): 'duration': 201, } }, { + 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', + 'md5': '989396ae73d20c6f057746fb226aa215', + 'info_dict': { + 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', + 'ext': 'mp4', + 'title': '\'True Story\' Trailer', + 'description': 'True Story', + 'duration': 150, + }, + }, { 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', 'only_matching': True, } @@ -125,6 +136,7 @@ class YahooIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('display_id') + page_id = mobj.group('id') url = mobj.group('url') host = mobj.group('host') webpage = self._download_webpage(url, display_id) @@ -149,6 +161,7 @@ class YahooIE(InfoExtractor): r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', r'"first_videoid"\s*:\s*"([^"]+)"', + r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), ] video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') else: @@ -163,17 +176,15 @@ class YahooIE(InfoExtractor): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', webpage, 'region', fatal=False, default='US') - query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' - ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="%s"' - ' AND protocol="http"' % (video_id, region)) data = compat_urllib_parse.urlencode({ - 'q': query, - 'env': 'prod', - 'format': 'json', + 'protocol': 'http', + 'region': region, }) + query_url = ( + 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + '{id}?{data}'.format(id=video_id, data=data)) query_result = self._download_json( - 'http://video.query.yahoo.com/v1/public/yql?' + data, - display_id, 'Downloading video info') + query_url, display_id, 'Downloading video info') info = query_result['query']['results']['mediaObj'][0] meta = info.get('meta') @@ -211,7 +222,7 @@ class YahooIE(InfoExtractor): return { 'id': video_id, 'display_id': display_id, - 'title': meta['title'], + 'title': unescapeHTML(meta['title']), 'formats': formats, 'description': clean_html(meta['description']), 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), |