diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/ard.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/defense.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/francetv.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/globo.py | 28 | ||||
-rw-r--r-- | youtube_dl/extractor/kankan.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/keezmovies.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/la7.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/macgamestore.py | 17 | ||||
-rw-r--r-- | youtube_dl/extractor/mpora.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 34 | ||||
-rw-r--r-- | youtube_dl/extractor/nerdcubed.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/ringtv.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/rottentomatoes.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/rutv.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/servingsys.py | 19 | ||||
-rw-r--r-- | youtube_dl/extractor/sina.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/soundcloud.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/teamcoco.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/teletask.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/toutv.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/wdr.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 13 |
23 files changed, 135 insertions, 126 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 967bd865c..783b53e23 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor): _TESTS = [{ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', - 'file': '22429276.mp4', - 'md5': '469751912f1de0816a9fc9df8336476c', - 'info_dict': { - 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?', - 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014', - }, - 'skip': 'Blocked outside of Germany', + 'only_matching': True, }, { 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', 'info_dict': { diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 5e50c63d9..2b90bf4fc 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -1,40 +1,38 @@ from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor class DefenseGouvFrIE(InfoExtractor): IE_NAME = 'defense.gouv.fr' - _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' - r'ligthboxvideo/base-de-medias/webtv/(.*)') + _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' _TEST = { 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', - 'file': '11213.mp4', 'md5': '75bba6124da7e63d2d60b5244ec9430c', - "info_dict": { - "title": "attaque-chimique-syrienne-du-21-aout-2013-1" + 'info_dict': { + 'id': '11213', + 'ext': 'mp4', + 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1' } } def _real_extract(self, url): - title = re.match(self._VALID_URL, url).group(1) + title = self._match_id(url) webpage = self._download_webpage(url, title) + video_id = self._search_regex( r"flashvars.pvg_id=\"(\d+)\";", webpage, 'ID') json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' + video_id) - info = self._download_webpage(json_url, title, - 'Downloading JSON config') - video_url = json.loads(info)['renditions'][0]['url'] - - return {'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - } + info = self._download_json(json_url, title, 'Downloading JSON config') + video_url = info['renditions'][0]['url'] + + return { + 'id': video_id, + 'ext': 'mp4', + 'url': video_url, + 'title': title, + } diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index bbc760a49..170d68075 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor): class GenerationQuoiIE(InfoExtractor): IE_NAME = 'france2.fr:generation-quoi' - _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', - 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4', 'info_dict': { + 'id': 'k7FJX8VBcvvLmX4wA5Q', + 'ext': 'mp4', 'title': 'Génération Quoi - Garde à Vous', 'uploader': 'Génération Quoi', }, @@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor): # It uses Dailymotion 'skip_download': True, }, - 'skip': 'Only available from France', } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) - info_json = self._download_webpage(info_url, name) + display_id = self._match_id(url) + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id) + info_json = self._download_webpage(info_url, display_id) info = json.loads(info_json) return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], ie='Dailymotion') diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 6949a57c7..29638a194 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -70,6 +70,19 @@ class GloboIE(InfoExtractor): 'like_count': int, } }, + { + 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', + 'md5': 'c1defca721ce25b2354e927d3e4b3dec', + 'info_dict': { + 'id': '3928201', + 'ext': 'mp4', + 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', + 'duration': 1472.906, + 'uploader': 'Canal Brasil', + 'uploader_id': 705, + 'like_count': int, + } + }, ] class MD5(): @@ -381,11 +394,16 @@ class GloboIE(InfoExtractor): signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding) signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5 - formats.append({ - 'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'), - 'format_id': resource_id, - 'height': resource['height'] - }) + resource_url = resource['url'] + signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') + if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4')) + else: + formats.append({ + 'url': signed_url, + 'format_id': resource_id, + 'height': resource.get('height'), + }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py index dbfe4cc03..364dc878e 100644 --- a/youtube_dl/extractor/kankan.py +++ b/youtube_dl/extractor/kankan.py @@ -13,17 +13,17 @@ class KankanIE(InfoExtractor): _TEST = { 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', - 'file': '48863.flv', 'md5': '29aca1e47ae68fc28804aca89f29507e', 'info_dict': { + 'id': '48863', + 'ext': 'flv', 'title': 'Ready To Go', }, 'skip': 'Only available from China', } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 97dcb518a..82eddec51 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -7,10 +7,6 @@ from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, -) -from ..aes import ( - aes_decrypt_text ) @@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)' _TEST = { 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', - 'file': '1214711.mp4', 'md5': '6e297b7e789329923fcf83abb67c9289', 'info_dict': { + 'id': '1214711', + 'ext': 'mp4', 'title': 'Petite Asian Lady Mai Playing In Bathtub', 'age_limit': 18, } @@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor): embedded_url = mobj.group(1) return self.url_result(embedded_url) - video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title') - video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url')) - if 'encrypted=true' in webpage: - password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password') - video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') + video_title = self._html_search_regex( + r'<h1 [^>]*>([^<]+)', webpage, 'title') + video_url = self._html_search_regex( + r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL') path = compat_urllib_parse_urlparse(video_url).path extension = os.path.splitext(path)[1][1:] format = path.split('/')[4].split('_')[:2] diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index db2028e9f..b08f6e3c9 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( parse_duration, @@ -20,9 +18,10 @@ class LA7IE(InfoExtractor): _TEST = { 'url': 'http://www.la7.tv/richplayer/?assetid=50355319', - 'file': '50355319.mp4', 'md5': 'ec7d1f0224d20ba293ab56cf2259651f', 'info_dict': { + 'id': '50355319', + 'ext': 'mp4', 'title': 'IL DIVO', 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', 'duration': 6254, @@ -31,9 +30,7 @@ class LA7IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id doc = self._download_xml(xml_url, video_id) diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index b818cf50c..3cd4a3a19 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ExtractorError @@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor): _TEST = { 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', - 'file': '2450.m4v', 'md5': '8649b8ea684b6666b4c5be736ecddc61', 'info_dict': { + 'id': '2450', + 'ext': 'm4v', 'title': 'Crow', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id, 'Downloading trailer page') + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading trailer page') - if re.search(r'>Missing Media<', webpage) is not None: - raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) + if '>Missing Media<' in webpage: + raise ExtractorError( + 'Trailer %s does not exist' % video_id, expected=True) video_title = self._html_search_regex( r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title') diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 88c9501cd..6db3c67a5 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -1,21 +1,19 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import int_or_none class MporaIE(InfoExtractor): - _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' + _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' IE_NAME = 'MPORA' _TEST = { 'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', - 'file': 'AAdo8okx4wiz.mp4', 'md5': 'a7a228473eedd3be741397cf452932eb', 'info_dict': { + 'id': 'AAdo8okx4wiz', + 'ext': 'mp4', 'title': 'Katy Curd - Winter in the Forest', 'duration': 416, 'uploader': 'Peter Newman Media', @@ -23,14 +21,12 @@ class MporaIE(InfoExtractor): } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + data_json = self._search_regex( r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') - - data = json.loads(data_json) + data = self._parse_json(data_json, video_id) uploader = data['info_overlay'].get('username') duration = data['video']['duration'] // 1000 diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7a3b62ebe..bc7f49ebb 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -79,12 +79,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): try: _, _, ext = rendition.attrib['type'].partition('/') rtmp_video_url = rendition.find('./src').text - formats.append({'ext': ext, - 'url': self._transform_rtmp_url(rtmp_video_url), - 'format_id': rendition.get('bitrate'), - 'width': int(rendition.get('width')), - 'height': int(rendition.get('height')), - }) + if rtmp_video_url.endswith('siteunavail.png'): + continue + formats.append({ + 'ext': ext, + 'url': self._transform_rtmp_url(rtmp_video_url), + 'format_id': rendition.get('bitrate'), + 'width': int(rendition.get('width')), + 'height': int(rendition.get('height')), + }) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') self._sort_formats(formats) @@ -240,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor): _TESTS = [ { 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - 'file': '853555.mp4', 'md5': '850f3f143316b1e71fa56a4edfd6e0f8', 'info_dict': { + 'id': '853555', + 'ext': 'mp4', 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"', 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', }, }, - { - 'add_ie': ['Vevo'], - 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - 'file': 'USCJY1331283.mp4', - 'md5': '73b4e7fcadd88929292fe52c3ced8caf', - 'info_dict': { - 'title': 'Everything Has Changed', - 'upload_date': '20130606', - 'uploader': 'Taylor Swift', - }, - 'skip': 'VEVO is only available in some countries', - }, ] def _get_thumbnail_url(self, uri, itemdoc): @@ -272,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor): webpage = self._download_webpage(url, video_id) # Some videos come from Vevo.com - m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', - webpage, re.DOTALL) + m_vevo = re.search( + r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage) if m_vevo: vevo_id = m_vevo.group(1) self.to_screen('Vevo video detected: %s' % vevo_id) diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dl/extractor/nerdcubed.py index efc903afa..dff78e486 100644 --- a/youtube_dl/extractor/nerdcubed.py +++ b/youtube_dl/extractor/nerdcubed.py @@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor): _TEST = { 'url': 'http://www.nerdcubed.co.uk/feed.json', 'info_dict': { + 'id': 'nerdcubed-feed', 'title': 'nerdcubed.co.uk feed', }, 'playlist_mincount': 1300, diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py index 59dc137cc..efa4afeb6 100644 --- a/youtube_dl/extractor/ringtv.py +++ b/youtube_dl/extractor/ringtv.py @@ -6,12 +6,13 @@ from .common import InfoExtractor class RingTVIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' + _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' _TEST = { "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30", - "file": "857645.mp4", "md5": "d25945f5df41cdca2d2587165ac28720", "info_dict": { + 'id': '857645', + 'ext': 'mp4', "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV', "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.', } diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index c1500b82f..e8bb20a08 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE): _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', - 'file': '613340.mp4', 'info_dict': { + 'id': '613340', + 'ext': 'mp4', 'title': 'TOY STORY 3', 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', }, diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a73e6f331..ef766237b 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor): 'vbr': int(quality), } elif transport == 'm3u8': - fmt = { - 'url': url, - 'ext': 'mp4', - } + formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4')) + continue else: fmt = { 'url': url diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py index 16dc3736b..c013d678f 100644 --- a/youtube_dl/extractor/servingsys.py +++ b/youtube_dl/extractor/servingsys.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor): _TEST = { 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', + 'info_dict': { + 'id': '5349193', + 'title': 'AdAPPter_Hyundai_demo', + }, 'playlist': [{ - 'file': '29955898.flv', 'md5': 'baed851342df6846eb8677a60a011a0f', 'info_dict': { + 'id': '29955898', + 'ext': 'flv', 'title': 'AdAPPter_Hyundai_demo (1)', 'duration': 74, 'tbr': 1378, @@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor): 'height': 400, }, }, { - 'file': '29907998.flv', 'md5': '979b4da2655c4bc2d81aeb915a8c5014', 'info_dict': { + 'id': '29907998', + 'ext': 'flv', 'title': 'AdAPPter_Hyundai_demo (2)', 'duration': 34, 'width': 854, @@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor): 'params': { 'playlistend': 2, }, - 'skip': 'Blocked in the US [sic]', + '_skip': 'Blocked in the US [sic]', } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - pl_id = mobj.group('id') - + pl_id = self._match_id(url) vast_doc = self._download_xml(url, pl_id) + title = vast_doc.find('.//AdTitle').text media = vast_doc.find('.//MediaFile').text info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index a63d126d4..0891a441f 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -11,7 +11,7 @@ from ..compat import ( class SinaIE(InfoExtractor): - _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ + _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/ ( (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) | @@ -23,9 +23,10 @@ class SinaIE(InfoExtractor): _TESTS = [ { 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', - 'file': '110028898.flv', 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', 'info_dict': { + 'id': '110028898', + 'ext': 'flv', 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', } }, @@ -39,10 +40,6 @@ class SinaIE(InfoExtractor): }, ] - @classmethod - def suitable(cls, url): - return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None - def _extract_video(self, video_id): data = compat_urllib_parse.urlencode({'vid': video_id}) url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, @@ -59,7 +56,7 @@ class SinaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') if mobj.group('token') is not None: # The video id is in the redirected url diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5d60c4939..c5284fa67 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE): _TESTS = [{ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', 'info_dict': { + 'id': '2284613', 'title': 'The Royal Concept EP', }, 'playlist_mincount': 6, @@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE): return { '_type': 'playlist', 'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], - 'id': info['id'], + 'id': '%s' % info['id'], 'title': info['title'], } diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 5fa67eb8d..18a823719 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor): _TESTS = [ { 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', - 'file': '80187.mp4', 'md5': '3f7746aa0dc86de18df7539903d399ea', 'info_dict': { + 'id': '80187', + 'ext': 'mp4', 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' } }, { 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', - 'file': '19705.mp4', 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 'info_dict': { + 'id': '19705', + 'ext': 'mp4', "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", "title": "Louis C.K. Interview Pt. 1 11/3/11" } @@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor): video_id = mobj.group("video_id") if not video_id: video_id = self._html_search_regex( - r'data-node-id="(\d+?)"', + r'<div\s+class="player".*?data-id="(\d+?)"', webpage, 'video id') data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py index e54145105..b9e2ef8ca 100644 --- a/youtube_dl/extractor/teletask.py +++ b/youtube_dl/extractor/teletask.py @@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor): _TEST = { 'url': 'http://www.tele-task.de/archive/video/html5/26168/', 'info_dict': { + 'id': '26168', 'title': 'Duplicate Detection', }, 'playlist': [{ @@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor): def _real_extract(self, url): lecture_id = self._match_id(url) - webpage = self._download_webpage(url, lecture_id) title = self._html_search_regex( diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 2837f9c8e..4797d1310 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor): _TEST = { 'url': 'http://www.tou.tv/30-vies/S04E41', - 'file': '30-vies_S04E41.mp4', 'info_dict': { + 'id': '30-vies_S04E41', + 'ext': 'mp4', 'title': '30 vies Saison 4 / Épisode 41', 'description': 'md5:da363002db82ccbe4dafeb9cab039b09', 'age_limit': 8, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 06b0bed41..1bb473514 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor): _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', - 'file': '75524534.mp4', 'md5': 'c507a72f780cacc12b2248bb4006d253', 'info_dict': { + 'id': '75524534', + 'ext': 'mp4', 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 313b9c15d..c90488500 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -71,6 +71,9 @@ class WDRIE(InfoExtractor): { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', 'playlist_mincount': 146, + 'info_dict': { + 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', + } } ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3b3678c6e..e4b26b84f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1160,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'info_dict': { + 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx', 'title': 'YDL_Empty_List', }, 'playlist_count': 0, @@ -1168,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'info_dict': { 'title': '29C3: Not my department', + 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', }, 'playlist_count': 95, }, { @@ -1175,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'PLBB231211A4F62143', 'info_dict': { 'title': '[OLD]Team Fortress 2 (Class-based LP)', + 'id': 'PLBB231211A4F62143', }, 'playlist_mincount': 26, }, { @@ -1182,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', 'info_dict': { 'title': 'Uploads from Cauchemar', + 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', }, 'playlist_mincount': 799, }, { 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', 'info_dict': { 'title': 'YDL_safe_search', + 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl', }, 'playlist_count': 2, }, { @@ -1196,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'playlist_count': 4, 'info_dict': { 'title': 'JODA15', + 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', } }, { 'note': 'Embedded SWF player', @@ -1203,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'playlist_count': 4, 'info_dict': { 'title': 'JODA7', + 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ', } }, { 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', 'info_dict': { - 'title': 'Uploads from Interstellar Movie', + 'title': 'Uploads from Interstellar Movie', + 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, }] @@ -1314,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor): 'note': 'paginated channel', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'playlist_mincount': 91, + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + } }] def extract_videos_from_page(self, page): |