diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/appletrailers.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/archiveorg.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/comedycentral.py | 21 | ||||
-rw-r--r-- | youtube_dl/extractor/daum.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/dreisat.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/faz.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/gamespot.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/gametrailers.py | 15 | ||||
-rw-r--r-- | youtube_dl/extractor/metacritic.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 76 | ||||
-rw-r--r-- | youtube_dl/extractor/naver.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/redtube.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/southparkstudios.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/trilulilu.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/viddler.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/xhamster.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 12 |
17 files changed, 78 insertions, 132 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 5b522552a..a527f10de 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -113,7 +113,7 @@ class AppleTrailersIE(InfoExtractor): }) formats = sorted(formats, key=lambda f: (f['height'], f['width'])) - info = { + playlist.append({ '_type': 'video', 'id': video_id, 'title': title, @@ -124,12 +124,7 @@ class AppleTrailersIE(InfoExtractor): 'upload_date': upload_date, 'uploader_id': uploader_id, 'user_agent': 'QuickTime compatible (youtube-dl)', - } - # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['ext'] - - playlist.append(info) + }) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index a8394bfb0..8bb546410 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -49,7 +49,7 @@ class ArchiveOrgIE(InfoExtractor): for f in formats: f['ext'] = determine_ext(f['url']) - info = { + return { '_type': 'video', 'id': video_id, 'title': title, @@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor): 'description': description, 'uploader': uploader, 'upload_date': upload_date, + 'thumbnail': data.get('misc', {}).get('image'), } - thumbnail = data.get('misc', {}).get('image') - if thumbnail: - info['thumbnail'] = thumbnail - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index caea446ea..a54ce3ee7 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor from ..utils import ( compat_str, compat_urllib_parse, @@ -11,8 +11,8 @@ from ..utils import ( ) -class ComedyCentralIE(MTVIE): - _VALID_URL = r'https?://(?:www\.)?comedycentral\.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www.)?comedycentral.com/(video-clips|episodes|cc-studios)/(?P<title>.*)' _FEED_URL = u'http://comedycentral.com/feeds/mrss/' _TEST = { @@ -25,12 +25,6 @@ class ComedyCentralIE(MTVIE): u'description': u'After a certain point, breastfeeding becomes c**kblocking.', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] - - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -197,7 +191,7 @@ class ComedyCentralShowsIE(InfoExtractor): }) effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) - info = { + results.append({ 'id': shortMediaId, 'formats': formats, 'uploader': showId, @@ -205,11 +199,6 @@ class ComedyCentralShowsIE(InfoExtractor): 'title': effTitle, 'thumbnail': None, 'description': compat_str(officialTitle), - } - - # TODO: Remove when #980 has been merged - info.update(info['formats'][-1]) - - results.append(info) + }) return results diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 3d1dcb793..d418ce4a8 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -28,7 +28,8 @@ class DaumIE(InfoExtractor): video_id = mobj.group(1) canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) - full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', + full_id = self._search_regex( + r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', webpage, u'full id') query = compat_urllib_parse.urlencode({'vid': full_id}) info = self._download_xml( @@ -56,7 +57,7 @@ class DaumIE(InfoExtractor): 'format_id': profile, }) - info = { + return { 'id': video_id, 'title': info.find('TITLE').text, 'formats': formats, @@ -65,6 +66,3 @@ class DaumIE(InfoExtractor): 'duration': int(info.find('DURATION').text), 'upload_date': info.find('REGDTTM').text[:8], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 008c99699..cb7226f82 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -65,7 +65,7 @@ class DreiSatIE(InfoExtractor): return (qidx, prefer_http, format['video_bitrate']) formats.sort(key=_sortkey) - info = { + return { '_type': 'video', 'id': video_id, 'title': video_title, @@ -76,8 +76,3 @@ class DreiSatIE(InfoExtractor): 'uploader': video_uploader, 'upload_date': upload_date, } - - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index 615674baf..c6ab6952e 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -44,13 +44,10 @@ class FazIE(InfoExtractor): }) descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') - info = { + return { 'id': video_id, 'title': self._og_search_title(webpage), 'formats': formats, 'description': descr, 'thumbnail': config.find('STILL/STILL_BIG').text, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 9645b00c3..26b7d2ae5 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -47,13 +47,10 @@ class GameSpotIE(InfoExtractor): 'format_id': q, }) - info = { + return { 'id': data_video['guid'], 'title': compat_urllib_parse.unquote(data_video['title']), 'formats': formats, 'description': get_meta_content('description', webpage), 'thumbnail': self._og_search_thumbnail(webpage), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py index 88f656031..d82a5d4b2 100644 --- a/youtube_dl/extractor/gametrailers.py +++ b/youtube_dl/extractor/gametrailers.py @@ -1,12 +1,9 @@ import re -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor -class GametrailersIE(MTVIE): - """ - Gametrailers use the same videos system as MTVIE, it just changes the feed - url, where the uri is and the method to get the thumbnails. - """ + +class GametrailersIE(MTVServicesInfoExtractor): _VALID_URL = r'http://www\.gametrailers\.com/(?P<type>videos|reviews|full-episodes)/(?P<id>.*?)/(?P<title>.*)' _TEST = { u'url': u'http://www.gametrailers.com/videos/zbvr8i/mirror-s-edge-2-e3-2013--debut-trailer', @@ -17,15 +14,9 @@ class GametrailersIE(MTVIE): u'description': u'Faith is back! Check out the World Premiere trailer for Mirror\'s Edge 2 straight from the EA Press Conference at E3 2013!', }, } - # Overwrite MTVIE properties we don't want - _TESTS = [] _FEED_URL = 'http://www.gametrailers.com/feeds/mrss' - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - return itemdoc.find(search_path).attrib['url'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 449138b56..6b95b4998 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -43,13 +43,10 @@ class MetacriticIE(InfoExtractor): description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', webpage, u'description', flags=re.DOTALL) - info = { + return { 'id': video_id, 'title': clip.find('title').text, 'formats': formats, 'description': description, 'duration': int(clip.find('duration').text), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 42aee58be..6b3feb560 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -10,35 +10,8 @@ from ..utils import ( def _media_xml_tag(tag): return '{http://search.yahoo.com/mrss/}%s' % tag -class MTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' - - _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' - - _TESTS = [ - { - u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', - u'file': u'853555.mp4', - u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', - u'info_dict': { - u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', - u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', - }, - }, - { - u'add_ie': ['Vevo'], - u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', - u'file': u'USCJY1331283.mp4', - u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', - u'info_dict': { - u'title': u'Everything Has Changed', - u'upload_date': u'20130606', - u'uploader': u'Taylor Swift', - }, - u'skip': u'VEVO is only available in some countries', - }, - ] +class MTVServicesInfoExtractor(InfoExtractor): @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -53,7 +26,12 @@ class MTVIE(InfoExtractor): return base + m.group('finalid') def _get_thumbnail_url(self, uri, itemdoc): - return 'http://mtv.mtvnimages.com/uri/' + uri + search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + thumb_node = itemdoc.find(search_path) + if thumb_node is None: + return None + else: + return thumb_node.attrib['url'] def _extract_video_formats(self, metadataXml): if '/error_country_block.swf' in metadataXml: @@ -93,7 +71,7 @@ class MTVIE(InfoExtractor): else: description = None - info = { + return { 'title': itemdoc.find('title').text, 'formats': self._extract_video_formats(mediagen_page), 'id': video_id, @@ -101,11 +79,6 @@ class MTVIE(InfoExtractor): 'description': description, } - # TODO: Remove when #980 has been merged - info.update(info['formats'][-1]) - - return info - def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) data = compat_urllib_parse.urlencode({'uri': uri}) @@ -113,6 +86,39 @@ class MTVIE(InfoExtractor): u'Downloading info') return [self._get_video_info(item) for item in idoc.findall('.//item')] + +class MTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$' + + _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/' + + _TESTS = [ + { + u'url': u'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', + u'file': u'853555.mp4', + u'md5': u'850f3f143316b1e71fa56a4edfd6e0f8', + u'info_dict': { + u'title': u'Taylor Swift - "Ours (VH1 Storytellers)"', + u'description': u'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.', + }, + }, + { + u'add_ie': ['Vevo'], + u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', + u'file': u'USCJY1331283.mp4', + u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', + u'info_dict': { + u'title': u'Everything Has Changed', + u'upload_date': u'20130606', + u'uploader': u'Taylor Swift', + }, + u'skip': u'VEVO is only available in some countries', + }, + ] + + def _get_thumbnail_url(self, uri, itemdoc): + return 'http://mtv.mtvnimages.com/uri/' + uri + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index d290397c7..c012ec0cf 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -56,7 +56,7 @@ class NaverIE(InfoExtractor): 'height': int(format_el.find('height').text), }) - info = { + return { 'id': video_id, 'title': info.find('Subject').text, 'formats': formats, @@ -65,6 +65,3 @@ class NaverIE(InfoExtractor): 'upload_date': info.find('WriteDate').text.replace('.', ''), 'view_count': int(info.find('PlayCount').text), } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 3bbda128e..c2254ae8a 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -30,7 +30,7 @@ class RedTubeIE(InfoExtractor): r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL') video_title = self._html_search_regex( - r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', + r'<h1 class="videoTitle[^"]*">(.+?)</h1>', webpage, u'title') # No self-labeling, but they describe themselves as diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index a711531e6..fd90cc5dd 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -1,15 +1,14 @@ import re -from .mtv import MTVIE, _media_xml_tag +from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVIE): +class SouthParkStudiosIE(MTVServicesInfoExtractor): IE_NAME = u'southparkstudios.com' _VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' - # Overwrite MTVIE properties we don't want _TESTS = [{ u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', @@ -19,14 +18,6 @@ class SouthParkStudiosIE(MTVIE): }, }] - def _get_thumbnail_url(self, uri, itemdoc): - search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) - thumb_node = itemdoc.find(search_path) - if thumb_node is None: - return None - else: - return thumb_node.attrib['url'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) url = u'http://www.' + mobj.group(u'url') diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index 1c49e580d..d64aaa41f 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -55,7 +55,7 @@ class TriluliluIE(InfoExtractor): for fnode in format_doc.findall('./formats/format') ] - info = { + return { '_type': 'video', 'id': video_id, 'formats': formats, @@ -64,7 +64,3 @@ class TriluliluIE(InfoExtractor): 'thumbnail': thumbnail, } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 36d1bde08..138a35b2a 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -47,7 +47,7 @@ class ViddlerIE(InfoExtractor): r"thumbnail\s*:\s*'([^']*)'", webpage, u'thumbnail', fatal=False) - info = { + return { '_type': 'video', 'id': video_id, 'title': title, @@ -56,9 +56,3 @@ class ViddlerIE(InfoExtractor): 'duration': duration, 'formats': formats, } - - # TODO: Remove when #980 has been merged - info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url']) - info.update(info['formats'][-1]) - - return info diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 7444d3393..279f75e7a 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -26,7 +26,7 @@ class XHamsterIE(InfoExtractor): { u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', u'file': u'2221348.flv', - u'md5': u'e767b9475de189320f691f49c679c4c7', + u'md5': u'970a94178ca4118c5aa3aaea21211b81', u'info_dict': { u"upload_date": u"20130914", u"uploader_id": u"jojo747400", diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 765b4a9bf..7fff761bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -336,7 +336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"uploader": u"Philipp Hagemeister", u"uploader_id": u"phihag", u"upload_date": u"20121002", - u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." + u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ." } }, { @@ -1366,6 +1366,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # description video_description = get_element_by_id("eow-description", video_webpage) if video_description: + video_description = re.sub(r'''(?x) + <a\s+ + (?:[a-zA-Z-]+="[^"]+"\s+)*? + title="([^"]+)"\s+ + (?:[a-zA-Z-]+="[^"]+"\s+)*? + class="yt-uix-redirect-link"\s*> + [^<]+ + </a> + ''', r'\1', video_description) video_description = clean_html(video_description) else: fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage) @@ -1765,6 +1774,7 @@ class YoutubeSearchIE(SearchInfoExtractor): return self.playlist_result(videos, query) class YoutubeSearchDateIE(YoutubeSearchIE): + IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _SEARCH_KEY = 'ytsearchdate' IE_DESC = u'YouTube.com searches, newest videos first' |