diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/dailymotion.py | 43 | ||||
-rw-r--r-- | youtube_dl/extractor/francetv.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/ign.py | 54 | ||||
-rw-r--r-- | youtube_dl/extractor/rtlnow.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 29 |
5 files changed, 120 insertions, 31 deletions
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 3f012aedc..259806f38 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -27,15 +27,31 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' IE_NAME = u'dailymotion' - _TEST = { - u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', - u'file': u'x33vw9.mp4', - u'md5': u'392c4b85a60a90dc4792da41ce3144eb', - u'info_dict': { - u"uploader": u"Amphora Alex and Van .", - u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" - } - } + _TESTS = [ + { + u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', + u'file': u'x33vw9.mp4', + u'md5': u'392c4b85a60a90dc4792da41ce3144eb', + u'info_dict': { + u"uploader": u"Amphora Alex and Van .", + u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } + }, + # Vevo video + { + u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', + u'file': u'USUV71301934.mp4', + u'info_dict': { + u'title': u'Roar (Official)', + u'uploader': u'Katy Perry', + u'upload_date': u'20130905', + }, + u'params': { + u'skip_download': True, + }, + u'skip': u'VEVO is only available in some countries', + }, + ] def _real_extract(self, url): # Extract id and simplified title from URL @@ -53,6 +69,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) + # It may just embed a vevo video: + m_vevo = re.search( + r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)', + webpage) + if m_vevo is not None: + vevo_id = m_vevo.group('id') + self.to_screen(u'Vevo video detected: %s' % vevo_id) + return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') + video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', # Looking for official user r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index b1530e549..461dac8ef 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -70,7 +70,11 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): class France2IE(FranceTVBaseInfoExtractor): IE_NAME = u'france2.fr' - _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P<id>\d+)' + _VALID_URL = r'''(?x)https?://www\.france2\.fr/ + (?: + emissions/.*?/videos/(?P<id>\d+) + | emission/(?P<key>[^/?]+) + )''' _TEST = { u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', @@ -86,7 +90,15 @@ class France2IE(FranceTVBaseInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + if mobj.group('key'): + webpage = self._download_webpage(url, mobj.group('key')) + video_id = self._html_search_regex( + r'''(?x)<div\s+class="video-player">\s* + <a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+ + class="francetv-video-player">''', + webpage, u'video ID') + else: + video_id = mobj.group('id') return self._extract_video(video_id) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index b1c84278a..c52146f7d 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -13,7 +13,7 @@ class IGNIE(InfoExtractor): Some videos of it.ign.com are also supported """ - _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)' + _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)' IE_NAME = u'ign.com' _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' @@ -21,15 +21,39 @@ class IGNIE(InfoExtractor): r'id="my_show_video">.*?<p>(.*?)</p>', ] - _TEST = { - u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', - u'file': u'8f862beef863986b2785559b9e1aa599.mp4', - u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', - u'info_dict': { - u'title': u'The Last of Us Review', - u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', - } - } + _TESTS = [ + { + u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', + u'file': u'8f862beef863986b2785559b9e1aa599.mp4', + u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', + u'info_dict': { + u'title': u'The Last of Us Review', + u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', + } + }, + { + u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', + u'playlist': [ + { + u'file': u'5ebbd138523268b93c9141af17bec937.mp4', + u'info_dict': { + u'title': u'GTA 5 Video Review', + u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', + }, + }, + { + u'file': u'638672ee848ae4ff108df2a296418ee2.mp4', + u'info_dict': { + u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion', + u'description': u'The twisted beauty of GTA 5 in stunning slow motion.', + }, + }, + ], + u'params': { + u'skip_download': True, + }, + }, + ] def _find_video_id(self, webpage): res_id = [r'data-video-id="(.+?)"', @@ -46,6 +70,13 @@ class IGNIE(InfoExtractor): if page_type == 'articles': video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url') return self.url_result(video_url, ie='IGN') + elif page_type != 'video': + multiple_urls = re.findall( + '<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', + webpage) + if multiple_urls: + return [self.url_result(u, ie='IGN') for u in multiple_urls] + video_id = self._find_video_id(webpage) result = self._get_video_info(video_id) description = self._html_search_regex(self._DESCRIPTION_RE, @@ -87,6 +118,9 @@ class OneUPIE(IGNIE): } } + # Override IGN tests + _TESTS = [] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) id = mobj.group('name_or_id') diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index fe66cc6e5..580f9e6d5 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -110,14 +110,17 @@ class RTLnowIE(InfoExtractor): webpage, u'playerdata_url') playerdata = self._download_webpage(playerdata_url, video_id) - mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata) + mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata) if mobj: video_description = mobj.group(u'description') if mobj.group('upload_date_Y'): video_upload_date = mobj.group('upload_date_Y') - else: + elif mobj.group('upload_date_y'): video_upload_date = u'20' + mobj.group('upload_date_y') - video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + else: + video_upload_date = None + if video_upload_date: + video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') else: video_description = None video_upload_date = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 53f13b516..39ff33290 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -23,9 +23,11 @@ from ..utils import ( compat_urllib_error, compat_urllib_parse, compat_urllib_request, + compat_urlparse, compat_str, clean_html, + get_cachedir, get_element_by_id, ExtractorError, unescapeHTML, @@ -420,8 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Read from filesystem cache func_id = '%s_%s_%d' % (player_type, player_id, slen) assert os.path.basename(func_id) == func_id - cache_dir = self._downloader.params.get('cachedir', - u'~/.youtube-dl/cache') + cache_dir = get_cachedir(self._downloader.params) cache_enabled = cache_dir is not None if cache_enabled: @@ -1086,7 +1087,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 83: return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] elif len(s) == 82: - return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54] + return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] elif len(s) == 81: return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] elif len(s) == 80: @@ -1333,9 +1334,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self._downloader.report_warning(u'unable to extract uploader nickname') # title - if 'title' not in video_info: - raise ExtractorError(u'Unable to extract video title') - video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + if 'title' in video_info: + video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) + else: + self._downloader.report_warning(u'Unable to extract video title') + video_title = u'_' # thumbnail image # We try first to get a high quality image: @@ -1390,6 +1393,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): args = info['args'] # Easy way to know if the 's' value is in url_encoded_fmt_stream_map # this signatures are encrypted + if 'url_encoded_fmt_stream_map' not in args: + raise ValueError(u'No stream_map present') # caught below m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map']) if m_s is not None: self.to_screen(u'%s: Encrypted signatures detected.' % video_id) @@ -1523,9 +1528,19 @@ class YoutubePlaylistIE(InfoExtractor): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) + playlist_id = mobj.group(1) or mobj.group(2) + + # Check if it's a video-specific URL + query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + if 'v' in query_dict: + video_id = query_dict['v'][0] + if self._downloader.params.get('noplaylist'): + self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) + return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') + else: + self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) # Download playlist videos from API - playlist_id = mobj.group(1) or mobj.group(2) videos = [] for page_num in itertools.count(1): |