diff options
-rw-r--r-- | devscripts/youtube_genalgo.py | 4 | ||||
-rwxr-xr-x | test/test_youtube_sig.py | 2 | ||||
-rw-r--r-- | youtube_dl/YoutubeDL.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 62 | ||||
-rw-r--r-- | youtube_dl/extractor/dotsub.py | 29 | ||||
-rw-r--r-- | youtube_dl/extractor/gamespot.py | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 41 | ||||
-rw-r--r-- | youtube_dl/extractor/steam.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/veoh.py | 47 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 60 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
12 files changed, 225 insertions, 50 deletions
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index c3d69e6f4..150c88d17 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -20,9 +20,9 @@ tests = [ # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), - # 83 + # 83 - vfl26ng3K 2013/07/10 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ"), + "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py index e87b6259b..e76604244 100755 --- a/test/test_youtube_sig.py +++ b/test/test_youtube_sig.py @@ -45,7 +45,7 @@ class TestYoutubeSig(unittest.TestCase): def test_83(self): wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" - right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS<FGHJKLPOIUYTREWZ0987654321mnbvcxzasdfghjklpoiuytrQ" + right = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>" self.assertEqual(sig(wrong), right) def test_82(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d3281fed2..e24706115 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -348,6 +348,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system if result_type == 'video': + ie_result.update(extra_info) if 'playlist' not in ie_result: # It isn't part of a playlist ie_result['playlist'] = None diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d2a71a6f1..4b67f333b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -59,6 +59,7 @@ from .tumblr import TumblrIE from .tutv import TutvIE from .ustream import UstreamIE from .vbox7 import Vbox7IE +from .veoh import VeohIE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f85acbb5d..68ee5292b 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,28 +1,80 @@ import re import json +import xml.etree.ElementTree from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, +) class BrightcoveIE(InfoExtractor): - _VALID_URL = r'http://.*brightcove\.com/.*\?(?P<query>.*videoPlayer=(?P<id>\d*).*)' + _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' + _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' + _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' + + # There is a test for Brigtcove in GenericIE, that way we test both the download + # and the detection of videos, and we don't have to find an URL that is always valid + + @classmethod + def _build_brighcove_url(cls, object_str): + """ + Build a Brightcove url from a xml string containing + <object class="BrightcoveExperience">{params}</object> + """ + object_doc = xml.etree.ElementTree.fromstring(object_str) + assert u'BrightcoveExperience' in object_doc.attrib['class'] + params = {'flashID': object_doc.attrib['id'], + 'playerID': object_doc.find('./param[@name="playerID"]').attrib['value'], + } + playerKey = object_doc.find('./param[@name="playerKey"]') + # Not all pages define this value + if playerKey is not None: + params['playerKey'] = playerKey.attrib['value'] + videoPlayer = object_doc.find('./param[@name="@videoPlayer"]') + if videoPlayer is not None: + params['@videoPlayer'] = videoPlayer.attrib['value'] + data = compat_urllib_parse.urlencode(params) + return cls._FEDERATED_URL_TEMPLATE % data def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) query = mobj.group('query') - video_id = mobj.group('id') - request_url = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' % query + m_video_id = re.search(r'videoPlayer=(\d+)', query) + if m_video_id is not None: + video_id = m_video_id.group(1) + return self._get_video_info(video_id, query) + else: + player_key = self._search_regex(r'playerKey=(.+?)(&|$)', query, 'playlist_id') + return self._get_playlist_info(player_key) + + def _get_video_info(self, video_id, query): + request_url = self._FEDERATED_URL_TEMPLATE % query webpage = self._download_webpage(request_url, video_id) self.report_extraction(video_id) info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') info = json.loads(info)['data'] video_info = info['programmedContent']['videoPlayer']['mediaDTO'] + + return self._extract_video_info(video_info) + + def _get_playlist_info(self, player_key): + playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, + player_key, u'Downloading playlist information') + + playlist_info = json.loads(playlist_info)['videoList'] + videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] + + return self.playlist_result(videos, playlist_id=playlist_info['id'], + playlist_title=playlist_info['mediaCollectionDTO']['displayName']) + + def _extract_video_info(self, video_info): renditions = video_info['renditions'] renditions = sorted(renditions, key=lambda r: r['size']) best_format = renditions[-1] - - return {'id': video_id, + + return {'id': video_info['id'], 'title': video_info['displayName'], 'url': best_format['defaultURL'], 'ext': 'mp4', diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index 2afeaba07..0ee9a684e 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -1,5 +1,7 @@ import re import json +import time + from .common import InfoExtractor @@ -13,7 +15,8 @@ class DotsubIE(InfoExtractor): u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary", u"uploader": u"4v4l0n42", u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', - u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p' + u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', + u'upload_date': u'20101213', } } @@ -23,20 +26,16 @@ class DotsubIE(InfoExtractor): info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id) webpage = self._download_webpage(info_url, video_id) info = json.loads(webpage) - video_url = info['mediaURI'] - uploader = info['user'] - description = info['description'] - view_count = info['numberOfViews'] - title = info['title'] - thumbnail_url = info['screenshotURI'] - ext = 'flv' + date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds + return [{ 'id': video_id, - 'url': video_url, - 'ext': ext, - 'title': title, - 'thumbnail': thumbnail_url, - 'description': description, - 'uploader': uploader, - 'view_count': view_count, + 'url': info['mediaURI'], + 'ext': 'flv', + 'title': info['title'], + 'thumbnail': info['screenshotURI'], + 'description': info['description'], + 'uploader': info['user'], + 'view_count': info['numberOfViews'], + 'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), }] diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index cec3b7ac8..7585b7061 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -4,14 +4,15 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( unified_strdate, + compat_urllib_parse, ) class GameSpotIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/' + _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' _TEST = { u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", u"file": u"6410818.mp4", - u"md5": u"5569d64ca98db01f0177c934fe8c1e9b", + u"md5": u"b2a30deaa8654fcccd43713a6b6a4825", u"info_dict": { u"title": u"Arma III - Community Guide: SITREP I", u"upload_date": u"20130627", @@ -21,13 +22,22 @@ class GameSpotIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(3).split("-")[-1] - info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id) + page_id = mobj.group('page_id') + webpage = self._download_webpage(url, page_id) + video_id = self._html_search_regex([r'"og:video" content=".*?\?id=(\d+)"', + r'http://www\.gamespot\.com/videoembed/(\d+)'], + webpage, 'video id') + data = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'}) + info_url = 'http://www.gamespot.com/pages/video_player/xml.php?' + data info_xml = self._download_webpage(info_url, video_id) doc = xml.etree.ElementTree.fromstring(info_xml) clip_el = doc.find('./playList/clip') - video_url = clip_el.find('./URI').text + http_urls = [{'url': node.find('filePath').text, + 'rate': int(node.find('rate').text)} + for node in clip_el.find('./httpURI')] + best_quality = sorted(http_urls, key=lambda f: f['rate'])[-1] + video_url = best_quality['url'] title = clip_el.find('./title').text ext = video_url.rpartition('.')[2] thumbnail_url = clip_el.find('./screenGrabURI').text diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 20bc53330..33790741f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1,3 +1,5 @@ +# encoding: utf-8 + import os import re @@ -9,20 +11,34 @@ from ..utils import ( ExtractorError, ) +from .brightcove import BrightcoveIE class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = u'generic' - _TEST = { - u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', - u'file': u'13601338388002.mp4', - u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', - u'info_dict': { - u"uploader": u"www.hodiho.fr", - u"title": u"R\u00e9gis plante sa Jeep" - } - } + _TESTS = [ + { + u'url': u'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', + u'file': u'13601338388002.mp4', + u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', + u'info_dict': { + u"uploader": u"www.hodiho.fr", + u"title": u"R\u00e9gis plante sa Jeep" + } + }, + { + u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/', + u'file': u'2371591881001.mp4', + u'md5': u'9e80619e0a94663f0bdc849b4566af19', + u'note': u'Test Brightcove downloads and detection in GenericIE', + u'info_dict': { + u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + u'uploader': u'8TV', + u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', + } + }, + ] def report_download_webpage(self, video_id): """Report webpage download.""" @@ -103,6 +119,13 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) self.report_extraction(video_id) + # Look for BrigthCove: + m_brightcove = re.search(r'<object.+?class=".*?BrightcoveExperience.*?".+?</object>', webpage, re.DOTALL) + if m_brightcove is not None: + self.to_screen(u'Brightcove video detected.') + bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) + return self.url_result(bc_url, 'Brightcove') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index ecac4ec40..91658f892 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -23,14 +23,16 @@ class SteamIE(InfoExtractor): u"file": u"81300.flv", u"md5": u"f870007cee7065d7c76b88f0a45ecc07", u"info_dict": { - u"title": u"Terraria 1.1 Trailer" + u"title": u"Terraria 1.1 Trailer", + u'playlist_index': 1, } }, { u"file": u"80859.flv", u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751", u"info_dict": { - u"title": u"Terraria Trailer" + u"title": u"Terraria Trailer", + u'playlist_index': 2, } } ] diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py new file mode 100644 index 000000000..00672c9e5 --- /dev/null +++ b/youtube_dl/extractor/veoh.py @@ -0,0 +1,47 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, +) + +class VeohIE(InfoExtractor): + _VALID_URL = r'http://www\.veoh\.com/watch/v(?P<id>\d*)' + + _TEST = { + u'url': u'http://www.veoh.com/watch/v56314296nk7Zdmz3', + u'file': u'56314296.mp4', + u'md5': u'620e68e6a3cff80086df3348426c9ca3', + u'info_dict': { + u'title': u'Straight Backs Are Stronger', + u'uploader': u'LUMOback', + u'description': u'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage) + if m_youtube is not None: + youtube_id = m_youtube.group(1) + self.to_screen(u'%s: detected Youtube video.' % video_id) + return self.url_result(youtube_id, 'Youtube') + + self.report_extraction(video_id) + info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info') + info = json.loads(info) + video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath') + + return {'id': info['videoId'], + 'title': info['title'], + 'ext': determine_ext(video_url), + 'url': video_url, + 'uploader': info['username'], + 'thumbnail': info.get('highResImage') or info.get('medResImage'), + 'description': info['description'], + 'view_count': info['views'], + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 61b7b561f..87f9994ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -117,7 +117,19 @@ class YoutubeIE(InfoExtractor): u"uploader": u"IconaPop", u"uploader_id": u"IconaPop" } - } + }, + { + u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ", + u"file": u"07FYdnEawAQ.mp4", + u"note": u"Test VEVO video with age protection (#956)", + u"info_dict": { + u"upload_date": u"20130703", + u"title": u"Justin Timberlake - Tunnel Vision (Explicit)", + u"description": u"md5:64249768eec3bc4276236606ea996373", + u"uploader": u"justintimberlakeVEVO", + u"uploader_id": u"justintimberlakeVEVO" + } + }, ] @@ -178,7 +190,7 @@ class YoutubeIE(InfoExtractor): elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: - return s[52] + s[81:55:-1] + s[2] + s[54:52:-1] + s[82] + s[51:36:-1] + s[55] + s[35:2:-1] + s[36] + return s[:81] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] @@ -410,15 +422,35 @@ class YoutubeIE(InfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) - for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: - video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + if re.search(r'player-age-gate-content">', video_webpage) is not None: + self.report_age_confirmation() + age_gate = True + # We simulate the access to the video from www.youtube.com/v/{video_id} + # this can be viewed without login into Youtube + data = compat_urllib_parse.urlencode({'video_id': video_id, + 'el': 'embedded', + 'gl': 'US', + 'hl': 'en', + 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'asv': 3, + 'sts':'1588', + }) + video_info_url = 'https://www.youtube.com/get_video_info?' + data video_info_webpage = self._download_webpage(video_info_url, video_id, note=False, errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) - if 'token' in video_info: - break + else: + age_gate = False + for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: + video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' + % (video_id, el_type)) + video_info_webpage = self._download_webpage(video_info_url, video_id, + note=False, + errnote='unable to download video info webpage') + video_info = compat_parse_qs(video_info_webpage) + if 'token' in video_info: + break if 'token' not in video_info: if 'reason' in video_info: raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True) @@ -535,6 +567,8 @@ class YoutubeIE(InfoExtractor): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]: + raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) url_map = {} for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','): url_data = compat_parse_qs(url_data_str) @@ -545,9 +579,15 @@ class YoutubeIE(InfoExtractor): elif 's' in url_data: if self._downloader.params.get('verbose'): s = url_data['s'][0] - player = self._search_regex(r'html5player-(.+?)\.js', video_webpage, - 'html5 player', fatal=False) - self.to_screen('encrypted signature length %d (%d.%d), itag %s, html5 player %s' % + if age_gate: + player_version = self._search_regex(r'ad3-(.+?)\.swf', + video_info['ad3_module'][0], 'flash player', + fatal=False) + player = 'flash player %s' % player_version + else: + player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, + 'html5 player', fatal=False) + self.to_screen('encrypted signature length %d (%d.%d), itag %s, %s' % (len(s), len(s.split('.')[0]), len(s.split('.')[1]), url_data['itag'][0], player)) signature = self._decrypt_signature(url_data['s'][0]) url += '&signature=' + signature diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e7a15714a..2f20826c2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.07.08.1' +__version__ = '2013.07.10' |