diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/mpora.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/vodlocker.py | 65 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 83 | ||||
-rw-r--r-- | youtube_dl/jsinterp.py | 4 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
7 files changed, 98 insertions, 70 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e8598a2f5..f43f3f702 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -343,6 +343,7 @@ from .vine import ( ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -575,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._search_regex( - r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') + r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') data = json.loads(data_json) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..dfc570930 --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)<input\s+ + type="hidden"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? + value="([^"]*)" + ''', webpage)) + + if fields['op'] == 'download1': + self._sleep(3, video_id) # they do detect when requests happen too fast! + post = compat_urllib_parse.urlencode(fields) + req = compat_urllib_request.Request(url, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + webpage = self._download_webpage( + req, video_id, 'Downloading video page') + + title = self._search_regex( + r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title') + thumbnail = self._search_regex( + r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') + url = self._search_regex( + r'file:\s*"(http[^\"]+)",', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': url, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f420b8148..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if player_url is not None: - if player_url.startswith(u'//'): - player_url = u'https:' + player_url - try: - player_id = (player_url, len(s)) - if player_id not in self._player_cache: - func = self._extract_signature_function( - video_id, player_url, len(s) - ) - self._player_cache[player_id] = func - func = self._player_cache[player_id] - if self._downloader.params.get('youtube_print_sig_code'): - self._print_sig_code(func, len(s)) - return func(s) - except Exception: - tb = traceback.format_exc() - self._downloader.report_warning( - u'Automatic signature extraction failed: ' + tb) - - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') - - return self._static_decrypt_signature( - s, video_id, player_url, age_gate) - - def _static_decrypt_signature(self, s, video_id, player_url, age_gate): - if age_gate: - # The videos with age protection use another player, so the - # algorithms can be different. - if len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] - - if len(s) == 93: - return s[86:29:-1] + s[88] + s[28:5:-1] - elif len(s) == 92: - return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] - elif len(s) == 91: - return s[84:27:-1] + s[86] + s[26:5:-1] - elif len(s) == 90: - return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] - elif len(s) == 89: - return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] - elif len(s) == 88: - return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] - elif len(s) == 87: - return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] - elif len(s) == 86: - return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] - elif len(s) == 85: - return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] - elif len(s) == 84: - return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] - elif len(s) == 83: - return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] - elif len(s) == 82: - return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] - elif len(s) == 81: - return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - elif len(s) == 80: - return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] - elif len(s) == 79: - return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] + if player_url is None: + raise ExtractorError(u'Cannot decrypt signature without player_url') - else: - raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) + if player_url.startswith(u'//'): + player_url = u'https:' + player_url + try: + player_id = (player_url, len(s)) + if player_id not in self._player_cache: + func = self._extract_signature_function( + video_id, player_url, len(s) + ) + self._player_cache[player_id] = func + func = self._player_cache[player_id] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) + except Exception as e: + tb = traceback.format_exc() + raise ExtractorError( + u'Automatic signature extraction failed: ' + tb, cause=e) def _get_available_subtitles(self, video_id, webpage): try: diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object): if member == 'split("")': return list(val) if member == 'join("")': - return u''.join(val) + return ''.join(val) if member == 'length': return len(val) if member == 'reverse()': @@ -99,7 +99,7 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( - (r'(?:function %s|%s\s*=\s*function)' % ( + (r'(?:function %s|[{;]%s\s*=\s*function)' % ( re.escape(funcname), re.escape(funcname))) + r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', self.code) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..d6b05892c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.11' |