about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  README.md                         |  5
-rw-r--r--  test/test_youtube_signature.py    | 12
-rw-r--r--  youtube_dl/extractor/__init__.py  |  1
-rw-r--r--  youtube_dl/extractor/common.py    | 11
-rw-r--r--  youtube_dl/extractor/mpora.py     |  2
-rw-r--r--  youtube_dl/extractor/vodlocker.py | 65
-rw-r--r--  youtube_dl/extractor/youtube.py   | 83
-rw-r--r--  youtube_dl/jsinterp.py            |  4
-rw-r--r--  youtube_dl/version.py             |  2
9 files changed, 111 insertions, 74 deletions
diff --git a/README.md b/README.md
index 2bea609bf..dffdaa9dc 100644
--- a/README.md
+++ b/README.md
@@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
- apple". By default (with value "auto")
- youtube-dl guesses.
+ apple". Use the value "auto" to let
+ youtube-dl guess. The default value "error"
+ just throws an error.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 8417c55a6..8d46fe108 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -33,6 +33,12 @@ _TESTS = [
90,
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
+ (
+ u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+ u'js',
+ u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+ u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+ ),
]
@@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):
os.mkdir(self.TESTDATA_DIR)
-def make_tfunc(url, stype, sig_length, expected_sig):
+def make_tfunc(url, stype, sig_input, expected_sig):
basename = url.rpartition('/')[2]
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
assert m, '%r should follow URL format' % basename
@@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):
with open(fn, 'rb') as testf:
swfcode = testf.read()
func = ie._parse_sig_swf(swfcode)
- src_sig = compat_str(string.printable[:sig_length])
+ src_sig = (
+ compat_str(string.printable[:sig_input])
+ if isinstance(sig_input, int) else sig_input)
got_sig = func(src_sig)
self.assertEqual(got_sig, expected_sig)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e8598a2f5..f43f3f702 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -343,6 +343,7 @@ from .vine import (
)
from .viki import VikiIE
from .vk import VKIE
+from .vodlocker import VodlockerIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vulture import VultureIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e4e4feef9..f1ed30704 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
import base64
import hashlib
import json
+import netrc
import os
import re
import socket
import sys
-import netrc
+import time
import xml.etree.ElementTree
from ..utils import (
@@ -575,6 +576,13 @@ class InfoExtractor(object):
else:
return url
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
class SearchInfoExtractor(InfoExtractor):
"""
@@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):
@property
def SEARCH_KEY(self):
return self._SEARCH_KEY
-
diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py
index 39d6feb98..387935d4d 100644
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
- r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
+ r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = json.loads(data_json)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
new file mode 100644
index 000000000..dfc570930
--- /dev/null
+++ b/youtube_dl/extractor/vodlocker.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import time
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+
+
+class VodlockerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+
+ _TESTS = [{
+ 'url': 'http://vodlocker.com/e8wvyzz4sl42',
+ 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
+ 'info_dict': {
+ 'id': 'e8wvyzz4sl42',
+ 'ext': 'mp4',
+ 'title': 'Germany vs Brazil',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ if fields['op'] == 'download1':
+ self._sleep(3, video_id) # they do detect when requests happen too fast!
+ post = compat_urllib_parse.urlencode(fields)
+ req = compat_urllib_request.Request(url, post)
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ webpage = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ title = self._search_regex(
+ r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
+ thumbnail = self._search_regex(
+ r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
+ url = self._search_regex(
+ r'file:\s*"(http[^\"]+)",', webpage, 'file url')
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f420b8148..6123e1256 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
- if player_url is not None:
- if player_url.startswith(u'//'):
- player_url = u'https:' + player_url
- try:
- player_id = (player_url, len(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, len(s)
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, len(s))
- return func(s)
- except Exception:
- tb = traceback.format_exc()
- self._downloader.report_warning(
- u'Automatic signature extraction failed: ' + tb)
-
- self._downloader.report_warning(
- u'Warning: Falling back to static signature algorithm')
-
- return self._static_decrypt_signature(
- s, video_id, player_url, age_gate)
-
- def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
- if age_gate:
- # The videos with age protection use another player, so the
- # algorithms can be different.
- if len(s) == 86:
- return s[2:63] + s[82] + s[64:82] + s[63]
-
- if len(s) == 93:
- return s[86:29:-1] + s[88] + s[28:5:-1]
- elif len(s) == 92:
- return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
- elif len(s) == 91:
- return s[84:27:-1] + s[86] + s[26:5:-1]
- elif len(s) == 90:
- return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
- elif len(s) == 89:
- return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
- elif len(s) == 88:
- return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
- elif len(s) == 87:
- return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
- elif len(s) == 86:
- return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
- elif len(s) == 85:
- return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
- elif len(s) == 84:
- return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
- elif len(s) == 83:
- return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
- elif len(s) == 82:
- return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
- elif len(s) == 81:
- return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
- elif len(s) == 80:
- return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
- elif len(s) == 79:
- return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+ if player_url is None:
+ raise ExtractorError(u'Cannot decrypt signature without player_url')
- else:
- raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+ if player_url.startswith(u'//'):
+ player_url = u'https:' + player_url
+ try:
+ player_id = (player_url, len(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, len(s)
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, len(s))
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ u'Automatic signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
try:
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 449482d3c..3bbb07704 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -59,7 +59,7 @@ class JSInterpreter(object):
if member == 'split("")':
return list(val)
if member == 'join("")':
- return u''.join(val)
+ return ''.join(val)
if member == 'length':
return len(val)
if member == 'reverse()':
@@ -99,7 +99,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
- (r'(?:function %s|%s\s*=\s*function)' % (
+ (r'(?:function %s|[{;]%s\s*=\s*function)' % (
re.escape(funcname), re.escape(funcname))) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
self.code)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index ab076489f..d6b05892c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.06.26'
+__version__ = '2014.07.11'