diff options
| -rw-r--r-- | README.md | 5 | ||||
| -rw-r--r-- | test/test_youtube_signature.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/mpora.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/vodlocker.py | 65 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 83 | ||||
| -rw-r--r-- | youtube_dl/jsinterp.py | 4 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
9 files changed, 111 insertions, 74 deletions
| @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.      --default-search PREFIX          Use this prefix for unqualified URLs. For                                       example "gvsearch2:" downloads two videos                                       from google videos for  youtube-dl "large -                                     apple". By default (with value "auto") -                                     youtube-dl guesses. +                                     apple". Use the value "auto" to let +                                     youtube-dl guess. The default value "error" +                                     just throws an error.      --ignore-config                  Do not read configuration files. When given                                       in the global configuration file /etc                                       /youtube-dl.conf: do not read the user diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55a6..8d46fe108 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [          90,          u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',      ), +    ( +        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', +        u'js', +        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', +        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', +    ),  ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):              os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig):      basename = url.rpartition('/')[2]      m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)      assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):              with open(fn, 'rb') as testf:                  swfcode = testf.read()              func = ie._parse_sig_swf(swfcode) -        src_sig = compat_str(string.printable[:sig_length]) +        src_sig = ( +            compat_str(string.printable[:sig_input]) +            if isinstance(sig_input, int) else sig_input)          got_sig = func(src_sig)          self.assertEqual(got_sig, expected_sig) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e8598a2f5..f43f3f702 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -343,6 +343,7 @@ from .vine import (  )  from .viki import VikiIE  from .vk import VKIE +from .vodlocker import VodlockerIE  from .vube import VubeIE  from .vuclip import VuClipIE  from .vulture import VultureIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@  import base64  import hashlib  import json +import netrc  import os  import re  import socket  import sys -import netrc +import time  import xml.etree.ElementTree  from ..utils import ( @@ -575,6 +576,13 @@ class InfoExtractor(object):          else:              return url +    def _sleep(self, timeout, video_id, msg_template=None): +        if msg_template is None: +            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' +        msg = msg_template % {'video_id': video_id, 'timeout': timeout} +        self.to_screen(msg) +        time.sleep(timeout) +  class SearchInfoExtractor(InfoExtractor):      """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):      @property      def SEARCH_KEY(self):          return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          data_json = self._search_regex( -            r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') +            r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')          data = json.loads(data_json) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..dfc570930 --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    compat_urllib_parse, +    compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + +    _TESTS = [{ +        'url': 'http://vodlocker.com/e8wvyzz4sl42', +        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', +        'info_dict': { +            'id': 'e8wvyzz4sl42', +            'ext': 'mp4', +            'title': 'Germany vs Brazil', +            'thumbnail': 're:http://.*\.jpg', +        }, +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) + +        fields = dict(re.findall(r'''(?x)<input\s+ +            type="hidden"\s+ +            name="([^"]+)"\s+ +            (?:id="[^"]+"\s+)? +            value="([^"]*)" +            ''', webpage)) + +        if fields['op'] == 'download1': +            self._sleep(3, video_id)  # they do detect when requests happen too fast! +            post = compat_urllib_parse.urlencode(fields) +            req = compat_urllib_request.Request(url, post) +            req.add_header('Content-type', 'application/x-www-form-urlencoded') +            webpage = self._download_webpage( +                req, video_id, 'Downloading video page') + +        title = self._search_regex( +            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title') +        thumbnail = self._search_regex( +            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') +        url = self._search_regex( +            r'file:\s*"(http[^\"]+)",', webpage, 'file url') + +        formats = [{ +            'format_id': 'sd', +            'url': url, +        }] + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f420b8148..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _decrypt_signature(self, s, video_id, player_url, age_gate=False):          """Turn the encrypted s field into a working signature""" -        if player_url is not None: -            if player_url.startswith(u'//'): -                player_url = u'https:' + player_url -            try: -                player_id = (player_url, len(s)) -                if player_id not in self._player_cache: -                    func = self._extract_signature_function( -                        video_id, player_url, len(s) -                    ) -                    self._player_cache[player_id] = func -                func = self._player_cache[player_id] -                if self._downloader.params.get('youtube_print_sig_code'): -                    self._print_sig_code(func, len(s)) -                return func(s) -            except Exception: -                tb = traceback.format_exc() -                self._downloader.report_warning( -                    u'Automatic signature extraction failed: ' + tb) - -            self._downloader.report_warning( -                u'Warning: Falling back to static signature algorithm') - -        return self._static_decrypt_signature( -            s, video_id, player_url, age_gate) - -    def _static_decrypt_signature(self, s, video_id, player_url, age_gate): -        if age_gate: -            # The videos with age protection use another player, so the -            # algorithms can be different. -            if len(s) == 86: -                return s[2:63] + s[82] + s[64:82] + s[63] - -        if len(s) == 93: -            return s[86:29:-1] + s[88] + s[28:5:-1] -        elif len(s) == 92: -            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] -        elif len(s) == 91: -            return s[84:27:-1] + s[86] + s[26:5:-1] -        elif len(s) == 90: -            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] -        elif len(s) == 89: -            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] -        elif len(s) == 88: -            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] -        elif len(s) == 87: -            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] -        elif len(s) == 86: -            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] -        elif len(s) == 85: -            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] -        elif len(s) == 84: -            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] -        elif len(s) == 83: -            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] -        elif len(s) == 82: -            return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] -        elif len(s) == 81: -            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] -        elif len(s) == 80: -            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] -        elif len(s) == 79: -            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] +        if player_url is None: +            raise ExtractorError(u'Cannot decrypt signature without player_url') -        else: -            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) +        if player_url.startswith(u'//'): +            player_url = u'https:' + player_url +        try: +            player_id = (player_url, len(s)) +            if player_id not in self._player_cache: +                func = self._extract_signature_function( +                    video_id, player_url, len(s) +                ) +                self._player_cache[player_id] = func +            func = self._player_cache[player_id] +            if self._downloader.params.get('youtube_print_sig_code'): +                self._print_sig_code(func, len(s)) +            return func(s) +        except Exception as e: +            tb = traceback.format_exc() +            raise ExtractorError( +                u'Automatic signature extraction failed: ' + tb, cause=e)      def _get_available_subtitles(self, video_id, webpage):          try: diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object):              if member == 'split("")':                  return list(val)              if member == 'join("")': -                return u''.join(val) +                return ''.join(val)              if member == 'length':                  return len(val)              if member == 'reverse()': @@ -99,7 +99,7 @@ class JSInterpreter(object):      def extract_function(self, funcname):          func_m = re.search( -            (r'(?:function %s|%s\s*=\s*function)' % ( +            (r'(?:function %s|[{;]%s\s*=\s*function)' % (                  re.escape(funcname), re.escape(funcname))) +              r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',              self.code) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..d6b05892c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.11' | 
