diff options
| -rw-r--r-- | README.md | 5 | ||||
| -rw-r--r-- | test/test_youtube_signature.py | 12 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/arte.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/gorillavid.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/goshgay.py | 73 | ||||
| -rw-r--r-- | youtube_dl/extractor/mpora.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/ninegag.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/veoh.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/vodlocker.py | 65 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 83 | ||||
| -rw-r--r-- | youtube_dl/jsinterp.py | 4 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
15 files changed, 207 insertions, 79 deletions
| @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.      --default-search PREFIX          Use this prefix for unqualified URLs. For                                       example "gvsearch2:" downloads two videos                                       from google videos for  youtube-dl "large -                                     apple". By default (with value "auto") -                                     youtube-dl guesses. +                                     apple". Use the value "auto" to let +                                     youtube-dl guess. The default value "error" +                                     just throws an error.      --ignore-config                  Do not read configuration files. When given                                       in the global configuration file /etc                                       /youtube-dl.conf: do not read the user diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55a6..8d46fe108 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [          90,          u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',      ), +    ( +        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', +        u'js', +        u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', +        u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', +    ),  ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):              os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig):      basename = url.rpartition('/')[2]      m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)      assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):              with open(fn, 'rb') as testf:                  swfcode = testf.read()              func = ie._parse_sig_swf(swfcode) -        src_sig = compat_str(string.printable[:sig_length]) +        src_sig = ( +            compat_str(string.printable[:sig_input]) +            if isinstance(sig_input, int) else sig_input)          got_sig = func(src_sig)          self.assertEqual(got_sig, expected_sig) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dc0ba986a..3dff723b8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -993,6 +993,8 @@ class YoutubeDL(object):                          fd = get_suitable_downloader(info)(self, self.params)                          for ph in self._progress_hooks:                              fd.add_progress_hook(ph) +                        if self.params.get('verbose'): +                            self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))                          return fd.download(name, info)                      if info_dict.get('requested_formats') is not None:                          downloaded = [] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 12cca5c2e..f43f3f702 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,6 +112,7 @@ from .generic import GenericIE  from .googleplus import GooglePlusIE  from .googlesearch import GoogleSearchIE  from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE  from .hark import HarkIE  from .helsinki import HelsinkiIE  from .hentaistigma import HentaiStigmaIE @@ -342,6 +343,7 @@ from .vine import (  )  from .viki import VikiIE  from .vk import VKIE +from .vodlocker import VodlockerIE  from .vube import VubeIE  from .vuclip import VuClipIE  from .vulture import VultureIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b42102f3d..9591bad8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):          formats = [{              'forma_id': q.attrib['quality'], -            'url': q.text, +            # The playpath starts at 'mp4:', if we don't manually +            # split the url, rtmpdump will incorrectly parse them +            'url': q.text.split('mp4:', 1)[0], +            'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],              'ext': 'flv',              'quality': 2 if q.attrib['quality'] == 'hd' else 1,          } for q in config.findall('./urls/url')] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@  import base64  import hashlib  import json +import netrc  import os  import re  import socket  import sys -import netrc +import time  import xml.etree.ElementTree  from ..utils import ( @@ -575,6 +576,13 @@ class InfoExtractor(object):          else:              return url +    def _sleep(self, timeout, video_id, msg_template=None): +        if msg_template is None: +            msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' +        msg = msg_template % {'video_id': video_id, 'timeout': timeout} +        self.to_screen(msg) +        time.sleep(timeout) +  class SearchInfoExtractor(InfoExtractor):      """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):      @property      def SEARCH_KEY(self):          return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index aa15cafc3..50ef54cce 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -12,7 +12,12 @@ from ..utils import (  class GorillaVidIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' +    IE_DESC = 'GorillaVid.in and daclips.in' +    _VALID_URL = r'''(?x) +        https?://(?:www\.)? +            (?:daclips\.in|gorillavid\.in)/ +        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? +    '''      _TESTS = [{          'url': 'http://gorillavid.in/06y9juieqpmi', @@ -32,14 +37,20 @@ class GorillaVidIE(InfoExtractor):              'title': 'Say something nice',              'thumbnail': 're:http://.*\.jpg',          }, +    }, { +        'url': 'http://daclips.in/3rso4kdn6f9m', +        'info_dict': { +            'id': '3rso4kdn6f9m', +            'ext': 'mp4', +            'title': 'Micro Pig piglets ready on 16th July 2009', +            'thumbnail': 're:http://.*\.jpg', +        },      }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        url = 'http://gorillavid.in/%s' % video_id -          webpage = self._download_webpage(url, video_id)          fields = dict(re.findall(r'''(?x)<input\s+ diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py new file mode 100644 index 000000000..7bca21ad0 --- /dev/null +++ b/youtube_dl/extractor/goshgay.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urlparse, +    str_to_int, +    ExtractorError, +) +import json + + +class GoshgayIE(InfoExtractor): +    _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)' +    _TEST = { +        'url': 'http://www.goshgay.com/video4116282', +        'md5': '268b9f3c3229105c57859e166dd72b03', +        'info_dict': { +            'id': '4116282', +            'ext': 'flv', +            'title': 'md5:089833a4790b5e103285a07337f245bf', +            'thumbnail': 're:http://.*\.jpg', +            'age_limit': 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) +        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') + +        player_config = self._search_regex( +            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') +        player_vars = json.loads(player_config.replace("'", '"')) +        width = str_to_int(player_vars.get('width')) +        height = str_to_int(player_vars.get('height')) +        config_uri = player_vars.get('config') + +        if config_uri is None: +            raise ExtractorError('Missing config URI') +        node = self._download_xml(config_uri, video_id, 'Downloading player config XML', +                                  errnote='Unable to download XML') +        if node is None: +            raise ExtractorError('Missing config XML') +        if node.tag != 'config': +            raise ExtractorError('Missing config attribute') +        fns = node.findall('file') +        imgs = node.findall('image') +        if len(fns) != 1: +            raise ExtractorError('Missing media URI') +        video_url = fns[0].text +        if len(imgs) < 1: +            thumbnail = None +        else: +            thumbnail = imgs[0].text + +        url_comp = compat_urlparse.urlparse(url) +        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'width': width, +            'height': height, +            'thumbnail': thumbnail, +            'http_referer': ref, +            'age_limit': 18, +        } diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          data_json = self._search_regex( -            r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') +            r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')          data = json.loads(data_json) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index c2e7b67c7..33daa0dec 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          post_view = json.loads(self._html_search_regex( -            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) +            r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))          youtube_id = post_view['videoExternalId']          title = post_view['title'] diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index fb132aef6..a7953a7e7 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor):                  'description': 'md5:f5a11c51f8fb51d2315bca0937526891',                  'uploader': 'newsy-videos',              }, +            'skip': 'This video has been deleted.',          },      ] diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..dfc570930 --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    compat_urllib_parse, +    compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + +    _TESTS = [{ +        'url': 'http://vodlocker.com/e8wvyzz4sl42', +        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', +        'info_dict': { +            'id': 'e8wvyzz4sl42', +            'ext': 'mp4', +            'title': 'Germany vs Brazil', +            'thumbnail': 're:http://.*\.jpg', +        }, +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) + +        fields = dict(re.findall(r'''(?x)<input\s+ +            type="hidden"\s+ +            name="([^"]+)"\s+ +            (?:id="[^"]+"\s+)? +            value="([^"]*)" +            ''', webpage)) + +        if fields['op'] == 'download1': +            self._sleep(3, video_id)  # they do detect when requests happen too fast! +            post = compat_urllib_parse.urlencode(fields) +            req = compat_urllib_request.Request(url, post) +            req.add_header('Content-type', 'application/x-www-form-urlencoded') +            webpage = self._download_webpage( +                req, video_id, 'Downloading video page') + +        title = self._search_regex( +            r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title') +        thumbnail = self._search_regex( +            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') +        url = self._search_regex( +            r'file:\s*"(http[^\"]+)",', webpage, 'file url') + +        formats = [{ +            'format_id': 'sd', +            'url': url, +        }] + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f420b8148..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _decrypt_signature(self, s, video_id, player_url, age_gate=False):          """Turn the encrypted s field into a working signature""" -        if player_url is not None: -            if player_url.startswith(u'//'): -                player_url = u'https:' + player_url -            try: -                player_id = (player_url, len(s)) -                if player_id not in self._player_cache: -                    func = self._extract_signature_function( -                        video_id, player_url, len(s) -                    ) -                    self._player_cache[player_id] = func -                func = self._player_cache[player_id] -                if self._downloader.params.get('youtube_print_sig_code'): -                    self._print_sig_code(func, len(s)) -                return func(s) -            except Exception: -                tb = traceback.format_exc() -                self._downloader.report_warning( -                    u'Automatic signature extraction failed: ' + tb) - -            self._downloader.report_warning( -                u'Warning: Falling back to static signature algorithm') - -        return self._static_decrypt_signature( -            s, video_id, player_url, age_gate) - -    def _static_decrypt_signature(self, s, video_id, player_url, age_gate): -        if age_gate: -            # The videos with age protection use another player, so the -            # algorithms can be different. -            if len(s) == 86: -                return s[2:63] + s[82] + s[64:82] + s[63] - -        if len(s) == 93: -            return s[86:29:-1] + s[88] + s[28:5:-1] -        elif len(s) == 92: -            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] -        elif len(s) == 91: -            return s[84:27:-1] + s[86] + s[26:5:-1] -        elif len(s) == 90: -            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] -        elif len(s) == 89: -            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] -        elif len(s) == 88: -            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] -        elif len(s) == 87: -            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] -        elif len(s) == 86: -            return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] -        elif len(s) == 85: -            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] -        elif len(s) == 84: -            return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] -        elif len(s) == 83: -            return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] -        elif len(s) == 82: -            return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] -        elif len(s) == 81: -            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] -        elif len(s) == 80: -            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] -        elif len(s) == 79: -            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] +        if player_url is None: +            raise ExtractorError(u'Cannot decrypt signature without player_url') -        else: -            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) +        if player_url.startswith(u'//'): +            player_url = u'https:' + player_url +        try: +            player_id = (player_url, len(s)) +            if player_id not in self._player_cache: +                func = self._extract_signature_function( +                    video_id, player_url, len(s) +                ) +                self._player_cache[player_id] = func +            func = self._player_cache[player_id] +            if self._downloader.params.get('youtube_print_sig_code'): +                self._print_sig_code(func, len(s)) +            return func(s) +        except Exception as e: +            tb = traceback.format_exc() +            raise ExtractorError( +                u'Automatic signature extraction failed: ' + tb, cause=e)      def _get_available_subtitles(self, video_id, webpage):          try: diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object):              if member == 'split("")':                  return list(val)              if member == 'join("")': -                return u''.join(val) +                return ''.join(val)              if member == 'length':                  return len(val)              if member == 'reverse()': @@ -99,7 +99,7 @@ class JSInterpreter(object):      def extract_function(self, funcname):          func_m = re.search( -            (r'(?:function %s|%s\s*=\s*function)' % ( +            (r'(?:function %s|[{;]%s\s*=\s*function)' % (                  re.escape(funcname), re.escape(funcname))) +              r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',              self.code) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..d6b05892c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.11' | 
