aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py2
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/arte.py5
-rw-r--r--youtube_dl/extractor/common.py11
-rw-r--r--youtube_dl/extractor/gorillavid.py17
-rw-r--r--youtube_dl/extractor/goshgay.py73
-rw-r--r--youtube_dl/extractor/mpora.py2
-rw-r--r--youtube_dl/extractor/ninegag.py2
-rw-r--r--youtube_dl/extractor/veoh.py1
-rw-r--r--youtube_dl/extractor/vodlocker.py65
-rw-r--r--youtube_dl/extractor/youtube.py83
-rw-r--r--youtube_dl/jsinterp.py4
-rw-r--r--youtube_dl/version.py2
13 files changed, 194 insertions, 75 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dc0ba986a..3dff723b8 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -993,6 +993,8 @@ class YoutubeDL(object):
fd = get_suitable_downloader(info)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
if info_dict.get('requested_formats') is not None:
downloaded = []
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 12cca5c2e..f43f3f702 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,6 +112,7 @@ from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
+from .goshgay import GoshgayIE
from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
@@ -342,6 +343,7 @@ from .vine import (
)
from .viki import VikiIE
from .vk import VKIE
+from .vodlocker import VodlockerIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vulture import VultureIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index b42102f3d..9591bad8a 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):
formats = [{
'forma_id': q.attrib['quality'],
- 'url': q.text,
+ # The playpath starts at 'mp4:', if we don't manually
+ # split the url, rtmpdump will incorrectly parse them
+ 'url': q.text.split('mp4:', 1)[0],
+ 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
'ext': 'flv',
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
} for q in config.findall('./urls/url')]
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e4e4feef9..f1ed30704 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,11 +1,12 @@
import base64
import hashlib
import json
+import netrc
import os
import re
import socket
import sys
-import netrc
+import time
import xml.etree.ElementTree
from ..utils import (
@@ -575,6 +576,13 @@ class InfoExtractor(object):
else:
return url
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
class SearchInfoExtractor(InfoExtractor):
"""
@@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor):
@property
def SEARCH_KEY(self):
return self._SEARCH_KEY
-
diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py
index aa15cafc3..50ef54cce 100644
--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -12,7 +12,12 @@ from ..utils import (
class GorillaVidIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?'
+ IE_DESC = 'GorillaVid.in and daclips.in'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?
+ (?:daclips\.in|gorillavid\.in)/
+ (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
+ '''
_TESTS = [{
'url': 'http://gorillavid.in/06y9juieqpmi',
@@ -32,14 +37,20 @@ class GorillaVidIE(InfoExtractor):
'title': 'Say something nice',
'thumbnail': 're:http://.*\.jpg',
},
+ }, {
+ 'url': 'http://daclips.in/3rso4kdn6f9m',
+ 'info_dict': {
+ 'id': '3rso4kdn6f9m',
+ 'ext': 'mp4',
+ 'title': 'Micro Pig piglets ready on 16th July 2009',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- url = 'http://gorillavid.in/%s' % video_id
-
webpage = self._download_webpage(url, video_id)
fields = dict(re.findall(r'''(?x)<input\s+
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
new file mode 100644
index 000000000..7bca21ad0
--- /dev/null
+++ b/youtube_dl/extractor/goshgay.py
@@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ str_to_int,
+ ExtractorError,
+)
+import json
+
+
+class GoshgayIE(InfoExtractor):
+ _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
+ _TEST = {
+ 'url': 'http://www.goshgay.com/video4116282',
+ 'md5': '268b9f3c3229105c57859e166dd72b03',
+ 'info_dict': {
+ 'id': '4116282',
+ 'ext': 'flv',
+ 'title': 'md5:089833a4790b5e103285a07337f245bf',
+ 'thumbnail': 're:http://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
+
+ player_config = self._search_regex(
+ r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
+ player_vars = json.loads(player_config.replace("'", '"'))
+ width = str_to_int(player_vars.get('width'))
+ height = str_to_int(player_vars.get('height'))
+ config_uri = player_vars.get('config')
+
+ if config_uri is None:
+ raise ExtractorError('Missing config URI')
+ node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
+ errnote='Unable to download XML')
+ if node is None:
+ raise ExtractorError('Missing config XML')
+ if node.tag != 'config':
+ raise ExtractorError('Missing config attribute')
+ fns = node.findall('file')
+ imgs = node.findall('image')
+ if len(fns) != 1:
+ raise ExtractorError('Missing media URI')
+ video_url = fns[0].text
+ if len(imgs) < 1:
+ thumbnail = None
+ else:
+ thumbnail = imgs[0].text
+
+ url_comp = compat_urlparse.urlparse(url)
+ ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'width': width,
+ 'height': height,
+ 'thumbnail': thumbnail,
+ 'http_referer': ref,
+ 'age_limit': 18,
+ }
diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py
index 39d6feb98..387935d4d 100644
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
- r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
+ r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
data = json.loads(data_json)
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py
index c2e7b67c7..33daa0dec 100644
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
post_view = json.loads(self._html_search_regex(
- r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
+ r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
youtube_id = post_view['videoExternalId']
title = post_view['title']
diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py
index fb132aef6..a7953a7e7 100644
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@@ -49,6 +49,7 @@ class VeohIE(InfoExtractor):
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
'uploader': 'newsy-videos',
},
+ 'skip': 'This video has been deleted.',
},
]
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
new file mode 100644
index 000000000..dfc570930
--- /dev/null
+++ b/youtube_dl/extractor/vodlocker.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+import time
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+
+
+class VodlockerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+
+ _TESTS = [{
+ 'url': 'http://vodlocker.com/e8wvyzz4sl42',
+ 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
+ 'info_dict': {
+ 'id': 'e8wvyzz4sl42',
+ 'ext': 'mp4',
+ 'title': 'Germany vs Brazil',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ if fields['op'] == 'download1':
+ self._sleep(3, video_id) # they do detect when requests happen too fast!
+ post = compat_urllib_parse.urlencode(fields)
+ req = compat_urllib_request.Request(url, post)
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ webpage = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ title = self._search_regex(
+ r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
+ thumbnail = self._search_regex(
+ r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
+ url = self._search_regex(
+ r'file:\s*"(http[^\"]+)",', webpage, 'file url')
+
+ formats = [{
+ 'format_id': 'sd',
+ 'url': url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f420b8148..6123e1256 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
- if player_url is not None:
- if player_url.startswith(u'//'):
- player_url = u'https:' + player_url
- try:
- player_id = (player_url, len(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, len(s)
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, len(s))
- return func(s)
- except Exception:
- tb = traceback.format_exc()
- self._downloader.report_warning(
- u'Automatic signature extraction failed: ' + tb)
-
- self._downloader.report_warning(
- u'Warning: Falling back to static signature algorithm')
-
- return self._static_decrypt_signature(
- s, video_id, player_url, age_gate)
-
- def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
- if age_gate:
- # The videos with age protection use another player, so the
- # algorithms can be different.
- if len(s) == 86:
- return s[2:63] + s[82] + s[64:82] + s[63]
-
- if len(s) == 93:
- return s[86:29:-1] + s[88] + s[28:5:-1]
- elif len(s) == 92:
- return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
- elif len(s) == 91:
- return s[84:27:-1] + s[86] + s[26:5:-1]
- elif len(s) == 90:
- return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
- elif len(s) == 89:
- return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
- elif len(s) == 88:
- return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
- elif len(s) == 87:
- return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
- elif len(s) == 86:
- return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
- elif len(s) == 85:
- return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
- elif len(s) == 84:
- return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
- elif len(s) == 83:
- return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
- elif len(s) == 82:
- return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
- elif len(s) == 81:
- return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
- elif len(s) == 80:
- return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
- elif len(s) == 79:
- return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+ if player_url is None:
+ raise ExtractorError(u'Cannot decrypt signature without player_url')
- else:
- raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
+ if player_url.startswith(u'//'):
+ player_url = u'https:' + player_url
+ try:
+ player_id = (player_url, len(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, len(s)
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, len(s))
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ u'Automatic signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
try:
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 449482d3c..3bbb07704 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -59,7 +59,7 @@ class JSInterpreter(object):
if member == 'split("")':
return list(val)
if member == 'join("")':
- return u''.join(val)
+ return ''.join(val)
if member == 'length':
return len(val)
if member == 'reverse()':
@@ -99,7 +99,7 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
- (r'(?:function %s|%s\s*=\s*function)' % (
+ (r'(?:function %s|[{;]%s\s*=\s*function)' % (
re.escape(funcname), re.escape(funcname))) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
self.code)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index ab076489f..d6b05892c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.06.26'
+__version__ = '2014.07.11'