about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x  youtube_dl/YoutubeDL.py | 3
-rw-r--r--  youtube_dl/compat.py | 38
-rw-r--r--  youtube_dl/downloader/hls.py | 16
-rw-r--r--  youtube_dl/extractor/ard.py | 8
-rw-r--r--  youtube_dl/extractor/defense.py | 34
-rw-r--r--  youtube_dl/extractor/francetv.py | 13
-rw-r--r--  youtube_dl/extractor/globo.py | 28
-rw-r--r--  youtube_dl/extractor/kankan.py | 6
-rw-r--r--  youtube_dl/extractor/keezmovies.py | 16
-rw-r--r--  youtube_dl/extractor/la7.py | 9
-rw-r--r--  youtube_dl/extractor/macgamestore.py | 17
-rw-r--r--  youtube_dl/extractor/mpora.py | 16
-rw-r--r--  youtube_dl/extractor/mtv.py | 34
-rw-r--r--  youtube_dl/extractor/nerdcubed.py | 1
-rw-r--r--  youtube_dl/extractor/ringtv.py | 5
-rw-r--r--  youtube_dl/extractor/rottentomatoes.py | 3
-rw-r--r--  youtube_dl/extractor/rutv.py | 6
-rw-r--r--  youtube_dl/extractor/servingsys.py | 19
-rw-r--r--  youtube_dl/extractor/sina.py | 11
-rw-r--r--  youtube_dl/extractor/soundcloud.py | 3
-rw-r--r--  youtube_dl/extractor/teamcoco.py | 8
-rw-r--r--  youtube_dl/extractor/teletask.py | 2
-rw-r--r--  youtube_dl/extractor/toutv.py | 3
-rw-r--r--  youtube_dl/extractor/vimeo.py | 3
-rw-r--r--  youtube_dl/extractor/wdr.py | 3
-rw-r--r--  youtube_dl/extractor/youtube.py | 13
-rw-r--r--  youtube_dl/jsinterp.py | 172
-rw-r--r--  youtube_dl/utils.py | 7
-rw-r--r--  youtube_dl/version.py | 2
29 files changed, 298 insertions, 201 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e5a96cad5..c18ce9660 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -25,6 +25,7 @@ if os.name == 'nt':
import ctypes
from .compat import (
+ compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_http_client,
@@ -1558,7 +1559,7 @@ class YoutubeDL(object):
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
# To work around aforementioned issue we will replace request's original URL with
# percent-encoded one
- req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
+ req_is_string = isinstance(req, compat_basestring)
url = req if req_is_string else req.get_full_url()
url_escaped = escape_url(url)
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 497ca52de..e989cdbbd 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -114,6 +114,26 @@ except ImportError:
string += pct_sequence.decode(encoding, errors)
return string
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
+try:
+ compat_basestring = basestring # Python 2
+except NameError:
+ compat_basestring = str
+
+try:
+ compat_chr = unichr # Python 2
+except NameError:
+ compat_chr = chr
+
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
try:
from urllib.parse import parse_qs as compat_parse_qs
@@ -123,7 +143,7 @@ except ImportError: # Python 2
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, unicode
+ qs, _coerce_result = qs, compat_str
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = []
for name_value in pairs:
@@ -163,21 +183,6 @@ except ImportError: # Python 2
return parsed_result
try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
-try:
- from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError: # Python 2.6
- from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-try:
from shlex import quote as shlex_quote
except ImportError: # Python < 3.3
def shlex_quote(s):
@@ -362,6 +367,7 @@ def workaround_optparse_bug9161():
__all__ = [
'compat_HTTPError',
+ 'compat_basestring',
'compat_chr',
'compat_cookiejar',
'compat_expanduser',
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index aa58b52ab..e527ee425 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -11,6 +11,7 @@ from ..compat import (
compat_urllib_request,
)
from ..utils import (
+ encodeArgument,
encodeFilename,
)
@@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = [
- '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
- '-bsf:a', 'aac_adtstoasc',
- encodeFilename(tmpfilename, for_subprocess=True)]
-
ffpp = FFmpegPostProcessor(downloader=self)
program = ffpp._executable
if program is None:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
ffpp.check_version()
- cmd = [program] + args
- retval = subprocess.call(cmd)
+ args = [
+ encodeArgument(opt)
+ for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+ args.append(encodeFilename(tmpfilename, True))
+
+ retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
+ self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 967bd865c..783b53e23 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
_TESTS = [{
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
- 'file': '22429276.mp4',
- 'md5': '469751912f1de0816a9fc9df8336476c',
- 'info_dict': {
- 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
- 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
- },
- 'skip': 'Blocked outside of Germany',
+ 'only_matching': True,
}, {
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
'info_dict': {
diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py
index 5e50c63d9..2b90bf4fc 100644
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dl/extractor/defense.py
@@ -1,40 +1,38 @@
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr'
- _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
- r'ligthboxvideo/base-de-medias/webtv/(.*)')
+ _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
_TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
- 'file': '11213.mp4',
'md5': '75bba6124da7e63d2d60b5244ec9430c',
- "info_dict": {
- "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+ 'info_dict': {
+ 'id': '11213',
+ 'ext': 'mp4',
+ 'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
}
}
def _real_extract(self, url):
- title = re.match(self._VALID_URL, url).group(1)
+ title = self._match_id(url)
webpage = self._download_webpage(url, title)
+
video_id = self._search_regex(
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id)
- info = self._download_webpage(json_url, title,
- 'Downloading JSON config')
- video_url = json.loads(info)['renditions'][0]['url']
-
- return {'id': video_id,
- 'ext': 'mp4',
- 'url': video_url,
- 'title': title,
- }
+ info = self._download_json(json_url, title, 'Downloading JSON config')
+ video_url = info['renditions'][0]['url']
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index bbc760a49..170d68075 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
class GenerationQuoiIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-quoi'
- _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
+ _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
- 'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
'info_dict': {
+ 'id': 'k7FJX8VBcvvLmX4wA5Q',
+ 'ext': 'mp4',
'title': 'Génération Quoi - Garde à Vous',
'uploader': 'Génération Quoi',
},
@@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
# It uses Dailymotion
'skip_download': True,
},
- 'skip': 'Only available from France',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
- info_json = self._download_webpage(info_url, name)
+ display_id = self._match_id(url)
+ info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
+ info_json = self._download_webpage(info_url, display_id)
info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
ie='Dailymotion')
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index 6949a57c7..29638a194 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
'like_count': int,
}
},
+ {
+ 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+ 'md5': 'c1defca721ce25b2354e927d3e4b3dec',
+ 'info_dict': {
+ 'id': '3928201',
+ 'ext': 'mp4',
+ 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
+ 'duration': 1472.906,
+ 'uploader': 'Canal Brasil',
+ 'uploader_id': 705,
+ 'like_count': int,
+ }
+ },
]
class MD5():
@@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
- formats.append({
- 'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
- 'format_id': resource_id,
- 'height': resource['height']
- })
+ resource_url = resource['url']
+ signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
+ if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
+ else:
+ formats.append({
+ 'url': signed_url,
+ 'format_id': resource_id,
+ 'height': resource.get('height'),
+ })
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index dbfe4cc03..364dc878e 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
_TEST = {
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
- 'file': '48863.flv',
'md5': '29aca1e47ae68fc28804aca89f29507e',
'info_dict': {
+ 'id': '48863',
+ 'ext': 'flv',
'title': 'Ready To Go',
},
'skip': 'Only available from China',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py
index 97dcb518a..82eddec51 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -7,10 +7,6 @@ from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urllib_request,
- compat_urllib_parse,
-)
-from ..aes import (
- aes_decrypt_text
)
@@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = {
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
- 'file': '1214711.mp4',
'md5': '6e297b7e789329923fcf83abb67c9289',
'info_dict': {
+ 'id': '1214711',
+ 'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18,
}
@@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
embedded_url = mobj.group(1)
return self.url_result(embedded_url)
- video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
- video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&amp;', webpage, 'video_url'))
- if 'encrypted=true' in webpage:
- password = self._html_search_regex(r'video_title=(.+?)&amp;', webpage, 'password')
- video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
+ video_title = self._html_search_regex(
+ r'<h1 [^>]*>([^<]+)', webpage, 'title')
+ video_url = self._html_search_regex(
+ r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
index db2028e9f..b08f6e3c9 100644
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
parse_duration,
@@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
_TEST = {
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
- 'file': '50355319.mp4',
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
'info_dict': {
+ 'id': '50355319',
+ 'ext': 'mp4',
'title': 'IL DIVO',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
'duration': 6254,
@@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
doc = self._download_xml(xml_url, video_id)
diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py
index b818cf50c..3cd4a3a19 100644
--- a/youtube_dl/extractor/macgamestore.py
+++ b/youtube_dl/extractor/macgamestore.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import ExtractorError
@@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
_TEST = {
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
- 'file': '2450.m4v',
'md5': '8649b8ea684b6666b4c5be736ecddc61',
'info_dict': {
+ 'id': '2450',
+ 'ext': 'm4v',
'title': 'Crow',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading trailer page')
- if re.search(r'>Missing Media<', webpage) is not None:
- raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
+ if '>Missing Media<' in webpage:
+ raise ExtractorError(
+ 'Trailer %s does not exist' % video_id, expected=True)
video_title = self._html_search_regex(
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py
index 88c9501cd..6db3c67a5 100644
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -1,21 +1,19 @@
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
class MporaIE(InfoExtractor):
- _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+ _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA'
_TEST = {
'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
- 'file': 'AAdo8okx4wiz.mp4',
'md5': 'a7a228473eedd3be741397cf452932eb',
'info_dict': {
+ 'id': 'AAdo8okx4wiz',
+ 'ext': 'mp4',
'title': 'Katy Curd - Winter in the Forest',
'duration': 416,
'uploader': 'Peter Newman Media',
@@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
}
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+
data_json = self._search_regex(
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
-
- data = json.loads(data_json)
+ data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username')
duration = data['video']['duration'] // 1000
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 7a3b62ebe..bc7f49ebb 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -79,12 +79,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
try:
_, _, ext = rendition.attrib['type'].partition('/')
rtmp_video_url = rendition.find('./src').text
- formats.append({'ext': ext,
- 'url': self._transform_rtmp_url(rtmp_video_url),
- 'format_id': rendition.get('bitrate'),
- 'width': int(rendition.get('width')),
- 'height': int(rendition.get('height')),
- })
+ if rtmp_video_url.endswith('siteunavail.png'):
+ continue
+ formats.append({
+ 'ext': ext,
+ 'url': self._transform_rtmp_url(rtmp_video_url),
+ 'format_id': rendition.get('bitrate'),
+ 'width': int(rendition.get('width')),
+ 'height': int(rendition.get('height')),
+ })
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
self._sort_formats(formats)
@@ -240,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
_TESTS = [
{
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
- 'file': '853555.mp4',
'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
'info_dict': {
+ 'id': '853555',
+ 'ext': 'mp4',
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
},
},
- {
- 'add_ie': ['Vevo'],
- 'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
- 'file': 'USCJY1331283.mp4',
- 'md5': '73b4e7fcadd88929292fe52c3ced8caf',
- 'info_dict': {
- 'title': 'Everything Has Changed',
- 'upload_date': '20130606',
- 'uploader': 'Taylor Swift',
- },
- 'skip': 'VEVO is only available in some countries',
- },
]
def _get_thumbnail_url(self, uri, itemdoc):
@@ -272,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
webpage = self._download_webpage(url, video_id)
# Some videos come from Vevo.com
- m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
- webpage, re.DOTALL)
+ m_vevo = re.search(
+ r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
if m_vevo:
vevo_id = m_vevo.group(1)
self.to_screen('Vevo video detected: %s' % vevo_id)
diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dl/extractor/nerdcubed.py
index efc903afa..dff78e486 100644
--- a/youtube_dl/extractor/nerdcubed.py
+++ b/youtube_dl/extractor/nerdcubed.py
@@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
_TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json',
'info_dict': {
+ 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed',
},
'playlist_mincount': 1300,
diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py
index 59dc137cc..efa4afeb6 100644
--- a/youtube_dl/extractor/ringtv.py
+++ b/youtube_dl/extractor/ringtv.py
@@ -6,12 +6,13 @@ from .common import InfoExtractor
class RingTVIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
_TEST = {
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
- "file": "857645.mp4",
"md5": "d25945f5df41cdca2d2587165ac28720",
"info_dict": {
+ 'id': '857645',
+ 'ext': 'mp4',
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
}
diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py
index c1500b82f..e8bb20a08 100644
--- a/youtube_dl/extractor/rottentomatoes.py
+++ b/youtube_dl/extractor/rottentomatoes.py
@@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
_TEST = {
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
- 'file': '613340.mp4',
'info_dict': {
+ 'id': '613340',
+ 'ext': 'mp4',
'title': 'TOY STORY 3',
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
},
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py
index a73e6f331..ef766237b 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
'vbr': int(quality),
}
elif transport == 'm3u8':
- fmt = {
- 'url': url,
- 'ext': 'mp4',
- }
+ formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
+ continue
else:
fmt = {
'url': url
diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py
index 16dc3736b..c013d678f 100644
--- a/youtube_dl/extractor/servingsys.py
+++ b/youtube_dl/extractor/servingsys.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
_TEST = {
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
+ 'info_dict': {
+ 'id': '5349193',
+ 'title': 'AdAPPter_Hyundai_demo',
+ },
'playlist': [{
- 'file': '29955898.flv',
'md5': 'baed851342df6846eb8677a60a011a0f',
'info_dict': {
+ 'id': '29955898',
+ 'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (1)',
'duration': 74,
'tbr': 1378,
@@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
'height': 400,
},
}, {
- 'file': '29907998.flv',
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
'info_dict': {
+ 'id': '29907998',
+ 'ext': 'flv',
'title': 'AdAPPter_Hyundai_demo (2)',
'duration': 34,
'width': 854,
@@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
'params': {
'playlistend': 2,
},
- 'skip': 'Blocked in the US [sic]',
+ '_skip': 'Blocked in the US [sic]',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- pl_id = mobj.group('id')
-
+ pl_id = self._match_id(url)
vast_doc = self._download_xml(url, pl_id)
+
title = vast_doc.find('.//AdTitle').text
media = vast_doc.find('.//MediaFile').text
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py
index a63d126d4..0891a441f 100644
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -11,7 +11,7 @@ from ..compat import (
class SinaIE(InfoExtractor):
- _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+ _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
(
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
@@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
_TESTS = [
{
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
- 'file': '110028898.flv',
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
'info_dict': {
+ 'id': '110028898',
+ 'ext': 'flv',
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
}
},
@@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
},
]
- @classmethod
- def suitable(cls, url):
- return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
-
def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
@@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if mobj.group('token') is not None:
# The video id is in the redirected url
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 5d60c4939..c5284fa67 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
+ 'id': '2284613',
'title': 'The Royal Concept EP',
},
'playlist_mincount': 6,
@@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
return {
'_type': 'playlist',
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
- 'id': info['id'],
+ 'id': '%s' % info['id'],
'title': info['title'],
}
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 5fa67eb8d..18a823719 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
- 'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': {
+ 'id': '80187',
+ 'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
- 'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
'info_dict': {
+ 'id': '19705',
+ 'ext': 'mp4',
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
"title": "Louis C.K. Interview Pt. 1 11/3/11"
}
@@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
- r'data-node-id="(\d+?)"',
+ r'<div\s+class="player".*?data-id="(\d+?)"',
webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py
index e54145105..b9e2ef8ca 100644
--- a/youtube_dl/extractor/teletask.py
+++ b/youtube_dl/extractor/teletask.py
@@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
_TEST = {
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
'info_dict': {
+ 'id': '26168',
'title': 'Duplicate Detection',
},
'playlist': [{
@@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
def _real_extract(self, url):
lecture_id = self._match_id(url)
-
webpage = self._download_webpage(url, lecture_id)
title = self._html_search_regex(
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
index 2837f9c8e..4797d1310 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
_TEST = {
'url': 'http://www.tou.tv/30-vies/S04E41',
- 'file': '30-vies_S04E41.mp4',
'info_dict': {
+ 'id': '30-vies_S04E41',
+ 'ext': 'mp4',
'title': '30 vies Saison 4 / Épisode 41',
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
'age_limit': 8,
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 06b0bed41..1bb473514 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
- 'file': '75524534.mp4',
'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': {
+ 'id': '75524534',
+ 'ext': 'mp4',
'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick',
}
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 313b9c15d..c90488500 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
{
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
'playlist_mincount': 146,
+ 'info_dict': {
+ 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+ }
}
]
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3b3678c6e..e4b26b84f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1160,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
'info_dict': {
+ 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
'title': 'YDL_Empty_List',
},
'playlist_count': 0,
@@ -1168,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': {
'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
},
'playlist_count': 95,
}, {
@@ -1175,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'PLBB231211A4F62143',
'info_dict': {
'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
},
'playlist_mincount': 26,
}, {
@@ -1182,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
'info_dict': {
'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
},
'playlist_mincount': 799,
}, {
'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
'info_dict': {
'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
},
'playlist_count': 2,
}, {
@@ -1196,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 4,
'info_dict': {
'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
}
}, {
'note': 'Embedded SWF player',
@@ -1203,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
'playlist_count': 4,
'info_dict': {
'title': 'JODA7',
+ 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
}
}, {
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'info_dict': {
- 'title': 'Uploads from Interstellar Movie',
+ 'title': 'Uploads from Interstellar Movie',
+ 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
},
'playlist_mincout': 21,
}]
@@ -1314,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
'note': 'paginated channel',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'playlist_mincount': 91,
+ 'info_dict': {
+ 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ }
}]
def extract_videos_from_page(self, page):
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index b4617fbad..453e2732c 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,59 +1,122 @@
from __future__ import unicode_literals
import json
+import operator
import re
from .utils import (
ExtractorError,
)
+_OPERATORS = [
+ ('|', operator.or_),
+ ('^', operator.xor),
+ ('&', operator.and_),
+ ('>>', operator.rshift),
+ ('<<', operator.lshift),
+ ('-', operator.sub),
+ ('+', operator.add),
+ ('%', operator.mod),
+ ('/', operator.truediv),
+ ('*', operator.mul),
+]
+_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
+_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+
+_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+
class JSInterpreter(object):
- def __init__(self, code):
- self.code = code
+ def __init__(self, code, objects=None):
+ if objects is None:
+ objects = {}
+ self.code = self._remove_comments(code)
self._functions = {}
- self._objects = {}
+ self._objects = objects
+
+ def _remove_comments(self, code):
+ return re.sub(r'(?s)/\*.*?\*/', '', code)
- def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
raise ExtractorError('Recursion limit reached')
- if stmt.startswith('var '):
- stmt = stmt[len('var '):]
- ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
- r'=(?P<expr>.*)$', stmt)
- if ass_m:
- if ass_m.groupdict().get('index'):
- def assign(val):
- lvar = local_vars[ass_m.group('out')]
- idx = self.interpret_expression(
- ass_m.group('index'), local_vars, allow_recursion)
- assert isinstance(idx, int)
- lvar[idx] = val
- return val
- expr = ass_m.group('expr')
- else:
- def assign(val):
- local_vars[ass_m.group('out')] = val
- return val
- expr = ass_m.group('expr')
- elif stmt.startswith('return '):
- assign = lambda v: v
- expr = stmt[len('return '):]
+ should_abort = False
+ stmt = stmt.lstrip()
+ stmt_m = re.match(r'var\s', stmt)
+ if stmt_m:
+ expr = stmt[len(stmt_m.group(0)):]
else:
- # Try interpreting it as an expression
- expr = stmt
- assign = lambda v: v
+ return_m = re.match(r'return(?:\s+|$)', stmt)
+ if return_m:
+ expr = stmt[len(return_m.group(0)):]
+ should_abort = True
+ else:
+ # Try interpreting it as an expression
+ expr = stmt
v = self.interpret_expression(expr, local_vars, allow_recursion)
- return assign(v)
+ return v, should_abort
def interpret_expression(self, expr, local_vars, allow_recursion):
+ expr = expr.strip()
+
+ if expr == '': # Empty expression
+ return None
+
+ if expr.startswith('('):
+ parens_count = 0
+ for m in re.finditer(r'[()]', expr):
+ if m.group(0) == '(':
+ parens_count += 1
+ else:
+ parens_count -= 1
+ if parens_count == 0:
+ sub_expr = expr[1:m.start()]
+ sub_result = self.interpret_expression(
+ sub_expr, local_vars, allow_recursion)
+ remaining_expr = expr[m.end():].strip()
+ if not remaining_expr:
+ return sub_result
+ else:
+ expr = json.dumps(sub_result) + remaining_expr
+ break
+ else:
+ raise ExtractorError('Premature end of parens in %r' % expr)
+
+ for op, opfunc in _ASSIGN_OPERATORS:
+ m = re.match(r'''(?x)
+ (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
+ \s*%s
+ (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
+ if not m:
+ continue
+ right_val = self.interpret_expression(
+ m.group('expr'), local_vars, allow_recursion - 1)
+
+ if m.groupdict().get('index'):
+ lvar = local_vars[m.group('out')]
+ idx = self.interpret_expression(
+ m.group('index'), local_vars, allow_recursion)
+ assert isinstance(idx, int)
+ cur = lvar[idx]
+ val = opfunc(cur, right_val)
+ lvar[idx] = val
+ return val
+ else:
+ cur = local_vars.get(m.group('out'))
+ val = opfunc(cur, right_val)
+ local_vars[m.group('out')] = val
+ return val
+
if expr.isdigit():
return int(expr)
- if expr.isalpha():
- return local_vars[expr]
+ var_m = re.match(
+ r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
+ expr)
+ if var_m:
+ return local_vars[var_m.group('name')]
try:
return json.loads(expr)
@@ -61,7 +124,7 @@ class JSInterpreter(object):
pass
m = re.match(
- r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+ r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
expr)
if m:
variable = m.group('var')
@@ -114,23 +177,31 @@ class JSInterpreter(object):
return obj[member](argvals)
m = re.match(
- r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+ r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
if m:
val = local_vars[m.group('in')]
idx = self.interpret_expression(
m.group('idx'), local_vars, allow_recursion - 1)
return val[idx]
- m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
- if m:
- a = self.interpret_expression(
- m.group('a'), local_vars, allow_recursion)
- b = self.interpret_expression(
- m.group('b'), local_vars, allow_recursion)
- return a % b
+ for op, opfunc in _OPERATORS:
+ m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
+ if not m:
+ continue
+ x, abort = self.interpret_statement(
+ m.group('x'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature left-side return of %s in %r' % (op, expr))
+ y, abort = self.interpret_statement(
+ m.group('y'), local_vars, allow_recursion - 1)
+ if abort:
+ raise ExtractorError(
+ 'Premature right-side return of %s in %r' % (op, expr))
+ return opfunc(x, y)
m = re.match(
- r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+ r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
if m:
fname = m.group('func')
argvals = tuple([
@@ -139,6 +210,7 @@ class JSInterpreter(object):
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals)
+
raise ExtractorError('Unsupported JS expression %r' % expr)
def extract_object(self, objname):
@@ -162,9 +234,11 @@ class JSInterpreter(object):
def extract_function(self, funcname):
func_m = re.search(
- (r'(?:function %s|[{;]%s\s*=\s*function)' % (
- re.escape(funcname), re.escape(funcname))) +
- r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+ r'''(?x)
+ (?:function\s+%s|[{;]%s\s*=\s*function)\s*
+ \((?P<args>[^)]*)\)\s*
+ \{(?P<code>[^}]+)\}''' % (
+ re.escape(funcname), re.escape(funcname)),
self.code)
if func_m is None:
raise ExtractorError('Could not find JS function %r' % funcname)
@@ -172,10 +246,16 @@ class JSInterpreter(object):
return self.build_function(argnames, func_m.group('code'))
+ def call_function(self, funcname, *args):
+ f = self.extract_function(funcname)
+ return f(args)
+
def build_function(self, argnames, code):
def resf(args):
local_vars = dict(zip(argnames, args))
for stmt in code.split(';'):
- res = self.interpret_statement(stmt, local_vars)
+ res, abort = self.interpret_statement(stmt, local_vars)
+ if abort:
+ break
return res
return resf
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a4c9813ec..4ade0554e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -32,6 +32,7 @@ import xml.etree.ElementTree
import zlib
from .compat import (
+ compat_basestring,
compat_chr,
compat_getenv,
compat_html_entities,
@@ -140,7 +141,7 @@ else:
def find_xpath_attr(node, xpath, key, val):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . !
- if isinstance(xpath, unicode):
+ if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii')
for f in node.findall(xpath):
@@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
def parse_duration(s):
- if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
+ if not isinstance(s, compat_basestring):
return None
s = s.strip()
@@ -1426,7 +1427,7 @@ def uppercase_escape(s):
def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
- if sys.version_info < (3, 0) and isinstance(s, unicode):
+ if sys.version_info < (3, 0) and isinstance(s, compat_str):
s = s.encode('utf-8')
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 5e6288e8e..1e469d93b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.01.30.2'
+__version__ = '2015.02.02'