aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--test/helper.py2
-rw-r--r--test/test_compat.py44
-rw-r--r--test/test_download.py6
-rw-r--r--test/test_swfinterp.py4
-rw-r--r--test/test_utils.py26
-rwxr-xr-xyoutube_dl/YoutubeDL.py6
-rw-r--r--youtube_dl/__init__.py4
-rw-r--r--youtube_dl/cache.py6
-rw-r--r--youtube_dl/compat.py317
-rw-r--r--youtube_dl/downloader/rtmp.py6
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/addanime.py5
-rw-r--r--youtube_dl/extractor/brightcove.py19
-rw-r--r--youtube_dl/extractor/channel9.py35
-rw-r--r--youtube_dl/extractor/cinemassacre.py81
-rw-r--r--youtube_dl/extractor/cloudy.py10
-rw-r--r--youtube_dl/extractor/cnn.py6
-rw-r--r--youtube_dl/extractor/common.py24
-rw-r--r--youtube_dl/extractor/crunchyroll.py1
-rw-r--r--youtube_dl/extractor/dropbox.py3
-rw-r--r--youtube_dl/extractor/facebook.py4
-rw-r--r--youtube_dl/extractor/gamespot.py15
-rw-r--r--youtube_dl/extractor/generic.py21
-rw-r--r--youtube_dl/extractor/globo.py8
-rw-r--r--youtube_dl/extractor/goshgay.py43
-rw-r--r--youtube_dl/extractor/heise.py16
-rw-r--r--youtube_dl/extractor/imdb.py12
-rw-r--r--youtube_dl/extractor/izlesene.py15
-rw-r--r--youtube_dl/extractor/laola1tv.py1
-rw-r--r--youtube_dl/extractor/myvideo.py5
-rw-r--r--youtube_dl/extractor/niconico.py4
-rw-r--r--youtube_dl/extractor/played.py7
-rw-r--r--youtube_dl/extractor/ro220.py2
-rw-r--r--youtube_dl/extractor/trutube.py2
-rw-r--r--youtube_dl/extractor/ustream.py3
-rw-r--r--youtube_dl/extractor/vice.py38
-rw-r--r--youtube_dl/extractor/vimeo.py4
-rw-r--r--youtube_dl/extractor/wimp.py2
-rw-r--r--youtube_dl/extractor/youtube.py10
-rw-r--r--youtube_dl/options.py4
-rw-r--r--youtube_dl/postprocessor/atomicparsley.py5
-rw-r--r--youtube_dl/postprocessor/execafterdownload.py6
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py25
-rw-r--r--youtube_dl/postprocessor/xattrpp.py4
-rw-r--r--youtube_dl/utils.py443
-rw-r--r--youtube_dl/version.py2
47 files changed, 700 insertions, 609 deletions
diff --git a/README.md b/README.md
index 230c12a88..933411fa2 100644
--- a/README.md
+++ b/README.md
@@ -381,7 +381,7 @@ Again, from then on you'll be able to update with `sudo youtube-dl -U`.
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
-If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. See above for a way to update.
+If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
diff --git a/test/helper.py b/test/helper.py
index fb8618120..325f72f0a 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -57,7 +57,7 @@ class FakeYDL(YoutubeDL):
# Different instances of the downloader can't share the same dictionary
# some test set the "sublang" parameter, which would break the md5 checks.
params = get_params(override=override)
- super(FakeYDL, self).__init__(params)
+ super(FakeYDL, self).__init__(params, auto_init=False)
self.result = []
def to_screen(self, s, skip_eol=None):
diff --git a/test/test_compat.py b/test/test_compat.py
new file mode 100644
index 000000000..4a7fc3606
--- /dev/null
+++ b/test/test_compat.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.utils import get_filesystem_encoding
+from youtube_dl.compat import (
+ compat_getenv,
+ compat_expanduser,
+)
+
+
+class TestCompat(unittest.TestCase):
+ def test_compat_getenv(self):
+ test_str = 'тест'
+ os.environ['YOUTUBE-DL-TEST'] = (
+ test_str if sys.version_info >= (3, 0)
+ else test_str.encode(get_filesystem_encoding()))
+ self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
+
+ def test_compat_expanduser(self):
+ test_str = 'C:\Documents and Settings\тест\Application Data'
+ os.environ['HOME'] = (
+ test_str if sys.version_info >= (3, 0)
+ else test_str.encode(get_filesystem_encoding()))
+ self.assertEqual(compat_expanduser('~'), test_str)
+
+ def test_all_present(self):
+ import youtube_dl.compat
+ all_names = youtube_dl.compat.__all__
+ present_names = set(filter(
+ lambda c: '_' in c and not c.startswith('_'),
+ dir(youtube_dl.compat))) - set(['unicode_literals'])
+ self.assertEqual(all_names, sorted(present_names))
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_download.py b/test/test_download.py
index 75e0bb289..87aced97c 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -23,10 +23,12 @@ import json
import socket
import youtube_dl.YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
compat_http_client,
compat_urllib_error,
compat_HTTPError,
+)
+from youtube_dl.utils import (
DownloadError,
ExtractorError,
format_bytes,
@@ -94,7 +96,7 @@ def generator(test_case):
params.setdefault('extract_flat', True)
params.setdefault('skip_download', True)
- ydl = YoutubeDL(params)
+ ydl = YoutubeDL(params, auto_init=False)
ydl.add_default_info_extractors()
finished_hook_called = set()
def _hook(status):
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index b42cd74c7..273e0f074 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -37,7 +37,9 @@ def _make_testfunc(testfile):
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
- subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
+ subprocess.check_call([
+ 'mxmlc', '-output', swf_file,
+ '-static-link-runtime-shared-libraries', as_file])
except OSError as ose:
if ose.errno == errno.ENOENT:
print('mxmlc not found! Skipping test.')
diff --git a/test/test_utils.py b/test/test_utils.py
index 0b31d1a39..e59547784 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -20,7 +20,6 @@ from youtube_dl.utils import (
encodeFilename,
find_xpath_attr,
fix_xml_ampersands,
- get_meta_content,
orderedSet,
OnDemandPagedList,
InAdvancePagedList,
@@ -46,8 +45,6 @@ from youtube_dl.utils import (
escape_url,
js_to_json,
get_filesystem_encoding,
- compat_getenv,
- compat_expanduser,
)
@@ -157,17 +154,6 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
- def test_meta_parser(self):
- testhtml = '''
- <head>
- <meta name="description" content="foo &amp; bar">
- <meta content='Plato' name='author'/>
- </head>
- '''
- get_meta = lambda name: get_meta_content(name, testhtml)
- self.assertEqual(get_meta('description'), 'foo & bar')
- self.assertEqual(get_meta('author'), 'Plato')
-
def test_xpath_with_ns(self):
testxml = '''<root xmlns:media="http://example.com/">
<media:song>
@@ -359,17 +345,5 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": true}')
self.assertEqual(json.loads(on), {'abc': True})
- def test_compat_getenv(self):
- test_str = 'тест'
- os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0)
- else test_str.encode(get_filesystem_encoding()))
- self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
-
- def test_compat_expanduser(self):
- test_str = 'C:\Documents and Settings\тест\Application Data'
- os.environ['HOME'] = (test_str if sys.version_info >= (3, 0)
- else test_str.encode(get_filesystem_encoding()))
- self.assertEqual(compat_expanduser('~'), test_str)
-
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 73a372df4..8732f3db4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -22,13 +22,15 @@ import traceback
if os.name == 'nt':
import ctypes
-from .utils import (
+from .compat import (
compat_cookiejar,
compat_expanduser,
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_request,
+)
+from .utils import (
escape_url,
ContentTooShortError,
date_from_str,
@@ -62,6 +64,7 @@ from .utils import (
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
+from .downloader.rtmp import rtmpdump_version
from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
from .version import __version__
@@ -1321,6 +1324,7 @@ class YoutubeDL(object):
platform.python_version(), platform_name()))
exe_versions = FFmpegPostProcessor.get_versions()
+ exe_versions['rtmpdump'] = rtmpdump_version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 3c968082c..685dd8e5e 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -13,10 +13,12 @@ import sys
from .options import (
parseOpts,
)
-from .utils import (
+from .compat import (
compat_expanduser,
compat_getpass,
compat_print,
+)
+from .utils import (
DateRange,
DEFAULT_OUTTMPL,
decodeOption,
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
index ac5925d32..2d9b426cb 100644
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -8,10 +8,8 @@ import re
import shutil
import traceback
-from .utils import (
- compat_expanduser,
- write_json_file,
-)
+from .compat import compat_expanduser
+from .utils import write_json_file
class Cache(object):
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
new file mode 100644
index 000000000..b3752634a
--- /dev/null
+++ b/youtube_dl/compat.py
@@ -0,0 +1,317 @@
+from __future__ import unicode_literals
+
+import getpass
+import os
+import subprocess
+import sys
+
+
+try:
+ import urllib.request as compat_urllib_request
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_request
+
+try:
+ import urllib.error as compat_urllib_error
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_error
+
+try:
+ import urllib.parse as compat_urllib_parse
+except ImportError: # Python 2
+ import urllib as compat_urllib_parse
+
+try:
+ from urllib.parse import urlparse as compat_urllib_parse_urlparse
+except ImportError: # Python 2
+ from urlparse import urlparse as compat_urllib_parse_urlparse
+
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
+try:
+ import http.cookiejar as compat_cookiejar
+except ImportError: # Python 2
+ import cookielib as compat_cookiejar
+
+try:
+ import html.entities as compat_html_entities
+except ImportError: # Python 2
+ import htmlentitydefs as compat_html_entities
+
+try:
+ import html.parser as compat_html_parser
+except ImportError: # Python 2
+ import HTMLParser as compat_html_parser
+
+try:
+ import http.client as compat_http_client
+except ImportError: # Python 2
+ import httplib as compat_http_client
+
+try:
+ from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+ from urllib2 import HTTPError as compat_HTTPError
+
+try:
+ from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError: # Python 2
+ from urllib import urlretrieve as compat_urlretrieve
+
+
+try:
+ from subprocess import DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+ compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
+try:
+ from urllib.parse import unquote as compat_urllib_parse_unquote
+except ImportError:
+ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+ if string == '':
+ return string
+ res = string.split('%')
+ if len(res) == 1:
+ return string
+ if encoding is None:
+ encoding = 'utf-8'
+ if errors is None:
+ errors = 'replace'
+ # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
+ pct_sequence = b''
+ string = res[0]
+ for item in res[1:]:
+ try:
+ if not item:
+ raise ValueError
+ pct_sequence += item[:2].decode('hex')
+ rest = item[2:]
+ if not rest:
+ # This segment was just a single percent-encoded character.
+ # May be part of a sequence of code units, so delay decoding.
+ # (Stored in pct_sequence).
+ continue
+ except ValueError:
+ rest = '%' + item
+ # Encountered non-percent-encoded characters. Flush the current
+ # pct_sequence.
+ string += pct_sequence.decode(encoding, errors) + rest
+ pct_sequence = b''
+ if pct_sequence:
+ # Flush the final pct_sequence
+ string += pct_sequence.decode(encoding, errors)
+ return string
+
+
+try:
+ from urllib.parse import parse_qs as compat_parse_qs
+except ImportError: # Python 2
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, unicode
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError("bad query field: %r" % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
+
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
+
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
+try:
+ compat_chr = unichr # Python 2
+except NameError:
+ compat_chr = chr
+
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
+try:
+ from shlex import quote as shlex_quote
+except ImportError: # Python < 3.3
+ def shlex_quote(s):
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
+def compat_ord(c):
+ if type(c) is int: return c
+ else: return ord(c)
+
+
+if sys.version_info >= (3, 0):
+ compat_getenv = os.getenv
+ compat_expanduser = os.path.expanduser
+else:
+ # Environment variables should be decoded with filesystem encoding.
+ # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
+
+ def compat_getenv(key, default=None):
+ from .utils import get_filesystem_encoding
+ env = os.getenv(key, default)
+ if env:
+ env = env.decode(get_filesystem_encoding())
+ return env
+
+ # HACK: The default implementations of os.path.expanduser from cpython do not decode
+ # environment variables with filesystem encoding. We will work around this by
+ # providing adjusted implementations.
+ # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+ # for different platforms with correct environment variables decoding.
+
+ if os.name == 'posix':
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructions. If user or $HOME is unknown,
+ do nothing."""
+ if not path.startswith('~'):
+ return path
+ i = path.find('/', 1)
+ if i < 0:
+ i = len(path)
+ if i == 1:
+ if 'HOME' not in os.environ:
+ import pwd
+ userhome = pwd.getpwuid(os.getuid()).pw_dir
+ else:
+ userhome = compat_getenv('HOME')
+ else:
+ import pwd
+ try:
+ pwent = pwd.getpwnam(path[1:i])
+ except KeyError:
+ return path
+ userhome = pwent.pw_dir
+ userhome = userhome.rstrip('/')
+ return (userhome + path[i:]) or '/'
+ elif os.name == 'nt' or os.name == 'ce':
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructs.
+
+ If user or $HOME is unknown, do nothing."""
+ if path[:1] != '~':
+ return path
+ i, n = 1, len(path)
+ while i < n and path[i] not in '/\\':
+ i = i + 1
+
+ if 'HOME' in os.environ:
+ userhome = compat_getenv('HOME')
+ elif 'USERPROFILE' in os.environ:
+ userhome = compat_getenv('USERPROFILE')
+ elif not 'HOMEPATH' in os.environ:
+ return path
+ else:
+ try:
+ drive = compat_getenv('HOMEDRIVE')
+ except KeyError:
+ drive = ''
+ userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+ if i != 1: #~user
+ userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+ return userhome + path[i:]
+ else:
+ compat_expanduser = os.path.expanduser
+
+
+if sys.version_info < (3, 0):
+ def compat_print(s):
+ from .utils import preferredencoding
+ print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+else:
+ def compat_print(s):
+ assert type(s) == type(u'')
+ print(s)
+
+
+try:
+ subprocess_check_output = subprocess.check_output
+except AttributeError:
+ def subprocess_check_output(*args, **kwargs):
+ assert 'input' not in kwargs
+ p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
+ output, _ = p.communicate()
+ ret = p.poll()
+ if ret:
+ raise subprocess.CalledProcessError(ret, p.args, output=output)
+ return output
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+ def compat_getpass(prompt, *args, **kwargs):
+ if isinstance(prompt, compat_str):
+ from .utils import preferredencoding
+ prompt = prompt.encode(preferredencoding())
+ return getpass.getpass(prompt, *args, **kwargs)
+else:
+ compat_getpass = getpass.getpass
+
+
+__all__ = [
+ 'compat_HTTPError',
+ 'compat_chr',
+ 'compat_cookiejar',
+ 'compat_expanduser',
+ 'compat_getenv',
+ 'compat_getpass',
+ 'compat_html_entities',
+ 'compat_html_parser',
+ 'compat_http_client',
+ 'compat_ord',
+ 'compat_parse_qs',
+ 'compat_print',
+ 'compat_str',
+ 'compat_subprocess_get_DEVNULL',
+ 'compat_urllib_error',
+ 'compat_urllib_parse',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urllib_request',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+ 'shlex_quote',
+ 'subprocess_check_output',
+]
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 5eb108302..17d9631fa 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -12,9 +12,15 @@ from ..utils import (
compat_str,
encodeFilename,
format_bytes,
+ get_exe_version,
)
+def rtmpdump_version():
+ return get_exe_version(
+ 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
+
+
class RtmpFD(FileDownloader):
def real_download(self, filename, info_dict):
def run_rtmpdump(args):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3f85c99cd..3c1807f15 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -421,6 +421,7 @@ from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
from .vh1 import VH1IE
+from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
index fcf296057..11f149f9e 100644
--- a/youtube_dl/extractor/addanime.py
+++ b/youtube_dl/extractor/addanime.py
@@ -3,12 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
-
+)
+from ..utils import (
ExtractorError,
)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index ad22cbafd..a6920685e 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -14,6 +14,7 @@ from ..utils import (
compat_str,
compat_urllib_request,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
determine_ext,
ExtractorError,
@@ -23,7 +24,7 @@ from ..utils import (
class BrightcoveIE(InfoExtractor):
- _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
+ _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
_TESTS = [
@@ -260,11 +261,19 @@ class BrightcoveIE(InfoExtractor):
formats = []
for rend in renditions:
url = rend['defaultURL']
+ if not url:
+ continue
if rend['remote']:
- # This type of renditions are served through akamaihd.net,
- # but they don't use f4m manifests
- url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
- ext = 'flv'
+ url_comp = compat_urllib_parse_urlparse(url)
+ if url_comp.path.endswith('.m3u8'):
+ formats.extend(
+ self._extract_m3u8_formats(url, info['id'], 'mp4'))
+ continue
+ elif 'akamaihd.net' in url_comp.netloc:
+ # This type of renditions are served through
+ # akamaihd.net, but they don't use f4m manifests
+ url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
+ ext = 'flv'
else:
ext = determine_ext(url)
size = rend.get('size')
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 4f000292b..16d800512 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -27,7 +27,7 @@ class Channel9IE(InfoExtractor):
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'duration': 4576,
- 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
+ 'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
'session_code': 'KOS002',
'session_day': 'Day 1',
'session_room': 'Arena 1A',
@@ -43,7 +43,7 @@ class Channel9IE(InfoExtractor):
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'duration': 1540,
- 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
+ 'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
'authors': [ 'Mike Wilmot' ],
},
}
@@ -94,7 +94,7 @@ class Channel9IE(InfoExtractor):
def _extract_title(self, html):
title = self._html_search_meta('title', html, 'title')
- if title is None:
+ if title is None:
title = self._og_search_title(html)
TITLE_SUFFIX = ' (Channel 9)'
if title is not None and title.endswith(TITLE_SUFFIX):
@@ -115,7 +115,7 @@ class Channel9IE(InfoExtractor):
return self._html_search_meta('description', html, 'description')
def _extract_duration(self, html):
- m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
+ m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
def _extract_slides(self, html):
@@ -167,7 +167,7 @@ class Channel9IE(InfoExtractor):
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
def _extract_content(self, html, content_path):
- # Look for downloadable content
+ # Look for downloadable content
formats = self._formats_from_html(html)
slides = self._extract_slides(html)
zip_ = self._extract_zip(html)
@@ -258,16 +258,17 @@ class Channel9IE(InfoExtractor):
webpage = self._download_webpage(url, content_path, 'Downloading web page')
- page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
- if page_type_m is None:
- raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)
-
- page_type = page_type_m.group('pagetype')
- if page_type == 'List': # List page, may contain list of 'item'-like objects
+ page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
+ if page_type_m is not None:
+ page_type = page_type_m.group('pagetype')
+ if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
+ return self._extract_entry_item(webpage, content_path)
+ elif page_type == 'Session': # Event session page, may contain downloadable content
+ return self._extract_session(webpage, content_path)
+ elif page_type == 'Event':
+ return self._extract_list(content_path)
+ else:
+ raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
+
+ else: # Assuming list
return self._extract_list(content_path)
- elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content
- return self._extract_entry_item(webpage, content_path)
- elif page_type == 'Session': # Event session page, may contain downloadable content
- return self._extract_session(webpage, content_path)
- else:
- raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True) \ No newline at end of file
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index d064a28f9..31fe906b4 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
- mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
+ mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id')
+ full_video_id = mobj.group('full_video_id')
video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title')
@@ -59,41 +60,53 @@ class CinemassacreIE(InfoExtractor):
vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
- vidid = self._search_regex(
- r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid')
- videoserver = self._html_search_regex(
- r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver')
- videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
- videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
+ videolist_url = None
- formats = []
- baseurl = vidurl[:vidurl.rfind('/')+1]
- for video in videolist.findall('.//video'):
- src = video.get('src')
- if not src:
- continue
- file_ = src.partition(':')[-1]
- width = int_or_none(video.get('width'))
- height = int_or_none(video.get('height'))
- bitrate = int_or_none(video.get('system-bitrate'))
- format = {
- 'url': baseurl + file_,
- 'format_id': src.rpartition('.')[0].rpartition('_')[-1],
- }
- if width or height:
- format.update({
- 'tbr': bitrate // 1000 if bitrate else None,
- 'width': width,
- 'height': height,
- })
- else:
- format.update({
- 'abr': bitrate // 1000 if bitrate else None,
- 'vcodec': 'none',
- })
- formats.append(format)
- self._sort_formats(formats)
+ mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
+ if mobj:
+ videoserver = mobj.group('videoserver')
+ mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
+ vidid = mobj.group('vidid') if mobj else full_video_id
+ videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
+ else:
+ mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
+ if mobj:
+ videolist_url = mobj.group('smil')
+
+ if videolist_url:
+ videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
+ formats = []
+ baseurl = vidurl[:vidurl.rfind('/')+1]
+ for video in videolist.findall('.//video'):
+ src = video.get('src')
+ if not src:
+ continue
+ file_ = src.partition(':')[-1]
+ width = int_or_none(video.get('width'))
+ height = int_or_none(video.get('height'))
+ bitrate = int_or_none(video.get('system-bitrate'))
+ format = {
+ 'url': baseurl + file_,
+ 'format_id': src.rpartition('.')[0].rpartition('_')[-1],
+ }
+ if width or height:
+ format.update({
+ 'tbr': bitrate // 1000 if bitrate else None,
+ 'width': width,
+ 'height': height,
+ })
+ else:
+ format.update({
+ 'abr': bitrate // 1000 if bitrate else None,
+ 'vcodec': 'none',
+ })
+ formats.append(format)
+ self._sort_formats(formats)
+ else:
+ formats = [{
+ 'url': vidurl,
+ }]
return {
'id': video_id,
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
index 386f080d2..abf8cc280 100644
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@@ -4,14 +4,16 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
+from ..compat import (
compat_parse_qs,
compat_urllib_parse,
- remove_end,
- HEADRequest,
compat_HTTPError,
)
+from ..utils import (
+ ExtractorError,
+ HEADRequest,
+ remove_end,
+)
class CloudyIE(InfoExtractor):
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 78877b1cf..3826ce7e1 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -16,9 +16,10 @@ class CNNIE(InfoExtractor):
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
- 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': {
+ 'id': 'sports_2013_06_09_nadal-1-on-1.cnn',
+ 'ext': 'mp4',
'title': 'Nadal wins 8th French Open title',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
'duration': 135,
@@ -27,9 +28,10 @@ class CNNIE(InfoExtractor):
},
{
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
- "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
"info_dict": {
+ 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology',
+ 'ext': 'mp4',
"title": "Student's epic speech stuns new freshmen",
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
"upload_date": "20130821",
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7e4113213..b77f0e519 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -12,13 +12,14 @@ import sys
import time
import xml.etree.ElementTree
-from ..utils import (
+from ..compat import (
compat_http_client,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_str,
-
+)
+from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
@@ -403,7 +404,7 @@ class InfoExtractor(object):
video_info['title'] = playlist_title
return video_info
- def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@@ -424,8 +425,11 @@ class InfoExtractor(object):
_name = name
if mobj:
- # return the first matching group
- return next(g for g in mobj.groups() if g is not None)
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ else:
+ return mobj.group(group)
elif default is not _NO_DEFAULT:
return default
elif fatal:
@@ -435,11 +439,11 @@ class InfoExtractor(object):
'please report this issue on http://yt-dl.org/bug' % _name)
return None
- def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
+ def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
- res = self._search_regex(pattern, string, name, default, fatal, flags)
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
if res:
return clean_html(res).strip()
else:
@@ -533,9 +537,9 @@ class InfoExtractor(object):
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
- (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
- [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
- html, display_name, fatal=fatal, **kwargs)
+ (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+ [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index cc612d08e..0bd0eccba 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -17,7 +17,6 @@ from ..utils import (
bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
- clean_html,
urlencode_postdata,
)
from ..aes import (
diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py
index 5f24ac721..aefca848a 100644
--- a/youtube_dl/extractor/dropbox.py
+++ b/youtube_dl/extractor/dropbox.py
@@ -5,7 +5,8 @@ import os.path
import re
from .common import InfoExtractor
-from ..utils import compat_urllib_parse_unquote, url_basename
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
class DropboxIE(InfoExtractor):
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 3ad993751..104803563 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -5,12 +5,14 @@ import re
import socket
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
+)
+from ..utils import (
urlencode_postdata,
ExtractorError,
limit_length,
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 3d67b9d60..d570e3f6a 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -8,12 +8,11 @@ from ..utils import (
compat_urllib_parse,
compat_urlparse,
unescapeHTML,
- get_meta_content,
)
class GameSpotIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
+ _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
_TEST = {
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -26,10 +25,10 @@ class GameSpotIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_id = mobj.group('page_id')
+ page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
- data_video_json = self._search_regex(r'data-video=["\'](.*?)["\']', webpage, 'data video')
+ data_video_json = self._search_regex(
+ r'data-video=["\'](.*?)["\']', webpage, 'data video')
data_video = json.loads(unescapeHTML(data_video_json))
# Transform the manifest url to a link to the mp4 files
@@ -41,7 +40,8 @@ class GameSpotIE(InfoExtractor):
http_path = f4m_path[1:].split('/', 1)[1]
http_template = re.sub(QUALITIES_RE, r'%s', http_path)
http_template = http_template.replace('.csmil/manifest.f4m', '')
- http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template)
+ http_template = compat_urlparse.urljoin(
+ 'http://video.gamespotcdn.com/', http_template)
formats = []
for q in qualities:
formats.append({
@@ -52,8 +52,9 @@ class GameSpotIE(InfoExtractor):
return {
'id': data_video['guid'],
+ 'display_id': page_id,
'title': compat_urllib_parse.unquote(data_video['title']),
'formats': formats,
- 'description': get_meta_content('description', webpage),
+ 'description': self._html_search_meta('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8abc340b4..01d6a57f8 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,11 +7,12 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
-from ..utils import (
+from ..compat import (
compat_urllib_parse,
compat_urlparse,
compat_xml_parse_error,
-
+)
+from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
@@ -99,6 +100,22 @@ class GenericIE(InfoExtractor):
'uploader': 'Championat',
},
},
+ {
+ # https://github.com/rg3/youtube-dl/issues/3541
+ 'add_ie': ['Brightcove'],
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
# Direct link to a video
{
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index 77c3ad4fc..66ca37918 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -5,13 +5,15 @@ import random
import math
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- float_or_none,
+from ..compat import (
compat_str,
compat_chr,
compat_ord,
)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+)
class GloboIE(InfoExtractor):
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
index 7bca21ad0..18474cbb7 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -1,15 +1,11 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
- str_to_int,
ExtractorError,
)
-import json
class GoshgayIE(InfoExtractor):
@@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')
+ title = self._og_search_title(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', webpage, default='false')
+ config_url = self._search_regex(
+ r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
- player_config = self._search_regex(
- r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
- player_vars = json.loads(player_config.replace("'", '"'))
- width = str_to_int(player_vars.get('width'))
- height = str_to_int(player_vars.get('height'))
- config_uri = player_vars.get('config')
+ config = self._download_xml(
+ config_url, video_id, 'Downloading player config XML')
- if config_uri is None:
- raise ExtractorError('Missing config URI')
- node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
- errnote='Unable to download XML')
- if node is None:
+ if config is None:
raise ExtractorError('Missing config XML')
- if node.tag != 'config':
+ if config.tag != 'config':
raise ExtractorError('Missing config attribute')
- fns = node.findall('file')
- imgs = node.findall('image')
- if len(fns) != 1:
+ fns = config.findall('file')
+ if len(fns) < 1:
raise ExtractorError('Missing media URI')
video_url = fns[0].text
- if len(imgs) < 1:
- thumbnail = None
- else:
- thumbnail = imgs[0].text
url_comp = compat_urlparse.urlparse(url)
ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
@@ -65,9 +52,7 @@ class GoshgayIE(InfoExtractor):
'id': video_id,
'url': video_url,
'title': title,
- 'width': width,
- 'height': height,
'thumbnail': thumbnail,
'http_referer': ref,
- 'age_limit': 18,
+ 'age_limit': 0 if family_friendly == 'true' else 18,
}
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py
index d41c0413f..278d9f527 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- get_meta_content,
+ determine_ext,
int_or_none,
parse_iso8601,
)
@@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor):
'title': (
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
),
- 'format_id': 'mp4_720',
+ 'format_id': 'mp4_720p',
'timestamp': 1411812600,
'upload_date': '20140927',
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
- 'thumbnail': 're:https?://.*\.jpg$',
+ 'thumbnail': 're:^https?://.*\.jpe?g$',
}
}
@@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor):
info = {
'id': video_id,
'thumbnail': self._og_search_thumbnail(webpage),
- 'timestamp': parse_iso8601(get_meta_content('date', webpage)),
+ 'timestamp': parse_iso8601(
+ self._html_search_meta('date', webpage)),
'description': self._og_search_description(webpage),
}
- title = get_meta_content('fulltitle', webpage)
+ title = self._html_search_meta('fulltitle', webpage)
if title:
info['title'] = title
else:
@@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor):
label = source_node.attrib['label']
height = int_or_none(self._search_regex(
r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
+ video_url = source_node.attrib['file']
+ ext = determine_ext(video_url, '')
formats.append({
- 'url': source_node.attrib['file'],
+ 'url': video_url,
'format_note': label,
+ 'format_id': '%s_%s' % (ext, label),
'height': height,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 4536db3bf..6108ed552 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -6,7 +6,6 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
- get_element_by_attribute,
)
@@ -27,10 +26,11 @@ class ImdbIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
- descr = get_element_by_attribute('itemprop', 'description', webpage)
+ descr = self._html_search_regex(
+ r'(?s)<span itemprop="description">(.*?)</span>',
+ webpage, 'description', fatal=False)
available_formats = re.findall(
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
flags=re.MULTILINE)
@@ -73,9 +73,7 @@ class ImdbListIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- list_id = mobj.group('id')
-
+ list_id = self._match_id(url)
webpage = self._download_webpage(url, list_id)
entries = [
self.url_result('http://www.imdb.com' + m, 'Imdb')
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
index 07ef682ee..d16d483ee 100644
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dl/extractor/izlesene.py
@@ -5,11 +5,11 @@ import re
from .common import InfoExtractor
from ..utils import (
- get_element_by_id,
- parse_iso8601,
determine_ext,
- int_or_none,
float_or_none,
+ get_element_by_id,
+ int_or_none,
+ parse_iso8601,
str_to_int,
)
@@ -30,7 +30,7 @@ class IzleseneIE(InfoExtractor):
'description': 'md5:253753e2655dde93f59f74b572454f6d',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'pelikzzle',
- 'timestamp': 1404298698,
+ 'timestamp': 1404302298,
'upload_date': '20140702',
'duration': 95.395,
'age_limit': 0,
@@ -46,7 +46,7 @@ class IzleseneIE(InfoExtractor):
'description': 'Tarkan Dortmund 2006 Konseri',
'thumbnail': 're:^http://.*\.jpg',
'uploader_id': 'parlayankiz',
- 'timestamp': 1163318593,
+ 'timestamp': 1163322193,
'upload_date': '20061112',
'duration': 253.666,
'age_limit': 0,
@@ -55,10 +55,9 @@ class IzleseneIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- url = 'http://www.izlesene.com/video/%s' % video_id
+ video_id = self._match_id(url)
+ url = 'http://www.izlesene.com/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py
index 263f68773..102e29f7a 100644
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -4,6 +4,7 @@ import random
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class Laola1TvIE(InfoExtractor):
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
index ccb5959c4..a89153985 100644
--- a/youtube_dl/extractor/myvideo.py
+++ b/youtube_dl/extractor/myvideo.py
@@ -7,11 +7,12 @@ import re
import json
from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
compat_ord,
compat_urllib_parse,
compat_urllib_request,
-
+)
+from ..utils import (
ExtractorError,
)
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 62d5707fe..45cbd4ee9 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -12,6 +12,7 @@ from ..utils import (
unified_strdate,
parse_duration,
int_or_none,
+ ExtractorError,
)
@@ -108,6 +109,9 @@ class NiconicoIE(InfoExtractor):
flv_info_request, video_id,
note='Downloading flv info', errnote='Unable to download flv info')
+ if 'deleted=' in flv_info_webpage:
+ raise ExtractorError('The video has been deleted.',
+ expected=True)
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py
index 645a1e06d..17880471d 100644
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -6,6 +6,7 @@ import os.path
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
compat_urllib_parse,
compat_urllib_request,
)
@@ -29,6 +30,12 @@ class PlayedIE(InfoExtractor):
video_id = self._match_id(url)
orig_webpage = self._download_webpage(url, video_id)
+
+ m_error = re.search(
+ r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage)
+ if m_error:
+ raise ExtractorError(m_error.group('msg'), expected=True)
+
fields = re.findall(
r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
data = dict(fields)
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py
index 0a3a71448..962b524e9 100644
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -1,7 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import compat_urllib_parse_unquote
+from ..compat import compat_urllib_parse_unquote
class Ro220IE(InfoExtractor):
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index a73f3c43a..e7b79243a 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -29,7 +29,7 @@ class TruTubeIE(InfoExtractor):
# filehd is always 404
video_url = xpath_text(config, './file', 'video URL', fatal=True)
- title = xpath_text(config, './title', 'title')
+ title = xpath_text(config, './title', 'title').strip()
thumbnail = xpath_text(config, './image', ' thumbnail')
return {
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index cee1ea8f6..875450908 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
- get_meta_content,
)
@@ -79,7 +78,7 @@ class UstreamChannelIE(InfoExtractor):
m = re.match(self._VALID_URL, url)
display_id = m.group('slug')
webpage = self._download_webpage(url, display_id)
- channel_id = get_meta_content('ustream:channel_id', webpage)
+ channel_id = self._html_search_meta('ustream:channel_id', webpage)
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644
index 000000000..f11ca8217
--- /dev/null
+++ b/youtube_dl/extractor/vice.py
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+ _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'
+
+ _TEST = {
+ 'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ 'info_dict': {
+ 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
+ 'ext': 'mp4',
+ 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ 'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ try:
+ embed_code = self._search_regex(
+ r'embedCode=([^&\'"]+)', webpage,
+ 'ooyala embed code')
+ ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+ print(ooyala_url)
+ except ExtractorError:
+ raise ExtractorError('The page doesn\'t contain a video', expected=True)
+ return self.url_result(ooyala_url, ie='Ooyala')
+
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index d9cad0ea5..c744d4f04 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -7,11 +7,13 @@ import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+from ..compat import (
compat_HTTPError,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
+)
+from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 3377a543e..d6dec25ca 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -37,7 +37,7 @@ class WimpIE(InfoExtractor):
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r"'file'\s*:\s*'([^']+)'", webpage, 'video URL')
+ r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aad8ffbf4..c77d4056f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -684,7 +684,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Get video info
self.report_video_info_webpage_download(video_id)
if re.search(r'player-age-gate-content">', video_webpage) is not None:
- self.report_age_confirmation()
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
@@ -692,12 +691,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'sts': self._search_regex(
- r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
+ r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(video_info_url, video_id,
- note=False,
- errnote='unable to download video info webpage')
+ video_info_webpage = self._download_webpage(
+ video_info_url, video_id,
+ note='Refetching age-gated info webpage',
+ errnote='unable to download video info webpage')
video_info = compat_parse_qs(video_info_webpage)
else:
age_gate = False
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 98e20d549..997e92ad7 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -5,9 +5,11 @@ import optparse
import shlex
import sys
-from .utils import (
+from .compat import (
compat_expanduser,
compat_getenv,
+)
+from .utils import (
get_term_width,
write_string,
)
diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py
index 765b2d9ee..448ccc5f3 100644
--- a/youtube_dl/postprocessor/atomicparsley.py
+++ b/youtube_dl/postprocessor/atomicparsley.py
@@ -6,10 +6,11 @@ import os
import subprocess
from .common import PostProcessor
-
+from ..compat import (
+ compat_urlretrieve,
+)
from ..utils import (
check_executable,
- compat_urlretrieve,
encodeFilename,
PostProcessingError,
prepend_extension,
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py
index 08419a3d4..baf1b1945 100644
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ b/youtube_dl/postprocessor/execafterdownload.py
@@ -3,10 +3,8 @@ from __future__ import unicode_literals
import subprocess
from .common import PostProcessor
-from ..utils import (
- shlex_quote,
- PostProcessingError,
-)
+from ..compat import shlex_quote
+from ..utils import PostProcessingError
class ExecAfterDownloadPP(PostProcessor):
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 083c79592..f3f2743c0 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -1,5 +1,4 @@
import os
-import re
import subprocess
import sys
import time
@@ -7,10 +6,13 @@ import time
from .common import AudioConversionError, PostProcessor
-from ..utils import (
+from ..compat import (
compat_subprocess_get_DEVNULL,
+)
+from ..utils import (
encodeArgument,
encodeFilename,
+ get_exe_version,
is_outdated_version,
PostProcessingError,
prepend_extension,
@@ -19,23 +21,6 @@ from ..utils import (
)
-def get_version(executable):
- """ Returns the version of the specified executable,
- or False if the executable is not present """
- try:
- out, err = subprocess.Popen(
- [executable, '-version'],
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
- except OSError:
- return False
- firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
- m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline)
- if not m:
- return u'present'
- else:
- return m.group(1)
-
-
class FFmpegPostProcessorError(PostProcessingError):
pass
@@ -61,7 +46,7 @@ class FFmpegPostProcessor(PostProcessor):
@staticmethod
def get_versions():
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- return dict((program, get_version(program)) for program in programs)
+ return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
@property
def _executable(self):
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
index f6940940b..b5cae41c8 100644
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -3,10 +3,12 @@ import subprocess
import sys
from .common import PostProcessor
+from ..compat import (
+ subprocess_check_output
+)
from ..utils import (
check_executable,
hyphenate_date,
- subprocess_check_output
)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2864e5142..7c0fb1592 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
import calendar
import codecs
import contextlib
@@ -8,7 +10,6 @@ import ctypes
import datetime
import email.utils
import errno
-import getpass
import gzip
import itertools
import io
@@ -29,254 +30,19 @@ import traceback
import xml.etree.ElementTree
import zlib
-try:
- import urllib.request as compat_urllib_request
-except ImportError: # Python 2
- import urllib2 as compat_urllib_request
-
-try:
- import urllib.error as compat_urllib_error
-except ImportError: # Python 2
- import urllib2 as compat_urllib_error
-
-try:
- import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
- import urllib as compat_urllib_parse
-
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
-
-try:
- import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
- import cookielib as compat_cookiejar
-
-try:
- import html.entities as compat_html_entities
-except ImportError: # Python 2
- import htmlentitydefs as compat_html_entities
-
-try:
- import html.parser as compat_html_parser
-except ImportError: # Python 2
- import HTMLParser as compat_html_parser
-
-try:
- import http.client as compat_http_client
-except ImportError: # Python 2
- import httplib as compat_http_client
-
-try:
- from urllib.error import HTTPError as compat_HTTPError
-except ImportError: # Python 2
- from urllib2 import HTTPError as compat_HTTPError
-
-try:
- from urllib.request import urlretrieve as compat_urlretrieve
-except ImportError: # Python 2
- from urllib import urlretrieve as compat_urlretrieve
-
-
-try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
- compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
-
-try:
- from urllib.parse import unquote as compat_urllib_parse_unquote
-except ImportError:
- def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
- if string == '':
- return string
- res = string.split('%')
- if len(res) == 1:
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'replace'
- # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
- pct_sequence = b''
- string = res[0]
- for item in res[1:]:
- try:
- if not item:
- raise ValueError
- pct_sequence += item[:2].decode('hex')
- rest = item[2:]
- if not rest:
- # This segment was just a single percent-encoded character.
- # May be part of a sequence of code units, so delay decoding.
- # (Stored in pct_sequence).
- continue
- except ValueError:
- rest = '%' + item
- # Encountered non-percent-encoded characters. Flush the current
- # pct_sequence.
- string += pct_sequence.decode(encoding, errors) + rest
- pct_sequence = b''
- if pct_sequence:
- # Flush the final pct_sequence
- string += pct_sequence.decode(encoding, errors)
- return string
-
-
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, unicode
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError("bad query field: %r" % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
-
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
-try:
- from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError: # Python 2.6
- from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-try:
- from shlex import quote as shlex_quote
-except ImportError: # Python < 3.3
- def shlex_quote(s):
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
-
-def compat_ord(c):
- if type(c) is int: return c
- else: return ord(c)
-
-
-if sys.version_info >= (3, 0):
- compat_getenv = os.getenv
- compat_expanduser = os.path.expanduser
-else:
- # Environment variables should be decoded with filesystem encoding.
- # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
-
- def compat_getenv(key, default=None):
- env = os.getenv(key, default)
- if env:
- env = env.decode(get_filesystem_encoding())
- return env
-
- # HACK: The default implementations of os.path.expanduser from cpython do not decode
- # environment variables with filesystem encoding. We will work around this by
- # providing adjusted implementations.
- # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
- # for different platforms with correct environment variables decoding.
-
- if os.name == 'posix':
- def compat_expanduser(path):
- """Expand ~ and ~user constructions. If user or $HOME is unknown,
- do nothing."""
- if not path.startswith('~'):
- return path
- i = path.find('/', 1)
- if i < 0:
- i = len(path)
- if i == 1:
- if 'HOME' not in os.environ:
- import pwd
- userhome = pwd.getpwuid(os.getuid()).pw_dir
- else:
- userhome = compat_getenv('HOME')
- else:
- import pwd
- try:
- pwent = pwd.getpwnam(path[1:i])
- except KeyError:
- return path
- userhome = pwent.pw_dir
- userhome = userhome.rstrip('/')
- return (userhome + path[i:]) or '/'
- elif os.name == 'nt' or os.name == 'ce':
- def compat_expanduser(path):
- """Expand ~ and ~user constructs.
-
- If user or $HOME is unknown, do nothing."""
- if path[:1] != '~':
- return path
- i, n = 1, len(path)
- while i < n and path[i] not in '/\\':
- i = i + 1
-
- if 'HOME' in os.environ:
- userhome = compat_getenv('HOME')
- elif 'USERPROFILE' in os.environ:
- userhome = compat_getenv('USERPROFILE')
- elif not 'HOMEPATH' in os.environ:
- return path
- else:
- try:
- drive = compat_getenv('HOMEDRIVE')
- except KeyError:
- drive = ''
- userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
-
- if i != 1: #~user
- userhome = os.path.join(os.path.dirname(userhome), path[1:i])
-
- return userhome + path[i:]
- else:
- compat_expanduser = os.path.expanduser
+from .compat import (
+ compat_chr,
+ compat_getenv,
+ compat_html_entities,
+ compat_html_parser,
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse,
+ compat_urllib_parse_urlparse,
+ compat_urllib_request,
+ compat_urlparse,
+)
# This is not clearly defined otherwise
@@ -304,14 +70,6 @@ def preferredencoding():
return pref
-if sys.version_info < (3,0):
- def compat_print(s):
- print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
-else:
- def compat_print(s):
- assert type(s) == type(u'')
- print(s)
-
def write_json_file(obj, fn):
""" Encode obj as JSON and write it to fn, atomically """
@@ -394,127 +152,32 @@ def xpath_text(node, xpath, name=None, fatal=False):
return n.text
-compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class BaseHTMLParser(compat_html_parser.HTMLParser):
- def __init(self):
- compat_html_parser.HTMLParser.__init__(self)
- self.html = None
-
- def loads(self, html):
- self.html = html
- self.feed(html)
- self.close()
-
-class AttrParser(BaseHTMLParser):
- """Modified HTMLParser that isolates a tag with the specified attribute"""
- def __init__(self, attribute, value):
- self.attribute = attribute
- self.value = value
- self.result = None
- self.started = False
- self.depth = {}
- self.watch_startpos = False
- self.error_count = 0
- BaseHTMLParser.__init__(self)
-
- def error(self, message):
- if self.error_count > 10 or self.started:
- raise compat_html_parser.HTMLParseError(message, self.getpos())
- self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
- self.error_count += 1
- self.goahead(1)
-
- def handle_starttag(self, tag, attrs):
- attrs = dict(attrs)
- if self.started:
- self.find_startpos(None)
- if self.attribute in attrs and attrs[self.attribute] == self.value:
- self.result = [tag]
- self.started = True
- self.watch_startpos = True
- if self.started:
- if not tag in self.depth: self.depth[tag] = 0
- self.depth[tag] += 1
-
- def handle_endtag(self, tag):
- if self.started:
- if tag in self.depth: self.depth[tag] -= 1
- if self.depth[self.result[0]] == 0:
- self.started = False
- self.result.append(self.getpos())
-
- def find_startpos(self, x):
- """Needed to put the start position of the result (self.result[1])
- after the opening tag with the requested id"""
- if self.watch_startpos:
- self.watch_startpos = False
- self.result.append(self.getpos())
- handle_entityref = handle_charref = handle_data = handle_comment = \
- handle_decl = handle_pi = unknown_decl = find_startpos
-
- def get_result(self):
- if self.result is None:
- return None
- if len(self.result) != 3:
- return None
- lines = self.html.split('\n')
- lines = lines[self.result[1][0]-1:self.result[2][0]]
- lines[0] = lines[0][self.result[1][1]:]
- if len(lines) == 1:
- lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
- lines[-1] = lines[-1][:self.result[2][1]]
- return '\n'.join(lines).strip()
-# Hack for https://github.com/rg3/youtube-dl/issues/662
-if sys.version_info < (2, 7, 3):
- AttrParser.parse_endtag = (lambda self, i:
- i + len("</scr'+'ipt>")
- if self.rawdata[i:].startswith("</scr'+'ipt>")
- else compat_html_parser.HTMLParser.parse_endtag(self, i))
-
def get_element_by_id(id, html):
"""Return the content of the tag with the specified ID in the passed HTML document"""
return get_element_by_attribute("id", id, html)
+
def get_element_by_attribute(attribute, value, html):
"""Return the content of the tag with the specified attribute in the passed HTML document"""
- parser = AttrParser(attribute, value)
- try:
- parser.loads(html)
- except compat_html_parser.HTMLParseError:
- pass
- return parser.get_result()
-class MetaParser(BaseHTMLParser):
- """
- Modified HTMLParser that isolates a meta tag with the specified name
- attribute.
- """
- def __init__(self, name):
- BaseHTMLParser.__init__(self)
- self.name = name
- self.content = None
- self.result = None
-
- def handle_starttag(self, tag, attrs):
- if tag != 'meta':
- return
- attrs = dict(attrs)
- if attrs.get('name') == self.name:
- self.result = attrs.get('content')
+ m = re.search(r'''(?xs)
+ <([a-zA-Z0-9:._-]+)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ \s+%s=['"]?%s['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (re.escape(attribute), re.escape(value)), html)
- def get_result(self):
- return self.result
+ if not m:
+ return None
+ res = m.group('content')
-def get_meta_content(name, html):
- """
- Return the content attribute from the meta tag with the given name attribute.
- """
- parser = MetaParser(name)
- try:
- parser.loads(html)
- except compat_html_parser.HTMLParseError:
- pass
- return parser.get_result()
+ if res.startswith('"') or res.startswith("'"):
+ res = res[1:-1]
+
+ return unescapeHTML(res)
def clean_html(html):
@@ -1472,6 +1135,25 @@ def check_executable(exe, args=[]):
return exe
+def get_exe_version(exe, args=['--version'],
+ version_re=r'version\s+([0-9._-a-zA-Z]+)',
+ unrecognized=u'present'):
+ """ Returns the version of the specified executable,
+ or False if the executable is not present """
+ try:
+ out, err = subprocess.Popen(
+ [exe] + args,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ except OSError:
+ return False
+ firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
+ m = re.search(version_re, firstline)
+ if m:
+ return m.group(1)
+ else:
+ return unrecognized
+
+
class PagedList(object):
def __len__(self):
# This is only useful for tests
@@ -1562,7 +1244,7 @@ def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
if sys.version_info < (3, 0) and isinstance(s, unicode):
s = s.encode('utf-8')
- return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
+ return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
@@ -1636,15 +1318,6 @@ def parse_xml(s):
return tree
-if sys.version_info < (3, 0) and sys.platform == 'win32':
- def compat_getpass(prompt, *args, **kwargs):
- if isinstance(prompt, compat_str):
- prompt = prompt.encode(preferredencoding())
- return getpass.getpass(prompt, *args, **kwargs)
-else:
- compat_getpass = getpass.getpass
-
-
US_RATINGS = {
'G': 0,
'PG': 10,
@@ -1702,18 +1375,6 @@ def qualities(quality_ids):
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
-try:
- subprocess_check_output = subprocess.check_output
-except AttributeError:
- def subprocess_check_output(*args, **kwargs):
- assert 'input' not in kwargs
- p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
- output, _ = p.communicate()
- ret = p.poll()
- if ret:
- raise subprocess.CalledProcessError(ret, p.args, output=output)
- return output
-
def limit_length(s, length):
""" Add ellipses to overly long strings """
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 87d56c164..07cdb22ad 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.10.30'
+__version__ = '2014.11.04'