aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--devscripts/buildserver.py405
-rwxr-xr-xdevscripts/release.sh4
-rw-r--r--devscripts/youtube_genalgo.py4
-rw-r--r--test/test_all_urls.py1
-rw-r--r--test/test_dailymotion_subtitles.py2
-rw-r--r--test/test_playlists.py10
-rw-r--r--test/test_youtube_subtitles.py2
-rw-r--r--youtube_dl/YoutubeDL.py10
-rw-r--r--youtube_dl/__init__.py28
-rw-r--r--youtube_dl/extractor/__init__.py6
-rw-r--r--youtube_dl/extractor/archiveorg.py7
-rw-r--r--youtube_dl/extractor/bloomberg.py27
-rw-r--r--youtube_dl/extractor/dreisat.py6
-rw-r--r--youtube_dl/extractor/francetv.py12
-rw-r--r--youtube_dl/extractor/googleplus.py3
-rw-r--r--youtube_dl/extractor/hotnewhiphop.py4
-rw-r--r--youtube_dl/extractor/mixcloud.py122
-rw-r--r--youtube_dl/extractor/newgrounds.py38
-rw-r--r--youtube_dl/extractor/ooyala.py10
-rw-r--r--youtube_dl/extractor/soundcloud.py45
-rw-r--r--youtube_dl/extractor/southparkstudios.py34
-rw-r--r--youtube_dl/extractor/subtitles.py5
-rw-r--r--youtube_dl/extractor/trilulilu.py4
-rw-r--r--youtube_dl/extractor/vice.py38
-rw-r--r--youtube_dl/extractor/xhamster.py19
-rw-r--r--youtube_dl/extractor/youtube.py13
-rw-r--r--youtube_dl/utils.py23
-rw-r--r--youtube_dl/version.py2
28 files changed, 735 insertions, 149 deletions
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
new file mode 100644
index 000000000..e0c3cc83e
--- /dev/null
+++ b/devscripts/buildserver.py
@@ -0,0 +1,405 @@
+#!/usr/bin/python3
+
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import functools
+import sys
+import threading
+import traceback
+import os.path
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+ allow_reuse_address = True
+
+
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+ _fields_ = [
+ ('lpServiceName', LPTSTR),
+ ('lpServiceProc', START_CALLBACK)
+ ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+ ctypes.c_int, # return
+ ctypes.c_int, # dwControl
+ ctypes.c_int, # dwEventType
+ ctypes.c_void_p, # lpEventData,
+ ctypes.c_void_p, # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+ ar = (c_type * len(py_array))()
+ ar[:] = py_array
+ return ar
+
+
+def win_OpenSCManager():
+ res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
+ if not res:
+ raise Exception('Opening service manager failed - '
+ 'are you running this as administrator?')
+ return res
+
+
+def win_install_service(service_name, cmdline):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.CreateServiceW(
+ manager, service_name, None,
+ SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+ SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+ cmdline, None, None, None, None, None)
+ if not h:
+ raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.OpenServiceW(manager, service_name, DELETE)
+ if not h:
+ raise OSError('Could not find service %s: %s' % (
+ service_name, ctypes.FormatError()))
+
+ try:
+ if not advapi32.DeleteService(h):
+ raise OSError('Deletion failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def win_service_report_event(service_name, msg, is_error=True):
+ with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+ f.write(msg + '\n')
+
+ event_log = advapi32.RegisterEventSourceW(None, service_name)
+ if not event_log:
+ raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+ try:
+ type_id = 0x0001 if is_error else 0x0004
+ event_id = 0xc0000000 if is_error else 0x40000000
+ lines = _ctypes_array(LPTSTR, [msg])
+
+ if not advapi32.ReportEventW(
+ event_log, type_id, 0, event_id, None, len(lines), 0,
+ lines, None):
+ raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.DeregisterEventSource(event_log)
+
+
+def win_service_handler(stop_event, *args):
+ try:
+ raise ValueError('Handler called with args ' + repr(args))
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def win_service_set_status(handle, status_code):
+ svcStatus = SERVICE_STATUS()
+ svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+ svcStatus.dwCurrentState = status_code
+ svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+ svcStatus.dwServiceSpecificExitCode = 0
+
+ if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+ raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+ try:
+ #args = [argv_raw[i].value for i in range(argc)]
+ stop_event = threading.Event()
+ handler = HandlerEx(functools.partial(stop_event, win_service_handler))
+ h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+ if not h:
+ raise OSError('Handler registration failed: %s' %
+ ctypes.FormatError())
+
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def win_service_start(service_name, real_main):
+ try:
+ cb = START_CALLBACK(
+ functools.partial(win_service_main, service_name, real_main))
+ dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+ SERVICE_TABLE_ENTRY(
+ service_name,
+ cb
+ ),
+ SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+ ])
+
+ if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+ raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def main(args=None):
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--install',
+ action='store_const', dest='action', const='install',
+ help='Launch at Windows startup')
+ parser.add_argument('-u', '--uninstall',
+ action='store_const', dest='action', const='uninstall',
+ help='Remove Windows service')
+ parser.add_argument('-s', '--service',
+ action='store_const', dest='action', const='service',
+ help='Run as a Windows service')
+ parser.add_argument('-b', '--bind', metavar='<host:port>',
+ action='store', default='localhost:8142',
+ help='Bind to host:port (default %default)')
+ options = parser.parse_args(args=args)
+
+ if options.action == 'install':
+ fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+ cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+ win_install_service(SVCNAME, cmdline)
+ return
+
+ if options.action == 'uninstall':
+ win_uninstall_service(SVCNAME)
+ return
+
+ if options.action == 'service':
+ win_service_start(SVCNAME, main)
+ return
+
+ host, port_str = options.bind.split(':')
+ port = int(port_str)
+
+ print('Listening on %s:%d' % (host, port))
+ srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+ thr = threading.Thread(target=srv.serve_forever)
+ thr.start()
+ input('Press ENTER to shut down')
+ srv.shutdown()
+ thr.join()
+
+
+def rmtree(path):
+ for name in os.listdir(path):
+ fname = os.path.join(path, name)
+ if os.path.isdir(fname):
+ rmtree(fname)
+ else:
+ os.chmod(fname, 0o666)
+ os.remove(fname)
+ os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+ def __init__(self, output, code=500):
+ self.output = output
+ self.code = code
+
+ def __str__(self):
+ return self.output
+
+
+class HTTPError(BuildError):
+ pass
+
+
+class PythonBuilder(object):
+ def __init__(self, **kwargs):
+ pythonVersion = kwargs.pop('python', '2.7')
+ try:
+ key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+ try:
+ self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+ finally:
+ _winreg.CloseKey(key)
+ except Exception:
+ raise BuildError('No such Python version: %s' % pythonVersion)
+
+ super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+ def __init__(self, **kwargs):
+ try:
+ self.user, self.repoName = kwargs['path'][:2]
+ self.rev = kwargs.pop('rev')
+ except ValueError:
+ raise BuildError('Invalid path')
+ except KeyError as e:
+ raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+ path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+ if not os.path.exists(path):
+ os.makedirs(path)
+ self.basePath = tempfile.mkdtemp(dir=path)
+ self.buildPath = os.path.join(self.basePath, 'build')
+
+ super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+ def build(self):
+ try:
+ subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+ subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+ authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+ def __init__(self, **kwargs):
+ if self.repoName != 'youtube-dl':
+ raise BuildError('Invalid repository "%s"' % self.repoName)
+ if self.user not in self.authorizedUsers:
+ raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+ super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ try:
+ subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+ cwd=self.buildPath)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+ def __init__(self, **kwargs):
+ self.handler = kwargs.pop('handler')
+ self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+ self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+ if not self.srcPath.startswith(self.buildPath):
+ raise HTTPError(self.srcPath, 401)
+
+ super(DownloadBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ if not os.path.exists(self.srcPath):
+ raise HTTPError('No such file', 404)
+ if os.path.isdir(self.srcPath):
+ raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+ self.handler.send_response(200)
+ self.handler.send_header('Content-Type', 'application/octet-stream')
+ self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+ self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+ self.handler.end_headers()
+
+ with open(self.srcPath, 'rb') as src:
+ shutil.copyfileobj(src, self.handler.wfile)
+
+ super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+ def build(self):
+ try:
+ rmtree(self.basePath)
+ except Exception as e:
+ print('WARNING deleting "%s": %s' % (self.basePath, e))
+
+ super(CleanupTempDir, self).build()
+
+
+class Null(object):
+ def __init__(self, **kwargs):
+ pass
+
+ def start(self):
+ pass
+
+ def close(self):
+ pass
+
+ def build(self):
+ pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+ pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+ actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+
+ def do_GET(self):
+ path = urlparse.urlparse(self.path)
+ paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+ action, _, path = path.path.strip('/').partition('/')
+ if path:
+ path = path.split('/')
+ if action in self.actionDict:
+ try:
+ builder = self.actionDict[action](path=path, handler=self, **paramDict)
+ builder.start()
+ try:
+ builder.build()
+ finally:
+ builder.close()
+ except BuildError as e:
+ self.send_response(e.code)
+ msg = unicode(e).encode('UTF-8')
+ self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+ self.send_header('Content-Length', len(msg))
+ self.end_headers()
+ self.wfile.write(msg)
+ except HTTPError as e:
+ self.send_response(e.code, str(e))
+ else:
+ self.send_response(500, 'Unknown build method "%s"' % action)
+ else:
+ self.send_response(500, 'Malformed URL')
+
+#==============================================================================
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 62c68a6cf..796468b4b 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -55,8 +55,8 @@ git push origin "$version"
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
- wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index b390c7e2e..66019ee55 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -24,8 +24,8 @@ tests = [
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85 - vflkuzxcs 2013/09/11
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
- "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
+ ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
+ '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 99fc7bd28..ff1c86efe 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+ self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index bcd9f79f6..83c65d57e 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
@@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 4a2e00b01..d079a4f23 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -8,7 +8,7 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
from youtube_dl.utils import *
from helper import FakeYDL
@@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'5124905')
self.assertTrue(len(result['entries']) >= 11)
+ def test_soundcloud_user(self):
+ dl = FakeYDL()
+ ie = SoundcloudUserIE(dl)
+ result = ie.extract('https://soundcloud.com/the-concept-band')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], u'9615865')
+ self.assertTrue(len(result['entries']) >= 12)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index 5632871ac..168e6c66c 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
@@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.url = 'sAjKT8FhjI8'
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c2f992b8e..de2b133e0 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -74,6 +74,7 @@ class YoutubeDL(object):
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslangs: List of languages of the subtitles to download
@@ -141,14 +142,10 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
- assert type(message) == type(u'')
if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
- if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
- output = output.encode(preferredencoding(), 'ignore')
- self._screen_file.write(output)
- self._screen_file.flush()
+ write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
@@ -499,8 +496,7 @@ class YoutubeDL(object):
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
- self.params.get('writeautomaticsub'),
- self.params.get('allsubtitles', False)])
+ self.params.get('writeautomaticsub')])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 696e54f49..df4feefe7 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -30,6 +30,7 @@ __authors__ = (
'Pierre Rudloff',
'Huarong Huo',
'Ismael Mejía',
+ 'Steffan \'Ruirize\' James',
)
__license__ = 'Public Domain'
@@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+ action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
@@ -354,7 +355,7 @@ def parseOpts(overrideArguments=None):
if overrideArguments is not None:
opts, args = parser.parse_args(overrideArguments)
if opts.verbose:
- sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+ write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
@@ -367,9 +368,9 @@ def parseOpts(overrideArguments=None):
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
- sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
- sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
- sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+ write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+ write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+ write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
@@ -392,7 +393,7 @@ def _real_main(argv=None):
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
- sys.stderr.write(u'ERROR: unable to open cookie file\n')
+ write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
@@ -419,7 +420,7 @@ def _real_main(argv=None):
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose:
- sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+ write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
@@ -533,6 +534,11 @@ def _real_main(argv=None):
else:
date = DateRange(opts.dateafter, opts.datebefore)
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and (opts.writeautomaticsub == False):
+ opts.writesubtitles = True
+
if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
@@ -606,7 +612,7 @@ def _real_main(argv=None):
})
if opts.verbose:
- sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+ write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
@@ -615,14 +621,14 @@ def _real_main(argv=None):
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+ write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
- sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
- sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+ write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+ write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 25a8e3cf5..726c9fa15 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -6,6 +6,7 @@ from .arte import ArteTvIE
from .auengine import AUEngineIE
from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE
@@ -71,6 +72,7 @@ from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
@@ -86,7 +88,8 @@ from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
@@ -106,6 +109,7 @@ from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
+from .vice import ViceIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 7efd1d823..61ce4469a 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
for fn,fdata in data['files'].items()
if 'Video' in fdata['format']]
formats.sort(key=lambda fdata: fdata['file_size'])
+ for f in formats:
+ f['ext'] = determine_ext(f['url'])
info = {
'_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = determine_ext(formats[-1]['url'])
+ info.update(formats[-1])
- return info \ No newline at end of file
+ return info
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
new file mode 100644
index 000000000..3666a780b
--- /dev/null
+++ b/youtube_dl/extractor/bloomberg.py
@@ -0,0 +1,27 @@
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html'
+
+ _TEST = {
+ u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+ u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+ u'info_dict': {
+ u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+ u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+ },
+ u'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ ooyala_url = self._og_search_video_url(webpage)
+ return self.url_result(ooyala_url, ie='Ooyala')
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 64b465805..765cb1f37 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
'width': int(fe.find('./width').text),
'height': int(fe.find('./height').text),
'url': fe.find('./url').text,
+ 'ext': determine_ext(fe.find('./url').text),
'filesize': int(fe.find('./filesize').text),
'video_bitrate': int(fe.find('./videoBitrate').text),
'3sat_qualityname': fe.find('./quality').text,
@@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
}
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = determine_ext(formats[-1]['url'])
+ info.update(formats[-1])
- return info \ No newline at end of file
+ return info
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index f2b12c884..b8fe82e47 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
- _TEST = {
- u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
- u'file': u'88439064.mp4',
- u'info_dict': {
- u'title': u'Allô Rufo',
- u'description': u'md5:d909f1ebdf963814b65772aea250400e',
- },
- u'params': {
- u'skip_download': True,
- },
- }
+ # Can't use tests, videos expire in 7 days
def _real_extract(self, url):
title = re.match(self._VALID_URL, url).group(1)
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index f1cd88983..8895ad289 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
self.report_extraction(video_id)
# Extract update date
- upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>',
+ upload_date = self._html_search_regex(
+ ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
webpage, u'upload date', fatal=False)
if upload_date:
# Convert timestring to a format suitable for filename
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py
index ccca1d7e0..3798118a7 100644
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@@ -7,11 +7,11 @@ from .common import InfoExtractor
class HotNewHipHopIE(InfoExtractor):
_VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'
_TEST = {
- u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
+ u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': {
- u"title": u"Freddie Gibbs Songs - Lay It Down"
+ u"title": u"Freddie Gibbs - Lay It Down"
}
}
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 8245b5583..a200dcd74 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -5,34 +5,27 @@ import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
- compat_str,
compat_urllib_error,
compat_urllib_request,
-
- ExtractorError,
+ unified_strdate,
)
class MixcloudIE(InfoExtractor):
- _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'mixcloud'
- def report_download_json(self, file_id):
- """Report JSON download."""
- self.to_screen(u'Downloading json')
-
- def get_urls(self, jsonData, fmt, bitrate='best'):
- """Get urls from 'audio_formats' section in json"""
- try:
- bitrate_list = jsonData[fmt]
- if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
- bitrate = max(bitrate_list) # select highest
-
- url_list = jsonData[fmt][bitrate]
- except TypeError: # we have no bitrate info.
- url_list = jsonData[fmt]
- return url_list
+ _TEST = {
+ u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ u'file': u'dholbach-cryptkeeper.mp3',
+ u'info_dict': {
+ u'title': u'Cryptkeeper',
+ u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ u'uploader': u'Daniel Holbach',
+ u'uploader_id': u'dholbach',
+ u'upload_date': u'20111115',
+ },
+ }
def check_urls(self, url_list):
"""Returns 1st active url from list"""
@@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):
return None
- def _print_formats(self, formats):
- print('Available formats:')
- for fmt in formats.keys():
- for b in formats[fmt]:
- try:
- ext = formats[fmt][b][0]
- print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
- except TypeError: # we have no bitrate info
- ext = formats[fmt][0]
- print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
- break
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
- # extract uploader & filename from url
- uploader = mobj.group(1).decode('utf-8')
- file_id = uploader + "-" + mobj.group(2).decode('utf-8')
-
- # construct API request
- file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
- # retrieve .json file with links to files
- request = compat_urllib_request.Request(file_url)
- try:
- self.report_download_json(file_url)
- jsonData = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
-
- # parse JSON
- json_data = json.loads(jsonData)
- player_url = json_data['player_swf_url']
- formats = dict(json_data['audio_formats'])
-
- req_format = self._downloader.params.get('format', None)
-
- if self._downloader.params.get('listformats', None):
- self._print_formats(formats)
- return
-
- if req_format is None or req_format == 'best':
- for format_param in formats.keys():
- url_list = self.get_urls(formats, format_param)
- # check urls
- file_url = self.check_urls(url_list)
- if file_url is not None:
- break # got it!
- else:
- if req_format not in formats:
- raise ExtractorError(u'Format is not available')
-
- url_list = self.get_urls(formats, req_format)
- file_url = self.check_urls(url_list)
- format_param = req_format
- return [{
- 'id': file_id.decode('utf-8'),
- 'url': file_url.decode('utf-8'),
- 'uploader': uploader.decode('utf-8'),
- 'upload_date': None,
- 'title': json_data['name'],
- 'ext': file_url.split('.')[-1].decode('utf-8'),
- 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
- 'thumbnail': json_data['thumbnail_url'],
- 'description': json_data['description'],
- 'player_url': player_url.decode('utf-8'),
- }]
+ uploader = mobj.group(1)
+ cloudcast_name = mobj.group(2)
+ track_id = '-'.join((uploader, cloudcast_name))
+ api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+ webpage = self._download_webpage(url, track_id)
+ json_data = self._download_webpage(api_url, track_id,
+ u'Downloading cloudcast info')
+ info = json.loads(json_data)
+
+ preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+ song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+ template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
+ final_song_url = self.check_urls(template_url % i for i in range(30))
+
+ return {
+ 'id': track_id,
+ 'title': info['name'],
+ 'url': final_song_url,
+ 'ext': 'mp3',
+ 'description': info['description'],
+ 'thumbnail': info['pictures'].get('extra_large'),
+ 'uploader': info['user']['name'],
+ 'uploader_id': info['user']['username'],
+ 'upload_date': unified_strdate(info['created_time']),
+ 'view_count': info['play_count'],
+ }
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
new file mode 100644
index 000000000..2ef80bce0
--- /dev/null
+++ b/youtube_dl/extractor/newgrounds.py
@@ -0,0 +1,38 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class NewgroundsIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
+ _TEST = {
+ u'url': u'http://www.newgrounds.com/audio/listen/549479',
+ u'file': u'549479.mp3',
+ u'md5': u'fe6033d297591288fa1c1f780386f07a',
+ u'info_dict': {
+ u"title": u"B7 - BusMode",
+ u"uploader": u"Burn7",
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ music_id = mobj.group('id')
+ webpage = self._download_webpage(url, music_id)
+
+ title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title')
+ uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader')
+
+ music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}'
+ music_url_json = json.loads(music_url_json_string)
+ music_url = music_url_json['url']
+
+ return {
+ 'id': music_id,
+ 'title': title,
+ 'url': music_url,
+ 'uploader': uploader,
+ 'ext': determine_ext(music_url),
+ }
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index b734722d0..1f7b4d2e7 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor):
},
}
+ @staticmethod
+ def _url_for_embed_code(embed_code):
+ return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
'title': unescapeHTML(info['title']),
- 'url': info['url'],
+ 'url': info.get('ipad_url') or info['url'],
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
}
@@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor):
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, u'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode)
- videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
+ videos_info = self._search_regex(
+ r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+ mobile_player, u'info').replace('\\"','"')
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 5f3a5540d..29cd5617c 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,10 +1,12 @@
import json
import re
+import itertools
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urlparse,
+ compat_urllib_parse,
ExtractorError,
unified_strdate,
@@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
def _resolv_url(cls, url):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
- def _extract_info_dict(self, info, full_title=None):
+ def _extract_info_dict(self, info, full_title=None, quiet=False):
video_id = info['id']
name = full_title or video_id
- self.report_extraction(name)
+ if quiet == False:
+ self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
@@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
'id': info['id'],
'title': info['title'],
}
+
+
+class SoundcloudUserIE(SoundcloudIE):
+ _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
+ IE_NAME = u'soundcloud:user'
+
+ # it's in tests/test_playlists.py
+ _TEST = None
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader = mobj.group('user')
+
+ url = 'http://soundcloud.com/%s/' % uploader
+ resolv_url = self._resolv_url(url)
+ user_json = self._download_webpage(resolv_url, uploader,
+ u'Downloading user info')
+ user = json.loads(user_json)
+
+ tracks = []
+ for i in itertools.count():
+ data = compat_urllib_parse.urlencode({'offset': i*50,
+ 'client_id': self._CLIENT_ID,
+ })
+ tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
+ response = self._download_webpage(tracks_url, uploader,
+ u'Downloading tracks page %s' % (i+1))
+ new_tracks = json.loads(response)
+ tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
+ if len(new_tracks) < 50:
+ break
+
+ return {
+ '_type': 'playlist',
+ 'id': compat_str(user['id']),
+ 'title': user['username'],
+ 'entries': tracks,
+ }
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
new file mode 100644
index 000000000..a5dc754dd
--- /dev/null
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -0,0 +1,34 @@
+import re
+
+from .mtv import MTVIE, _media_xml_tag
+
+
+class SouthParkStudiosIE(MTVIE):
+ IE_NAME = u'southparkstudios.com'
+ _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'
+
+ _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+
+ _TEST = {
+ u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+ u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
+ u'info_dict': {
+ u'title': u'Bat Daded',
+ u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+ },
+ }
+
+ # Overwrite MTVIE properties we don't want
+ _TESTS = []
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ return itemdoc.find(search_path).attrib['url']
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
+ webpage, u'mgid')
+ return self._get_videos_info(mgid)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 97215f289..90de7de3a 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
@property
def _have_to_download_any_subtitles(self):
return any([self._downloader.params.get('writesubtitles', False),
- self._downloader.params.get('writeautomaticsub'),
- self._downloader.params.get('allsubtitles', False)])
+ self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage=None):
""" outputs the available subtitles for the video """
@@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id))
if not available_subs_list: # error, it didn't get the available subtitles
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index f278951ba..0bf028f61 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
{
'format': fnode.text,
'url': video_url_template % fnode.text,
+ 'ext': fnode.text.partition('-')[0]
}
for fnode in format_doc.findall('./formats/format')
@@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
}
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = formats[-1]['format'].partition('-')[0]
+ info.update(formats[-1])
return info
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644
index 000000000..6b93afa50
--- /dev/null
+++ b/youtube_dl/extractor/vice.py
@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+ _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)'
+
+ _TEST = {
+ u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
+ u'info_dict': {
+ u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ u'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ try:
+ ooyala_url = self._og_search_video_url(webpage)
+ except ExtractorError:
+ try:
+ embed_code = self._search_regex(
+ r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
+ u'ooyala embed code')
+ ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+ except ExtractorError:
+ raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
+ return self.url_result(ooyala_url, ie='Ooyala')
+
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 88b8b6be0..361619694 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -11,8 +11,8 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
- _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
- _TEST = {
+ _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+ _TESTS = [{
u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
u'file': u'1509445.flv',
u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
u"uploader_id": u"Ruseful2011",
u"title": u"FemaleAgent Shy beauty takes the bait"
}
- }
+ },
+ {
+ u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ u'file': u'2221348.flv',
+ u'md5': u'e767b9475de189320f691f49c679c4c7',
+ u'info_dict': {
+ u"upload_date": u"20130914",
+ u"uploader_id": u"jojo747400",
+ u"title": u"Britney Spears Sexy Booty"
+ }
+ }]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+ seo = mobj.group('seo')
+ mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f49665925..23a8097c5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(
(?:https?://)? # http(s):// (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
- tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
+ tube\.majestyc\.net/|
+ youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
@@ -428,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 86:
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
elif len(s) == 85:
- return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
+ return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
@@ -782,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if self._downloader.params.get('verbose'):
s = url_data['s'][0]
if age_gate:
- player_version = self._search_regex(r'ad3-(.+?)\.swf',
- video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
- 'flash player', fatal=False)
- player = 'flash player %s' % player_version
+ player = 'flash player'
else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
@@ -1007,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor):
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+ if 'entry' not in response['feed']:
+ # Number of videos is a multiple of self._MAX_RESULTS
+ break
# Extract video identifiers
ids_in_page = []
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 768c6207d..814a9b6be 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -700,7 +700,16 @@ def unified_strdate(date_str):
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
+ format_expressions = [
+ '%d %B %Y',
+ '%B %d %Y',
+ '%b %d %Y',
+ '%Y-%m-%d',
+ '%d/%m/%Y',
+ '%Y/%m/%d %H:%M:%S',
+ '%d.%m.%Y %H:%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ ]
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -781,6 +790,18 @@ def platform_name():
return res
+def write_string(s, out=None):
+ if out is None:
+ out = sys.stderr
+ assert type(s) == type(u'')
+
+ if ('b' in getattr(out, 'mode', '') or
+ sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ s = s.encode(preferredencoding(), 'ignore')
+ out.write(s)
+ out.flush()
+
+
def bytes_to_intlist(bs):
if not bs:
return []
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 3b2505c77..80ccfbd4f 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.12'
+__version__ = '2013.09.17'