diff options
28 files changed, 735 insertions, 149 deletions
| diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py new file mode 100644 index 000000000..e0c3cc83e --- /dev/null +++ b/devscripts/buildserver.py @@ -0,0 +1,405 @@ +#!/usr/bin/python3 + +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn +import argparse +import ctypes +import functools +import sys +import threading +import traceback +import os.path + + +class BuildHTTPServer(ThreadingMixIn, HTTPServer): +    allow_reuse_address = True + + +advapi32 = ctypes.windll.advapi32 + +SC_MANAGER_ALL_ACCESS = 0xf003f +SC_MANAGER_CREATE_SERVICE = 0x02 +SERVICE_WIN32_OWN_PROCESS = 0x10 +SERVICE_AUTO_START = 0x2 +SERVICE_ERROR_NORMAL = 0x1 +DELETE = 0x00010000 +SERVICE_STATUS_START_PENDING = 0x00000002 +SERVICE_STATUS_RUNNING = 0x00000004 +SERVICE_ACCEPT_STOP = 0x1 + +SVCNAME = 'youtubedl_builder' + +LPTSTR = ctypes.c_wchar_p +START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) + + +class SERVICE_TABLE_ENTRY(ctypes.Structure): +    _fields_ = [ +        ('lpServiceName', LPTSTR), +        ('lpServiceProc', START_CALLBACK) +    ] + + +HandlerEx = ctypes.WINFUNCTYPE( +    ctypes.c_int,     # return +    ctypes.c_int,     # dwControl +    ctypes.c_int,     # dwEventType +    ctypes.c_void_p,  # lpEventData, +    ctypes.c_void_p,  # lpContext, +) + + +def _ctypes_array(c_type, py_array): +    ar = (c_type * len(py_array))() +    ar[:] = py_array +    return ar + + +def win_OpenSCManager(): +    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) +    if not res: +        raise Exception('Opening service manager failed - ' +                        'are you running this as administrator?') +    return res + + +def win_install_service(service_name, cmdline): +    manager = win_OpenSCManager() +    try: +        h = advapi32.CreateServiceW( +            manager, service_name, None, +            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, +            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, +            cmdline, None, None, None, None, None) +        if not h: +            raise OSError('Service creation failed: %s' % ctypes.FormatError()) + +        advapi32.CloseServiceHandle(h) +    finally: +        advapi32.CloseServiceHandle(manager) + + +def win_uninstall_service(service_name): +    manager = win_OpenSCManager() +    try: +        h = advapi32.OpenServiceW(manager, service_name, DELETE) +        if not h: +            raise OSError('Could not find service %s: %s' % ( +                service_name, ctypes.FormatError())) + +        try: +            if not advapi32.DeleteService(h): +                raise OSError('Deletion failed: %s' % ctypes.FormatError()) +        finally: +            advapi32.CloseServiceHandle(h) +    finally: +        advapi32.CloseServiceHandle(manager) + + +def win_service_report_event(service_name, msg, is_error=True): +    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: +        f.write(msg + '\n') + +    event_log = advapi32.RegisterEventSourceW(None, service_name) +    if not event_log: +        raise OSError('Could not report event: %s' % ctypes.FormatError()) + +    try: +        type_id = 0x0001 if is_error else 0x0004 +        event_id = 0xc0000000 if is_error else 0x40000000 +        lines = _ctypes_array(LPTSTR, [msg]) + +        if not advapi32.ReportEventW( +                event_log, type_id, 0, event_id, None, len(lines), 0, +                lines, None): +            raise OSError('Event reporting failed: %s' % ctypes.FormatError()) +    finally: +        advapi32.DeregisterEventSource(event_log) + + +def win_service_handler(stop_event, *args): +    try: +        raise ValueError('Handler called with args ' + repr(args)) +        TODO +    except Exception as e: +        tb = traceback.format_exc() +        msg = str(e) + '\n' + tb +        win_service_report_event(service_name, msg, is_error=True) +        raise + + +def win_service_set_status(handle, status_code): +    svcStatus = SERVICE_STATUS() +    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS +    svcStatus.dwCurrentState = status_code +    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP + +    svcStatus.dwServiceSpecificExitCode = 0 + +    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): +        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) + + +def win_service_main(service_name, real_main, argc, argv_raw): +    try: +        #args = [argv_raw[i].value for i in range(argc)] +        stop_event = threading.Event() +        handler = HandlerEx(functools.partial(stop_event, win_service_handler)) +        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) +        if not h: +            raise OSError('Handler registration failed: %s' % +                          ctypes.FormatError()) + +        TODO +    except Exception as e: +        tb = traceback.format_exc() +        msg = str(e) + '\n' + tb +        win_service_report_event(service_name, msg, is_error=True) +        raise + + +def win_service_start(service_name, real_main): +    try: +        cb = START_CALLBACK( +            functools.partial(win_service_main, service_name, real_main)) +        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ +            SERVICE_TABLE_ENTRY( +                service_name, +                cb +            ), +            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) +        ]) + +        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): +            raise OSError('ctypes start failed: %s' % ctypes.FormatError()) +    except Exception as e: +        tb = traceback.format_exc() +        msg = str(e) + '\n' + tb +        win_service_report_event(service_name, msg, is_error=True) +        raise + + +def main(args=None): +    parser = argparse.ArgumentParser() +    parser.add_argument('-i', '--install', +                        action='store_const', dest='action', const='install', +                        help='Launch at Windows startup') +    parser.add_argument('-u', '--uninstall', +                        action='store_const', dest='action', const='uninstall', +                        help='Remove Windows service') +    parser.add_argument('-s', '--service', +                        action='store_const', dest='action', const='service', +                        help='Run as a Windows service') +    parser.add_argument('-b', '--bind', metavar='<host:port>', +                        action='store', default='localhost:8142', +                        help='Bind to host:port (default %default)') +    options = parser.parse_args(args=args) + +    if options.action == 'install': +        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') +        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) +        win_install_service(SVCNAME, cmdline) +        return + +    if options.action == 'uninstall': +        win_uninstall_service(SVCNAME) +        return + +    if options.action == 'service': +        win_service_start(SVCNAME, main) +        return + +    host, port_str = options.bind.split(':') +    port = int(port_str) + +    print('Listening on %s:%d' % (host, port)) +    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) +    thr = threading.Thread(target=srv.serve_forever) +    thr.start() +    input('Press ENTER to shut down') +    srv.shutdown() +    thr.join() + + +def rmtree(path): +    for name in os.listdir(path): +        fname = os.path.join(path, name) +        if os.path.isdir(fname): +            rmtree(fname) +        else: +            os.chmod(fname, 0o666) +            os.remove(fname) +    os.rmdir(path) + +#============================================================================== + +class BuildError(Exception): +    def __init__(self, output, code=500): +        self.output = output +        self.code = code + +    def __str__(self): +        return self.output + + +class HTTPError(BuildError): +    pass + + +class PythonBuilder(object): +    def __init__(self, **kwargs): +        pythonVersion = kwargs.pop('python', '2.7') +        try: +            key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) +            try: +                self.pythonPath, _ = _winreg.QueryValueEx(key, '') +            finally: +                _winreg.CloseKey(key) +        except Exception: +            raise BuildError('No such Python version: %s' % pythonVersion) + +        super(PythonBuilder, self).__init__(**kwargs) + + +class GITInfoBuilder(object): +    def __init__(self, **kwargs): +        try: +            self.user, self.repoName = kwargs['path'][:2] +            self.rev = kwargs.pop('rev') +        except ValueError: +            raise BuildError('Invalid path') +        except KeyError as e: +            raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) + +        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) +        if not os.path.exists(path): +            os.makedirs(path) +        self.basePath = tempfile.mkdtemp(dir=path) +        self.buildPath = os.path.join(self.basePath, 'build') + +        super(GITInfoBuilder, self).__init__(**kwargs) + + +class GITBuilder(GITInfoBuilder): +    def build(self): +        try: +            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) +            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) +        except subprocess.CalledProcessError as e: +            raise BuildError(e.output) + +        super(GITBuilder, self).build() + + +class YoutubeDLBuilder(object): +    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile'] + +    def __init__(self, **kwargs): +        if self.repoName != 'youtube-dl': +            raise BuildError('Invalid repository "%s"' % self.repoName) +        if self.user not in self.authorizedUsers: +            raise HTTPError('Unauthorized user "%s"' % self.user, 401) + +        super(YoutubeDLBuilder, self).__init__(**kwargs) + +    def build(self): +        try: +            subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], +                                    cwd=self.buildPath) +        except subprocess.CalledProcessError as e: +            raise BuildError(e.output) + +        super(YoutubeDLBuilder, self).build() + + +class DownloadBuilder(object): +    def __init__(self, **kwargs): +        self.handler = kwargs.pop('handler') +        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) +        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) +        if not self.srcPath.startswith(self.buildPath): +            raise HTTPError(self.srcPath, 401) + +        super(DownloadBuilder, self).__init__(**kwargs) + +    def build(self): +        if not os.path.exists(self.srcPath): +            raise HTTPError('No such file', 404) +        if os.path.isdir(self.srcPath): +            raise HTTPError('Is a directory: %s' % self.srcPath, 401) + +        self.handler.send_response(200) +        self.handler.send_header('Content-Type', 'application/octet-stream') +        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) +        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) +        self.handler.end_headers() + +        with open(self.srcPath, 'rb') as src: +            shutil.copyfileobj(src, self.handler.wfile) + +        super(DownloadBuilder, self).build() + + +class CleanupTempDir(object): +    def build(self): +        try: +            rmtree(self.basePath) +        except Exception as e: +            print('WARNING deleting "%s": %s' % (self.basePath, e)) + +        super(CleanupTempDir, self).build() + + +class Null(object): +    def __init__(self, **kwargs): +        pass + +    def start(self): +        pass + +    def close(self): +        pass + +    def build(self): +        pass + + +class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): +    pass + + +class BuildHTTPRequestHandler(BaseHTTPRequestHandler): +    actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching. + +    def do_GET(self): +        path = urlparse.urlparse(self.path) +        paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) +        action, _, path = path.path.strip('/').partition('/') +        if path: +            path = path.split('/') +            if action in self.actionDict: +                try: +                    builder = self.actionDict[action](path=path, handler=self, **paramDict) +                    builder.start() +                    try: +                        builder.build() +                    finally: +                        builder.close() +                except BuildError as e: +                    self.send_response(e.code) +                    msg = unicode(e).encode('UTF-8') +                    self.send_header('Content-Type', 'text/plain; charset=UTF-8') +                    self.send_header('Content-Length', len(msg)) +                    self.end_headers() +                    self.wfile.write(msg) +                except HTTPError as e: +                    self.send_response(e.code, str(e)) +            else: +                self.send_response(500, 'Unknown build method "%s"' % action) +        else: +            self.send_response(500, 'Malformed URL') + +#============================================================================== + +if __name__ == '__main__': +    main() diff --git a/devscripts/release.sh b/devscripts/release.sh index 62c68a6cf..796468b4b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -55,8 +55,8 @@ git push origin "$version"  /bin/echo -e "\n### OK, now it is time to build the binaries..."  REV=$(git rev-parse HEAD)  make youtube-dl youtube-dl.tar.gz -wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ -	wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +read -p "VM running? (y/n) " -n 1 +wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe  mkdir -p "build/$version"  mv youtube-dl youtube-dl.exe "build/$version"  mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index b390c7e2e..66019ee55 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -24,8 +24,8 @@ tests = [      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",       "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),      # 85 - vflkuzxcs 2013/09/11 -    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", -     "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"), +    ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[', +     '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),      # 84 - vflg0g8PQ 2013/08/29 (sporadic)      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",       ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"), diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 99fc7bd28..ff1c86efe 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):          self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668          self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])          self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) +        self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])      def test_youtube_channel_matching(self):          assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py index bcd9f79f6..83c65d57e 100644 --- a/test/test_dailymotion_subtitles.py +++ b/test/test_dailymotion_subtitles.py @@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):          subtitles = self.getSubtitles()          self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')      def test_allsubtitles(self): +        self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True          subtitles = self.getSubtitles()          self.assertEqual(len(subtitles.keys()), 5) @@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):          self.assertTrue(len(subtitles.keys()) == 0)      def test_nosubtitles(self):          self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' +        self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True          subtitles = self.getSubtitles()          self.assertEqual(len(subtitles), 0) diff --git a/test/test_playlists.py b/test/test_playlists.py index 4a2e00b01..d079a4f23 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -8,7 +8,7 @@ import json  import os  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE +from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE  from youtube_dl.utils import *  from helper import FakeYDL @@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['id'], u'5124905')          self.assertTrue(len(result['entries']) >= 11) +    def test_soundcloud_user(self): +        dl = FakeYDL() +        ie = SoundcloudUserIE(dl) +        result = ie.extract('https://soundcloud.com/the-concept-band') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], u'9615865') +        self.assertTrue(len(result['entries']) >= 12) +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 5632871ac..168e6c66c 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):          subtitles = self.getSubtitles()          self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')      def test_youtube_allsubtitles(self): +        self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True          subtitles = self.getSubtitles()          self.assertEqual(len(subtitles.keys()), 13) @@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):          self.assertTrue(subtitles['it'] is not None)      def test_youtube_nosubtitles(self):          self.url = 'sAjKT8FhjI8' +        self.DL.params['writesubtitles'] = True          self.DL.params['allsubtitles'] = True          subtitles = self.getSubtitles()          self.assertEqual(len(subtitles), 0) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c2f992b8e..de2b133e0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -74,6 +74,7 @@ class YoutubeDL(object):      writesubtitles:    Write the video subtitles to a file      writeautomaticsub: Write the automatic subtitles to a file      allsubtitles:      Downloads all the subtitles of the video +                       (requires writesubtitles or writeautomaticsub)      listsubtitles:     Lists all available subtitles for the video      subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)      subtitleslangs:    List of languages of the subtitles to download @@ -141,14 +142,10 @@ class YoutubeDL(object):      def to_screen(self, message, skip_eol=False):          """Print message to stdout if not in quiet mode.""" -        assert type(message) == type(u'')          if not self.params.get('quiet', False):              terminator = [u'\n', u''][skip_eol]              output = message + terminator -            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr -                output = output.encode(preferredencoding(), 'ignore') -            self._screen_file.write(output) -            self._screen_file.flush() +            write_string(output, self._screen_file)      def to_stderr(self, message):          """Print message to stderr.""" @@ -499,8 +496,7 @@ class YoutubeDL(object):                  return          subtitles_are_requested = any([self.params.get('writesubtitles', False), -                                       self.params.get('writeautomaticsub'), -                                       self.params.get('allsubtitles', False)]) +                                       self.params.get('writeautomaticsub')])          if  subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:              # subtitles download errors are already managed as troubles in relevant IE diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 696e54f49..df4feefe7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -30,6 +30,7 @@ __authors__  = (      'Pierre Rudloff',      'Huarong Huo',      'Ismael Mejía', +    'Steffan \'Ruirize\' James',  )  __license__ = 'Public Domain' @@ -149,7 +150,7 @@ def parseOpts(overrideArguments=None):      general.add_option('-U', '--update',              action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')      general.add_option('-i', '--ignore-errors', -            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) +            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)      general.add_option('--dump-user-agent',              action='store_true', dest='dump_user_agent',              help='display the current browser identification', default=False) @@ -354,7 +355,7 @@ def parseOpts(overrideArguments=None):      if overrideArguments is not None:          opts, args = parser.parse_args(overrideArguments)          if opts.verbose: -            sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n') +            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')      else:          xdg_config_home = os.environ.get('XDG_CONFIG_HOME')          if xdg_config_home: @@ -367,9 +368,9 @@ def parseOpts(overrideArguments=None):          argv = systemConf + userConf + commandLineConf          opts, args = parser.parse_args(argv)          if opts.verbose: -            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') -            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') -            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') +            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') +            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') +            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')      return parser, opts, args @@ -392,7 +393,7 @@ def _real_main(argv=None):          except (IOError, OSError) as err:              if opts.verbose:                  traceback.print_exc() -            sys.stderr.write(u'ERROR: unable to open cookie file\n') +            write_string(u'ERROR: unable to open cookie file\n')              sys.exit(101)      # Set user agent      if opts.user_agent is not None: @@ -419,7 +420,7 @@ def _real_main(argv=None):              batchurls = [x.strip() for x in batchurls]              batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]              if opts.verbose: -                sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') +                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')          except IOError:              sys.exit(u'ERROR: batch file could not be read')      all_urls = batchurls + args @@ -533,6 +534,11 @@ def _real_main(argv=None):      else:          date = DateRange(opts.dateafter, opts.datebefore) +    # --all-sub automatically sets --write-sub if --write-auto-sub is not given +    # this was the old behaviour if only --all-sub was given. +    if opts.allsubtitles and (opts.writeautomaticsub == False): +        opts.writesubtitles = True +      if sys.version_info < (3,):          # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)          if opts.outtmpl is not None: @@ -606,7 +612,7 @@ def _real_main(argv=None):          })      if opts.verbose: -        sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n') +        write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')          try:              sp = subprocess.Popen(                  ['git', 'rev-parse', '--short', 'HEAD'], @@ -615,14 +621,14 @@ def _real_main(argv=None):              out, err = sp.communicate()              out = out.decode().strip()              if re.match('[0-9a-f]+', out): -                sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n') +                write_string(u'[debug] Git HEAD: ' + out + u'\n')          except:              try:                  sys.exc_clear()              except:                  pass -        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') -        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n') +        write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n') +        write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')      ydl.add_default_info_extractors() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 25a8e3cf5..726c9fa15 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .arte import ArteTvIE  from .auengine import AUEngineIE  from .bandcamp import BandcampIE  from .bliptv import BlipTVIE, BlipTVUserIE +from .bloomberg import BloombergIE  from .breakcom import BreakIE  from .brightcove import BrightcoveIE  from .c56 import C56IE @@ -71,6 +72,7 @@ from .myvideo import MyVideoIE  from .naver import NaverIE  from .nba import NBAIE  from .nbc import NBCNewsIE +from .newgrounds import NewgroundsIE  from .ooyala import OoyalaIE  from .orf import ORFIE  from .pbs import PBSIE @@ -86,7 +88,8 @@ from .sina import SinaIE  from .slashdot import SlashdotIE  from .slideshare import SlideshareIE  from .sohu import SohuIE -from .soundcloud import SoundcloudIE, SoundcloudSetIE +from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE +from .southparkstudios import SouthParkStudiosIE  from .spiegel import SpiegelIE  from .stanfordoc import StanfordOpenClassroomIE  from .statigram import StatigramIE @@ -106,6 +109,7 @@ from .vbox7 import Vbox7IE  from .veehd import VeeHDIE  from .veoh import VeohIE  from .vevo import VevoIE +from .vice import ViceIE  from .videofyme import VideofyMeIE  from .vimeo import VimeoIE, VimeoChannelIE  from .vine import VineIE diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 7efd1d823..61ce4469a 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):              for fn,fdata in data['files'].items()              if 'Video' in fdata['format']]          formats.sort(key=lambda fdata: fdata['file_size']) +        for f in formats: +            f['ext'] = determine_ext(f['url'])          info = {              '_type': 'video', @@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):              info['thumbnail'] = thumbnail          # TODO: Remove when #980 has been merged -        info['url'] = formats[-1]['url'] -        info['ext'] = determine_ext(formats[-1]['url']) +        info.update(formats[-1]) -        return info
\ No newline at end of file +        return info diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py new file mode 100644 index 000000000..3666a780b --- /dev/null +++ b/youtube_dl/extractor/bloomberg.py @@ -0,0 +1,27 @@ +import re + +from .common import InfoExtractor + + +class BloombergIE(InfoExtractor): +    _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html' + +    _TEST = { +        u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', +        u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', +        u'info_dict': { +            u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', +            u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', +        }, +        u'params': { +            # Requires ffmpeg (m3u8 manifest) +            u'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        name = mobj.group('name') +        webpage = self._download_webpage(url, name) +        ooyala_url = self._og_search_video_url(webpage) +        return self.url_result(ooyala_url, ie='Ooyala') diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 64b465805..765cb1f37 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):              'width': int(fe.find('./width').text),              'height': int(fe.find('./height').text),              'url': fe.find('./url').text, +            'ext': determine_ext(fe.find('./url').text),              'filesize': int(fe.find('./filesize').text),              'video_bitrate': int(fe.find('./videoBitrate').text),              '3sat_qualityname': fe.find('./quality').text, @@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):          }          # TODO: Remove when #980 has been merged -        info['url'] = formats[-1]['url'] -        info['ext'] = determine_ext(formats[-1]['url']) +        info.update(formats[-1]) -        return info
\ No newline at end of file +        return info diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index f2b12c884..b8fe82e47 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):      IE_NAME = u'pluzz.francetv.fr'      _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' -    _TEST = { -        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html', -        u'file': u'88439064.mp4', -        u'info_dict': { -            u'title': u'Allô Rufo', -            u'description': u'md5:d909f1ebdf963814b65772aea250400e', -        }, -        u'params': { -            u'skip_download': True, -        }, -    } +    # Can't use tests, videos expire in 7 days      def _real_extract(self, url):          title = re.match(self._VALID_URL, url).group(1) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index f1cd88983..8895ad289 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):          self.report_extraction(video_id)          # Extract update date -        upload_date = self._html_search_regex('title="Timestamp">(.*?)</a>', +        upload_date = self._html_search_regex( +            ['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],              webpage, u'upload date', fatal=False)          if upload_date:              # Convert timestring to a format suitable for filename diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index ccca1d7e0..3798118a7 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -7,11 +7,11 @@ from .common import InfoExtractor  class HotNewHipHopIE(InfoExtractor):      _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html'      _TEST = { -        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'", +        u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",          u'file': u'1435540.mp3',          u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',          u'info_dict': { -            u"title": u"Freddie Gibbs Songs - Lay It Down" +            u"title": u"Freddie Gibbs - Lay It Down"          }      } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 8245b5583..a200dcd74 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -5,34 +5,27 @@ import socket  from .common import InfoExtractor  from ..utils import (      compat_http_client, -    compat_str,      compat_urllib_error,      compat_urllib_request, - -    ExtractorError, +    unified_strdate,  )  class MixcloudIE(InfoExtractor): -    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/      _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'      IE_NAME = u'mixcloud' -    def report_download_json(self, file_id): -        """Report JSON download.""" -        self.to_screen(u'Downloading json') - -    def get_urls(self, jsonData, fmt, bitrate='best'): -        """Get urls from 'audio_formats' section in json""" -        try: -            bitrate_list = jsonData[fmt] -            if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list: -                bitrate = max(bitrate_list) # select highest - -            url_list = jsonData[fmt][bitrate] -        except TypeError: # we have no bitrate info. -            url_list = jsonData[fmt] -        return url_list +    _TEST = { +        u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/', +        u'file': u'dholbach-cryptkeeper.mp3', +        u'info_dict': { +            u'title': u'Cryptkeeper', +            u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', +            u'uploader': u'Daniel Holbach', +            u'uploader_id': u'dholbach', +            u'upload_date': u'20111115', +        }, +    }      def check_urls(self, url_list):          """Returns 1st active url from list""" @@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):          return None -    def _print_formats(self, formats): -        print('Available formats:') -        for fmt in formats.keys(): -            for b in formats[fmt]: -                try: -                    ext = formats[fmt][b][0] -                    print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) -                except TypeError: # we have no bitrate info -                    ext = formats[fmt][0] -                    print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) -                    break -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) -        # extract uploader & filename from url -        uploader = mobj.group(1).decode('utf-8') -        file_id = uploader + "-" + mobj.group(2).decode('utf-8') - -        # construct API request -        file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json' -        # retrieve .json file with links to files -        request = compat_urllib_request.Request(file_url) -        try: -            self.report_download_json(file_url) -            jsonData = compat_urllib_request.urlopen(request).read() -        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -            raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err)) - -        # parse JSON -        json_data = json.loads(jsonData) -        player_url = json_data['player_swf_url'] -        formats = dict(json_data['audio_formats']) - -        req_format = self._downloader.params.get('format', None) - -        if self._downloader.params.get('listformats', None): -            self._print_formats(formats) -            return - -        if req_format is None or req_format == 'best': -            for format_param in formats.keys(): -                url_list = self.get_urls(formats, format_param) -                # check urls -                file_url = self.check_urls(url_list) -                if file_url is not None: -                    break # got it! -        else: -            if req_format not in formats: -                raise ExtractorError(u'Format is not available') - -            url_list = self.get_urls(formats, req_format) -            file_url = self.check_urls(url_list) -            format_param = req_format -        return [{ -            'id': file_id.decode('utf-8'), -            'url': file_url.decode('utf-8'), -            'uploader': uploader.decode('utf-8'), -            'upload_date': None, -            'title': json_data['name'], -            'ext': file_url.split('.')[-1].decode('utf-8'), -            'format': (format_param is None and u'NA' or format_param.decode('utf-8')), -            'thumbnail': json_data['thumbnail_url'], -            'description': json_data['description'], -            'player_url': player_url.decode('utf-8'), -        }] +        uploader = mobj.group(1) +        cloudcast_name = mobj.group(2) +        track_id = '-'.join((uploader, cloudcast_name)) +        api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name) +        webpage = self._download_webpage(url, track_id) +        json_data = self._download_webpage(api_url, track_id, +            u'Downloading cloudcast info') +        info = json.loads(json_data) + +        preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url') +        song_url = preview_url.replace('/previews/', '/cloudcasts/originals/') +        template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) +        final_song_url = self.check_urls(template_url % i for i in range(30)) + +        return { +            'id': track_id, +            'title': info['name'], +            'url': final_song_url, +            'ext': 'mp3', +            'description': info['description'], +            'thumbnail': info['pictures'].get('extra_large'), +            'uploader': info['user']['name'], +            'uploader_id': info['user']['username'], +            'upload_date': unified_strdate(info['created_time']), +            'view_count': info['play_count'], +        } diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py new file mode 100644 index 000000000..2ef80bce0 --- /dev/null +++ b/youtube_dl/extractor/newgrounds.py @@ -0,0 +1,38 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import determine_ext + + +class NewgroundsIE(InfoExtractor): +    _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)' +    _TEST = { +        u'url': u'http://www.newgrounds.com/audio/listen/549479', +        u'file': u'549479.mp3', +        u'md5': u'fe6033d297591288fa1c1f780386f07a', +        u'info_dict': { +            u"title": u"B7 - BusMode", +            u"uploader": u"Burn7", +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        music_id = mobj.group('id') +        webpage = self._download_webpage(url, music_id) +         +        title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title') +        uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader') +         +        music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}' +        music_url_json = json.loads(music_url_json_string) +        music_url = music_url_json['url'] + +        return { +            'id':       music_id, +            'title':    title, +            'url':      music_url, +            'uploader': uploader, +            'ext':      determine_ext(music_url), +        } diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index b734722d0..1f7b4d2e7 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -18,11 +18,15 @@ class OoyalaIE(InfoExtractor):          },      } +    @staticmethod +    def _url_for_embed_code(embed_code): +        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code +      def _extract_result(self, info, more_info):          return {'id': info['embedCode'],                  'ext': 'mp4',                  'title': unescapeHTML(info['title']), -                'url': info['url'], +                'url': info.get('ipad_url') or info['url'],                  'description': unescapeHTML(more_info['description']),                  'thumbnail': more_info['promo'],                  } @@ -35,7 +39,9 @@ class OoyalaIE(InfoExtractor):          mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',                                          player, u'mobile player url')          mobile_player = self._download_webpage(mobile_url, embedCode) -        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') +        videos_info = self._search_regex( +            r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', +            mobile_player, u'info').replace('\\"','"')          videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')          videos_info = json.loads(videos_info)          videos_more_info =json.loads(videos_more_info) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5f3a5540d..29cd5617c 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,10 +1,12 @@  import json  import re +import itertools  from .common import InfoExtractor  from ..utils import (      compat_str,      compat_urlparse, +    compat_urllib_parse,      ExtractorError,      unified_strdate, @@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):      def _resolv_url(cls, url):          return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID -    def _extract_info_dict(self, info, full_title=None): +    def _extract_info_dict(self, info, full_title=None, quiet=False):          video_id = info['id']          name = full_title or video_id -        self.report_extraction(name) +        if quiet == False: +            self.report_extraction(name)          thumbnail = info['artwork_url']          if thumbnail is not None: @@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):                  'id': info['id'],                  'title': info['title'],                  } + + +class SoundcloudUserIE(SoundcloudIE): +    _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' +    IE_NAME = u'soundcloud:user' + +    # it's in tests/test_playlists.py +    _TEST = None + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        uploader = mobj.group('user') + +        url = 'http://soundcloud.com/%s/' % uploader +        resolv_url = self._resolv_url(url) +        user_json = self._download_webpage(resolv_url, uploader, +            u'Downloading user info') +        user = json.loads(user_json) + +        tracks = [] +        for i in itertools.count(): +            data = compat_urllib_parse.urlencode({'offset': i*50, +                                                  'client_id': self._CLIENT_ID, +                                                  }) +            tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data +            response = self._download_webpage(tracks_url, uploader,  +                u'Downloading tracks page %s' % (i+1)) +            new_tracks = json.loads(response) +            tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks) +            if len(new_tracks) < 50: +                break + +        return { +            '_type': 'playlist', +            'id': compat_str(user['id']), +            'title': user['username'], +            'entries': tracks, +        } diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py new file mode 100644 index 000000000..a5dc754dd --- /dev/null +++ b/youtube_dl/extractor/southparkstudios.py @@ -0,0 +1,34 @@ +import re + +from .mtv import MTVIE, _media_xml_tag + + +class SouthParkStudiosIE(MTVIE): +    IE_NAME = u'southparkstudios.com' +    _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)' + +    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' + +    _TEST = { +        u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', +        u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4', +        u'info_dict': { +            u'title': u'Bat Daded', +            u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.', +        }, +    } + +    # Overwrite MTVIE properties we don't want +    _TESTS = [] + +    def _get_thumbnail_url(self, uri, itemdoc): +        search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) +        return itemdoc.find(search_path).attrib['url'] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) +        mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"', +                                  webpage, u'mgid') +        return self._get_videos_info(mgid) diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 97215f289..90de7de3a 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):      @property      def _have_to_download_any_subtitles(self):          return any([self._downloader.params.get('writesubtitles', False), -                    self._downloader.params.get('writeautomaticsub'), -                    self._downloader.params.get('allsubtitles', False)]) +                    self._downloader.params.get('writeautomaticsub')])      def _list_available_subtitles(self, video_id, webpage=None):          """ outputs the available subtitles for the video """ @@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):          available_subs_list = {}          if self._downloader.params.get('writeautomaticsub', False):              available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) -        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): +        if self._downloader.params.get('writesubtitles', False):              available_subs_list.update(self._get_available_subtitles(video_id))          if not available_subs_list:  # error, it didn't get the available subtitles diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index f278951ba..0bf028f61 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):              {                  'format': fnode.text,                  'url': video_url_template % fnode.text, +                'ext': fnode.text.partition('-')[0]              }              for fnode in format_doc.findall('./formats/format') @@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):          }          # TODO: Remove when #980 has been merged -        info['url'] = formats[-1]['url'] -        info['ext'] = formats[-1]['format'].partition('-')[0] +        info.update(formats[-1])          return info diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py new file mode 100644 index 000000000..6b93afa50 --- /dev/null +++ b/youtube_dl/extractor/vice.py @@ -0,0 +1,38 @@ +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE +from ..utils import ExtractorError + + +class ViceIE(InfoExtractor): +    _VALID_URL = r'http://www.vice.com/.*?/(?P<name>.+)' + +    _TEST = { +        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', +        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', +        u'info_dict': { +            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', +        }, +        u'params': { +            # Requires ffmpeg (m3u8 manifest) +            u'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        name = mobj.group('name') +        webpage = self._download_webpage(url, name) +        try: +            ooyala_url = self._og_search_video_url(webpage) +        except ExtractorError: +            try: +                embed_code = self._search_regex( +                    r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, +                    u'ooyala embed code') +                ooyala_url = OoyalaIE._url_for_embed_code(embed_code) +            except ExtractorError: +                raise ExtractorError(u'The page doesn\'t contain a video', expected=True) +        return self.url_result(ooyala_url, ie='Ooyala') + diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 88b8b6be0..361619694 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -11,8 +11,8 @@ from ..utils import (  class XHamsterIE(InfoExtractor):      """Information Extractor for xHamster""" -    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html' -    _TEST = { +    _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' +    _TESTS = [{          u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',          u'file': u'1509445.flv',          u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa', @@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):              u"uploader_id": u"Ruseful2011",               u"title": u"FemaleAgent Shy beauty takes the bait"          } -    } +    }, +    { +        u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', +        u'file': u'2221348.flv', +        u'md5': u'e767b9475de189320f691f49c679c4c7', +        u'info_dict': { +            u"upload_date": u"20130914",  +            u"uploader_id": u"jojo747400",  +            u"title": u"Britney Spears  Sexy Booty" +        } +    }]      def _real_extract(self,url):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id') -        mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id +        seo = mobj.group('seo') +        mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)          webpage = self._download_webpage(mrss_url, video_id)          mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f49665925..23a8097c5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                       (                           (?:https?://)?                                       # http(s):// (optional)                           (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| -                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains +                            tube\.majestyc\.net/| +                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls                           (?:                                                  # the various things that can precede the ID:                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ @@ -428,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          elif len(s) == 86:              return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]          elif len(s) == 85: -            return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1] +            return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]          elif len(s) == 84:              return s[81:36:-1] + s[0] + s[35:2:-1]          elif len(s) == 83: @@ -782,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                          if self._downloader.params.get('verbose'):                              s = url_data['s'][0]                              if age_gate: -                                player_version = self._search_regex(r'ad3-(.+?)\.swf', -                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', -                                    'flash player', fatal=False) -                                player = 'flash player %s' % player_version +                                player = 'flash player'                              else:                                  player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,                                      'html5 player', fatal=False) @@ -1007,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor):                  response = json.loads(page)              except ValueError as err:                  raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) +            if 'entry' not in response['feed']: +                # Number of videos is a multiple of self._MAX_RESULTS +                break              # Extract video identifiers              ids_in_page = [] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 768c6207d..814a9b6be 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -700,7 +700,16 @@ def unified_strdate(date_str):      date_str = date_str.replace(',',' ')      # %z (UTC offset) is only supported in python>=3.2      date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) -    format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M'] +    format_expressions = [ +        '%d %B %Y', +        '%B %d %Y', +        '%b %d %Y', +        '%Y-%m-%d', +        '%d/%m/%Y', +        '%Y/%m/%d %H:%M:%S', +        '%d.%m.%Y %H:%M', +        '%Y-%m-%dT%H:%M:%SZ', +    ]      for expression in format_expressions:          try:              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -781,6 +790,18 @@ def platform_name():      return res +def write_string(s, out=None): +    if out is None: +        out = sys.stderr +    assert type(s) == type(u'') + +    if ('b' in getattr(out, 'mode', '') or +            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr +        s = s.encode(preferredencoding(), 'ignore') +    out.write(s) +    out.flush() + +  def bytes_to_intlist(bs):      if not bs:          return [] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3b2505c77..80ccfbd4f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.09.12' +__version__ = '2013.09.17' | 
