diff options
79 files changed, 2202 insertions, 1017 deletions
diff --git a/.gitignore b/.gitignore index e44977ca3..86312d4e4 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ updates_key.pem  *.swp  test/testdata  .tox +youtube-dl.zsh diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 000000000..a574949b3 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,82 @@ +Ricardo Garcia Gonzalez +Danny Colligan +Benjamin Johnson +Vasyl' Vavrychuk +Witold Baryluk +Paweł Paprota +Gergely Imreh +Rogério Brito +Philipp Hagemeister +Sören Schulze +Kevin Ngo +Ori Avtalion +shizeeg +Filippo Valsorda +Christian Albrecht +Dave Vasilevsky +Jaime Marquínez Ferrándiz +Jeff Crouse +Osama Khalid +Michael Walter +M. Yasoob Ullah Khalid +Julien Fraichard +Johny Mo Swag +Axel Noack +Albert Kim +Pierre Rudloff +Huarong Huo +Ismael Mejía +Steffan 'Ruirize' James +Andras Elso +Jelle van der Waa +Marcin Cieślak +Anton Larionov +Takuya Tsuchida +Sergey M. +Michael Orlitzky +Chris Gahan +Saimadhav Heblikar +Mike Col +Oleg Prutz +pulpe +Andreas Schmitz +Michael Kaiser +Niklas Laxström +David Triendl +Anthony Weems +David Wagner +Juan C. Olivares +Mattias Harrysson +phaer +Sainyam Kapoor +Nicolas Évrard +Jason Normore +Hoje Lee +Adam Thalhammer +Georg Jähnig +Ralf Haring +Koki Takahashi +Ariset Llerena +Adam Malcontenti-Wilson +Tobias Bell +Naglis Jonaitis +Charles Chen +Hassaan Ali +Dobrosław Żybort +David Fabijan +Sebastian Haas +Alexander Kirk +Erik Johnson +Keith Beckman +Ole Ernst +Aaron McDaniel (mcd1992) +Magnus Kolstad +Hari Padmanaban +Carlos Ramos +5moufl +lenaten +Dennis Scheiba +Damon Timm +winwon +Xavier Beynon +Gabriel Schubiner
\ No newline at end of file @@ -1,7 +1,7 @@ -all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish +all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  clean: -	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish +	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part  cleanall: clean  	rm -f youtube-dl youtube-dl.exe @@ -9,6 +9,7 @@ cleanall: clean  PREFIX ?= /usr/local  BINDIR ?= $(PREFIX)/bin  MANDIR ?= $(PREFIX)/man +SHAREDIR ?= $(PREFIX)/share  PYTHON ?= /usr/bin/env python  # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local @@ -22,13 +23,15 @@ else  	endif  endif -install: youtube-dl youtube-dl.1 youtube-dl.bash-completion +install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  	install -d $(DESTDIR)$(BINDIR)  	install -m 755 youtube-dl $(DESTDIR)$(BINDIR)  	install -d $(DESTDIR)$(MANDIR)/man1  	install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1  	install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d  	install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl +	install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions +	install -m 644 youtube-dl.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_youtube-dl  	install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions  	install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish @@ -38,7 +41,7 @@ test:  tar: youtube-dl.tar.gz -.PHONY: all clean install test tar bash-completion pypi-files fish-completion +.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion  pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish @@ -66,12 +69,17 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co  bash-completion: youtube-dl.bash-completion +youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in +	python devscripts/zsh-completion.py + +zsh-completion: youtube-dl.zsh +  youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in  	python devscripts/fish-completion.py  fish-completion: youtube-dl.fish -youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish  	@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \  		--exclude '*.DS_Store' \  		--exclude '*.kate-swp' \ @@ -86,5 +94,5 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-  		bin devscripts test youtube_dl docs \  		LICENSE README.md README.txt \  		Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ -		youtube-dl.fish setup.py \ +		youtube-dl.zsh youtube-dl.fish setup.py \  		youtube-dl @@ -69,6 +69,8 @@ which means you can modify it, redistribute it or use it however you like.                                       configuration in ~/.config/youtube-dl.conf                                       (%APPDATA%/youtube-dl/config.txt on                                       Windows) +    --flat-playlist                  Do not extract the videos of a playlist, +                                     only list them.  ## Video Selection:      --playlist-start NUMBER          playlist video to start at (default is 1) @@ -99,8 +101,6 @@ which means you can modify it, redistribute it or use it however you like.                                       downloaded videos in it.      --include-ads                    Download advertisements as well                                       (experimental) -    --youtube-include-dash-manifest  Try to download the DASH manifest on -                                     YouTube videos (experimental)  ## Download Options:      -r, --rate-limit LIMIT           maximum download rate in bytes per second @@ -158,7 +158,8 @@ which means you can modify it, redistribute it or use it however you like.                                       downloads if possible.      --no-continue                    do not resume partially downloaded files                                       (restart from beginning) -    --no-part                        do not use .part files +    --no-part                        do not use .part files - write directly +                                     into output file      --no-mtime                       do not use the Last-modified header to set                                       the file modification time      --write-description              write video description to a .description @@ -198,6 +199,10 @@ which means you can modify it, redistribute it or use it however you like.      -j, --dump-json                  simulate, quiet but print JSON information.                                       See --output for a description of available                                       keys. +    -J, --dump-single-json           simulate, quiet but print JSON information +                                     for each command-line argument. If the URL +                                     refers to a playlist, dump the whole +                                     playlist information in a single line.      --newline                        output progress bar as new lines      --no-progress                    do not print progress bar      --console-title                  display progress in console titlebar @@ -216,7 +221,7 @@ which means you can modify it, redistribute it or use it however you like.                                       information about the video. (Currently                                       supported only for YouTube)      --user-agent UA                  specify a custom user agent -    --referer REF                    specify a custom referer, use if the video +    --referer URL                    specify a custom referer, use if the video                                       access is restricted to one domain      --add-header FIELD:VALUE         specify a custom HTTP header and its value,                                       separated by a colon ':'. You can use this @@ -241,6 +246,8 @@ which means you can modify it, redistribute it or use it however you like.                                       one is requested      --max-quality FORMAT             highest quality format to download      -F, --list-formats               list all available formats +    --youtube-skip-dash-manifest     Do not download the DASH manifest on +                                     YouTube videos  ## Subtitle Options:      --write-sub                      write subtitle file @@ -256,7 +263,7 @@ which means you can modify it, redistribute it or use it however you like.                                       language tags like 'en,pt'  ## Authentication Options: -    -u, --username USERNAME          account username +    -u, --username USERNAME          login with this account ID      -p, --password PASSWORD          account password      -2, --twofactor TWOFACTOR        two-factor auth code      -n, --netrc                      use .netrc authentication data @@ -267,7 +274,7 @@ which means you can modify it, redistribute it or use it however you like.                                       (requires ffmpeg or avconv and ffprobe or                                       avprobe)      --audio-format FORMAT            "best", "aac", "vorbis", "mp3", "m4a", -                                     "opus", or "wav"; best by default +                                     "opus", or "wav"; "best" by default      --audio-quality QUALITY          ffmpeg/avconv audio quality specification,                                       insert a value between 0 (better) and 9                                       (worse) for VBR or a specific bitrate like diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in new file mode 100644 index 000000000..b394a1ae7 --- /dev/null +++ b/devscripts/zsh-completion.in @@ -0,0 +1,28 @@ +#compdef youtube-dl + +__youtube_dl() { +    local curcontext="$curcontext" fileopts diropts cur prev +    typeset -A opt_args +    fileopts="{{fileopts}}" +    diropts="{{diropts}}" +    cur=$words[CURRENT] +    case $cur in +        :) +            _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)' +        ;; +        *) +            prev=$words[CURRENT-1] +            if [[ ${prev} =~ ${fileopts} ]]; then +                _path_files +            elif [[ ${prev} =~ ${diropts} ]]; then +                _path_files -/ +            elif [[ ${prev} == "--recode-video" ]]; then +                _arguments '*: :(mp4 flv ogg webm mkv)' +            else +                _arguments '*: :({{flags}})' +            fi +        ;; +    esac +} + +__youtube_dl
\ No newline at end of file diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py new file mode 100755 index 000000000..e8d71928a --- /dev/null +++ b/devscripts/zsh-completion.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +import os +from os.path import dirname as dirn +import sys + +sys.path.append(dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl + +ZSH_COMPLETION_FILE = "youtube-dl.zsh" +ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" + + +def build_completion(opt_parser): +    opts = [opt for group in opt_parser.option_groups +            for opt in group.option_list] +    opts_file = [opt for opt in opts if opt.metavar == "FILE"] +    opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + +    fileopts = [] +    for opt in opts_file: +        if opt._short_opts: +            fileopts.extend(opt._short_opts) +        if opt._long_opts: +            fileopts.extend(opt._long_opts) + +    diropts = [] +    for opt in opts_dir: +        if opt._short_opts: +            diropts.extend(opt._short_opts) +        if opt._long_opts: +            diropts.extend(opt._long_opts) + +    flags = [opt.get_opt_string() for opt in opts] + +    with open(ZSH_COMPLETION_TEMPLATE) as f: +        template = f.read() + +    template = template.replace("{{fileopts}}", "|".join(fileopts)) +    template = template.replace("{{diropts}}", "|".join(diropts)) +    template = template.replace("{{flags}}", " ".join(flags)) + +    with open(ZSH_COMPLETION_FILE, "w") as f: +        f.write(template) + +parser = youtube_dl.parseOpts()[0] +build_completion(parser) diff --git a/test/helper.py b/test/helper.py index 62cb3ce02..fb8618120 100644 --- a/test/helper.py +++ b/test/helper.py @@ -145,7 +145,7 @@ def expect_info_dict(self, expected_dict, got_dict):          info_dict_str = ''.join(              '    %s: %s,\n' % (_repr(k), _repr(v))              for k, v in test_info_dict.items()) -        write_string('\n"info_dict": {' + info_dict_str + '}\n', out=sys.stderr) +        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)          self.assertFalse(              missing_keys,              'Missing keys in test definition: %s' % ( @@ -171,3 +171,13 @@ def assertGreaterEqual(self, got, expected, msg=None):          if msg is None:              msg = '%r not greater than or equal to %r' % (got, expected)          self.assertTrue(got >= expected, msg) + + +def expect_warnings(ydl, warnings_re): +    real_warning = ydl.report_warning + +    def _report_warning(w): +        if not any(re.search(w_re, w) for w_re in warnings_re): +            real_warning(w) + +    ydl.report_warning = _report_warning diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 84b05da39..965e5d8a5 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -14,7 +14,7 @@ from test.helper import gettestcases  from youtube_dl.extractor import (      FacebookIE,      gen_extractors, -    JustinTVIE, +    TwitchIE,      YoutubeIE,  ) @@ -72,21 +72,17 @@ class TestAllURLsMatching(unittest.TestCase):          self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])          self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) -    def test_justin_tv_channelid_matching(self): -        self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv')) -        self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/')) -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/')) - -    def test_justintv_videoid_matching(self): -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) - -    def test_justin_tv_chapterid_matching(self): -        self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361')) +    def test_twitch_channelid_matching(self): +        self.assertTrue(TwitchIE.suitable('twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('www.twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv')) +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/')) + +    def test_twitch_videoid_matching(self): +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) + +    def test_twitch_chapterid_matching(self): +        self.assertTrue(TwitchIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361'))      def test_youtube_extract(self):          assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) diff --git a/test/test_download.py b/test/test_download.py index 8178015ea..75e0bb289 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from test.helper import (      assertGreaterEqual, +    expect_warnings,      get_params,      gettestcases,      expect_info_dict, @@ -100,6 +101,7 @@ def generator(test_case):              if status['status'] == 'finished':                  finished_hook_called.add(status['filename'])          ydl.add_progress_hook(_hook) +        expect_warnings(ydl, test_case.get('expected_warnings', []))          def get_tc_filename(tc):              return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) @@ -183,7 +185,9 @@ def generator(test_case):                          md5_for_file = _file_md5(tc_filename)                          self.assertEqual(md5_for_file, tc['md5'])                  info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' -                self.assertTrue(os.path.exists(info_json_fn)) +                self.assertTrue( +                    os.path.exists(info_json_fn), +                    'Missing info file %s' % info_json_fn)                  with io.open(info_json_fn, encoding='utf-8') as infof:                      info_dict = json.load(infof) diff --git a/test/test_utils.py b/test/test_utils.py index bcca0efea..19f9fce20 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -45,6 +45,9 @@ from youtube_dl.utils import (      escape_rfc3986,      escape_url,      js_to_json, +    get_filesystem_encoding, +    compat_getenv, +    compat_expanduser,  ) @@ -355,5 +358,15 @@ class TestUtil(unittest.TestCase):          on = js_to_json('{"abc": true}')          self.assertEqual(json.loads(on), {'abc': True}) +    def test_compat_getenv(self): +        test_str = 'тест' +        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding()) +        self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + +    def test_compat_expanduser(self): +        test_str = 'C:\Documents and Settings\тест\Application Data' +        os.environ['HOME'] = test_str.encode(get_filesystem_encoding()) +        self.assertEqual(compat_expanduser('~'), test_str) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dec0e20e7..28dcc0195 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -24,6 +24,7 @@ if os.name == 'nt':  from .utils import (      compat_cookiejar, +    compat_expanduser,      compat_http_client,      compat_str,      compat_urllib_error, @@ -61,7 +62,7 @@ from .utils import (  from .cache import Cache  from .extractor import get_info_extractor, gen_extractors  from .downloader import get_suitable_downloader -from .postprocessor import FFmpegMergerPP +from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor  from .version import __version__ @@ -107,6 +108,8 @@ class YoutubeDL(object):      forcefilename:     Force printing final filename.      forceduration:     Force printing duration.      forcejson:         Force printing info_dict as JSON. +    dump_single_json:  Force printing the info_dict of the whole playlist +                       (or video) as a single JSON line.      simulate:          Do not download the video files.      format:            Video format code.      format_limit:      Highest quality format to try. @@ -165,6 +168,8 @@ class YoutubeDL(object):                         'auto' for elaborate guessing      encoding:          Use this encoding instead of the system-specified.      extract_flat:      Do not resolve URLs, return the immediate result. +                       Pass in 'in_playlist' to only show this behavior for +                       playlist items.      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -447,7 +452,7 @@ class YoutubeDL(object):              template_dict = collections.defaultdict(lambda: 'NA', template_dict)              outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) -            tmpl = os.path.expanduser(outtmpl) +            tmpl = compat_expanduser(outtmpl)              filename = tmpl % template_dict              return filename          except ValueError as err: @@ -568,8 +573,12 @@ class YoutubeDL(object):          result_type = ie_result.get('_type', 'video') -        if self.params.get('extract_flat', False): -            if result_type in ('url', 'url_transparent'): +        if result_type in ('url', 'url_transparent'): +            extract_flat = self.params.get('extract_flat', False) +            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or +                    extract_flat is True): +                if self.params.get('forcejson', False): +                    self.to_stdout(json.dumps(ie_result))                  return ie_result          if result_type == 'video': @@ -897,6 +906,8 @@ class YoutubeDL(object):          if self.params.get('forcejson', False):              info_dict['_filename'] = filename              self.to_stdout(json.dumps(info_dict)) +        if self.params.get('dump_single_json', False): +            info_dict['_filename'] = filename          # Do nothing else if in simulate mode          if self.params.get('simulate', False): @@ -1015,7 +1026,7 @@ class YoutubeDL(object):                          downloaded = []                          success = True                          merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) -                        if not merger._get_executable(): +                        if not merger._executable:                              postprocessors = []                              self.report_warning('You have requested multiple '                                  'formats but ffmpeg or avconv are not installed.' @@ -1064,12 +1075,15 @@ class YoutubeDL(object):          for url in url_list:              try:                  #It also downloads the videos -                self.extract_info(url) +                res = self.extract_info(url)              except UnavailableVideoError:                  self.report_error('unable to download video')              except MaxDownloadsReached:                  self.to_screen('[info] Maximum number of downloaded files reached.')                  raise +            else: +                if self.params.get('dump_single_json', False): +                    self.to_stdout(json.dumps(res))          return self._download_retcode @@ -1297,8 +1311,18 @@ class YoutubeDL(object):                  sys.exc_clear()              except:                  pass -        self._write_string('[debug] Python version %s - %s' % -                     (platform.python_version(), platform_name()) + '\n') +        self._write_string('[debug] Python version %s - %s\n' % ( +            platform.python_version(), platform_name())) + +        exe_versions = FFmpegPostProcessor.get_versions() +        exe_str = ', '.join( +            '%s %s' % (exe, v) +            for exe, v in sorted(exe_versions.items()) +            if v +        ) +        if not exe_str: +            exe_str = 'none' +        self._write_string('[debug] exe versions: %s\n' % exe_str)          proxy_map = {}          for handler in self._opener.handlers: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7f2b4dfcc..7dc971884 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -1,86 +1,6 @@  #!/usr/bin/env python  # -*- coding: utf-8 -*- -__authors__  = ( -    'Ricardo Garcia Gonzalez', -    'Danny Colligan', -    'Benjamin Johnson', -    'Vasyl\' Vavrychuk', -    'Witold Baryluk', -    'Paweł Paprota', -    'Gergely Imreh', -    'Rogério Brito', -    'Philipp Hagemeister', -    'Sören Schulze', -    'Kevin Ngo', -    'Ori Avtalion', -    'shizeeg', -    'Filippo Valsorda', -    'Christian Albrecht', -    'Dave Vasilevsky', -    'Jaime Marquínez Ferrándiz', -    'Jeff Crouse', -    'Osama Khalid', -    'Michael Walter', -    'M. Yasoob Ullah Khalid', -    'Julien Fraichard', -    'Johny Mo Swag', -    'Axel Noack', -    'Albert Kim', -    'Pierre Rudloff', -    'Huarong Huo', -    'Ismael Mejía', -    'Steffan \'Ruirize\' James', -    'Andras Elso', -    'Jelle van der Waa', -    'Marcin Cieślak', -    'Anton Larionov', -    'Takuya Tsuchida', -    'Sergey M.', -    'Michael Orlitzky', -    'Chris Gahan', -    'Saimadhav Heblikar', -    'Mike Col', -    'Oleg Prutz', -    'pulpe', -    'Andreas Schmitz', -    'Michael Kaiser', -    'Niklas Laxström', -    'David Triendl', -    'Anthony Weems', -    'David Wagner', -    'Juan C. Olivares', -    'Mattias Harrysson', -    'phaer', -    'Sainyam Kapoor', -    'Nicolas Évrard', -    'Jason Normore', -    'Hoje Lee', -    'Adam Thalhammer', -    'Georg Jähnig', -    'Ralf Haring', -    'Koki Takahashi', -    'Ariset Llerena', -    'Adam Malcontenti-Wilson', -    'Tobias Bell', -    'Naglis Jonaitis', -    'Charles Chen', -    'Hassaan Ali', -    'Dobrosław Żybort', -    'David Fabijan', -    'Sebastian Haas', -    'Alexander Kirk', -    'Erik Johnson', -    'Keith Beckman', -    'Ole Ernst', -    'Aaron McDaniel (mcd1992)', -    'Magnus Kolstad', -    'Hari Padmanaban', -    'Carlos Ramos', -    '5moufl', -    'lenaten', -) -  __license__ = 'Public Domain'  import codecs @@ -94,6 +14,7 @@ from .options import (      parseOpts,  )  from .utils import ( +    compat_expanduser,      compat_getpass,      compat_print,      DateRange, @@ -255,8 +176,6 @@ def _real_main(argv=None):          date = DateRange.day(opts.date)      else:          date = DateRange(opts.dateafter, opts.datebefore) -    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search: -        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')      # Do not download videos when there are audio-only formats      if opts.extractaudio and not opts.keepvideo and opts.format is None: @@ -284,8 +203,8 @@ def _real_main(argv=None):                       u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'                       u' template'.format(outtmpl)) -    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson -    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive +    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json +    download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive      ydl_opts = {          'usenetrc': opts.usenetrc, @@ -304,8 +223,9 @@ def _real_main(argv=None):          'forcefilename': opts.getfilename,          'forceformat': opts.getformat,          'forcejson': opts.dumpjson, -        'simulate': opts.simulate, -        'skip_download': (opts.skip_download or opts.simulate or any_printing), +        'dump_single_json': opts.dump_single_json, +        'simulate': opts.simulate or any_printing, +        'skip_download': opts.skip_download,          'format': opts.format,          'format_limit': opts.format_limit,          'listformats': opts.listformats, @@ -369,6 +289,7 @@ def _real_main(argv=None):          'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,          'encoding': opts.encoding,          'exec_cmd': opts.exec_cmd, +        'extract_flat': opts.extract_flat,      }      with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py index 79ff09f78..ac5925d32 100644 --- a/youtube_dl/cache.py +++ b/youtube_dl/cache.py @@ -9,6 +9,7 @@ import shutil  import traceback  from .utils import ( +    compat_expanduser,      write_json_file,  ) @@ -22,7 +23,7 @@ class Cache(object):          if res is None:              cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')              res = os.path.join(cache_root, 'youtube-dl') -        return os.path.expanduser(res) +        return compat_expanduser(res)      def _get_cache_fn(self, section, key, dtype):          assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dd770fdf1..17ab49283 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -20,12 +20,14 @@ from .arte import (      ArteTVDDCIE,      ArteTVEmbedIE,  ) +from .audiomack import AudiomackIE  from .auengine import AUEngineIE  from .bambuser import BambuserIE, BambuserChannelIE  from .bandcamp import BandcampIE, BandcampAlbumIE  from .bbccouk import BBCCoUkIE  from .beeg import BeegIE  from .behindkink import BehindKinkIE +from .bild import BildIE  from .bilibili import BiliBiliIE  from .blinkx import BlinkxIE  from .bliptv import BlipTVIE, BlipTVUserIE @@ -60,7 +62,10 @@ from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE  from .condenast import CondeNastIE  from .cracked import CrackedIE  from .criterion import CriterionIE -from .crunchyroll import CrunchyrollIE +from .crunchyroll import ( +    CrunchyrollIE, +    CrunchyrollShowPlaylistIE +)  from .cspan import CSpanIE  from .d8 import D8IE  from .dailymotion import ( @@ -134,6 +139,7 @@ from .gamestar import GameStarIE  from .gametrailers import GametrailersIE  from .gdcvault import GDCVaultIE  from .generic import GenericIE +from .glide import GlideIE  from .globo import GloboIE  from .godtube import GodTubeIE  from .golem import GolemIE @@ -173,7 +179,6 @@ from .jadorecettepub import JadoreCettePubIE  from .jeuxvideo import JeuxVideoIE  from .jove import JoveIE  from .jukebox import JukeboxIE -from .justintv import JustinTVIE  from .jpopsukitv import JpopsukiIE  from .kankan import KankanIE  from .keezmovies import KeezMoviesIE @@ -316,6 +321,7 @@ from .sbs import SBSIE  from .scivee import SciVeeIE  from .screencast import ScreencastIE  from .servingsys import ServingSysIE +from .sexykarma import SexyKarmaIE  from .shared import SharedIE  from .sharesix import ShareSixIE  from .sina import SinaIE @@ -349,6 +355,7 @@ from .spike import SpikeIE  from .sport5 import Sport5IE  from .sportbox import SportBoxIE  from .sportdeutschland import SportDeutschlandIE +from .srmediathek import SRMediathekIE  from .stanfordoc import StanfordOpenClassroomIE  from .steam import SteamIE  from .streamcloud import StreamcloudIE @@ -367,10 +374,12 @@ from .teachingchannel import TeachingChannelIE  from .teamcoco import TeamcocoIE  from .techtalks import TechTalksIE  from .ted import TEDIE +from .telecinco import TelecincoIE  from .telemb import TeleMBIE  from .tenplay import TenPlayIE  from .testurl import TestURLIE  from .tf1 import TF1IE +from .theonion import TheOnionIE  from .theplatform import ThePlatformIE  from .thesixtyone import TheSixtyOneIE  from .thisav import ThisAVIE @@ -394,6 +403,7 @@ from .tutv import TutvIE  from .tvigle import TvigleIE  from .tvp import TvpIE  from .tvplay import TVPlayIE +from .twitch import TwitchIE  from .ubu import UbuIE  from .udemy import (      UdemyIE, @@ -419,6 +429,7 @@ from .videopremium import VideoPremiumIE  from .videott import VideoTtIE  from .videoweed import VideoWeedIE  from .vidme import VidmeIE +from .vidzi import VidziIE  from .vimeo import (      VimeoIE,      VimeoAlbumIE, @@ -438,6 +449,7 @@ from .viki import VikiIE  from .vk import VKIE  from .vodlocker import VodlockerIE  from .vporn import VpornIE +from .vrt import VRTIE  from .vube import VubeIE  from .vuclip import VuClipIE  from .vulture import VultureIE @@ -487,10 +499,8 @@ from .youtube import (      YoutubeUserIE,      YoutubeWatchLaterIE,  ) -  from .zdf import ZDFIE -  _ALL_CLASSES = [      klass      for name, klass in globals().items() diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index c3d02f85e..b9a9440c0 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,8 +10,8 @@ from ..utils import (      unified_strdate,      determine_ext,      get_element_by_id, -    compat_str,      get_element_by_attribute, +    int_or_none,  )  # There are different sources of video in arte.tv, the extraction process  @@ -90,15 +90,24 @@ class ArteTVPlus7IE(InfoExtractor):          if not upload_date_str:              upload_date_str = player_info.get('VDA', '').split(' ')[0] +        title = player_info['VTI'].strip() +        subtitle = player_info.get('VSU', '').strip() +        if subtitle: +            title += ' - %s' % subtitle +          info_dict = {              'id': player_info['VID'], -            'title': player_info['VTI'], +            'title': title,              'description': player_info.get('VDE'),              'upload_date': unified_strdate(upload_date_str),              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),          } -        all_formats = player_info['VSR'].values() +        all_formats = [] +        for format_id, format_dict in player_info['VSR'].items(): +            fmt = dict(format_dict) +            fmt['format_id'] = format_id +            all_formats.append(fmt)          # Some formats use the m3u8 protocol          all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))          def _match_lang(f): @@ -149,22 +158,12 @@ class ArteTVPlus7IE(InfoExtractor):                  )          formats = sorted(formats, key=sort_key)          def _format(format_info): -            quality = '' -            height = format_info.get('height') -            if height is not None: -                quality = compat_str(height) -            bitrate = format_info.get('bitrate') -            if bitrate is not None: -                quality += '-%d' % bitrate -            if format_info.get('versionCode') is not None: -                format_id = '%s-%s' % (quality, format_info['versionCode']) -            else: -                format_id = quality              info = { -                'format_id': format_id, -                'format_note': format_info.get('versionLibelle'), -                'width': format_info.get('width'), -                'height': height, +                'format_id': format_info['format_id'], +                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), +                'width': int_or_none(format_info.get('width')), +                'height': int_or_none(format_info.get('height')), +                'tbr': int_or_none(format_info.get('bitrate')),              }              if format_info['mediaType'] == 'rtmp':                  info['url'] = format_info['streamer'] diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py new file mode 100644 index 000000000..6232d2cd0 --- /dev/null +++ b/youtube_dl/extractor/audiomack.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .soundcloud import SoundcloudIE +from ..utils import ExtractorError + +import time + + +class AudiomackIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)' +    IE_NAME = 'audiomack' +    _TESTS = [ +        #hosted on audiomack +        { +            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', +            'info_dict': +            { +                'id' : 'roosh-williams/extraordinary', +                'ext': 'mp3', +                'title': 'Roosh Williams - Extraordinary' +            } +        }, +        #hosted on soundcloud via audiomack +        { +            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare', +            'file': '172419696.mp3', +            'info_dict': +            { +                'ext': 'mp3', +                'title': 'Young Thug ft Lil Wayne - Take Kare', +                "upload_date": "20141016", +                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n", +                "uploader": "Young Thug World" +            } +        } +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        api_response = self._download_json( +            "http://www.audiomack.com/api/music/url/song/%s?_=%d" % ( +                video_id, time.time()), +            video_id) + +        if "url" not in api_response: +            raise ExtractorError("Unable to deduce api url of song") +        realurl = api_response["url"] + +        #Audiomack wraps a lot of soundcloud tracks in their branded wrapper +        # - if so, pass the work off to the soundcloud extractor +        if SoundcloudIE.suitable(realurl): +            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'} + +        webpage = self._download_webpage(url, video_id) +        artist = self._html_search_regex( +            r'<span class="artist">(.*?)</span>', webpage, "artist") +        songtitle = self._html_search_regex( +            r'<h1 class="profile-title song-title"><span class="artist">.*?</span>(.*?)</h1>', +            webpage, "title") +        title = artist + " - " + songtitle + +        return { +            'id': video_id, +            'title': title, +            'url': realurl, +        } diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py new file mode 100644 index 000000000..0269d1174 --- /dev/null +++ b/youtube_dl/extractor/bild.py @@ -0,0 +1,39 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class BildIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html' +    IE_DESC = 'Bild.de' +    _TEST = { +        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html', +        'md5': 'dd495cbd99f2413502a1713a1156ac8a', +        'info_dict': { +            'id': '38184146', +            'ext': 'mp4', +            'title': 'BILD hat sie getestet', +            'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg', +            'duration': 196, +            'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml" +        doc = self._download_xml(xml_url, video_id) + +        duration = int_or_none(doc.attrib.get('duration'), scale=1000) + +        return { +            'id': video_id, +            'title': doc.attrib['ueberschrift'], +            'description': doc.attrib.get('text'), +            'url': doc.attrib['src'], +            'thumbnail': doc.attrib.get('img'), +            'duration': duration, +        } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 2e277c8c3..45ba51732 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -1,8 +1,6 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      ExtractorError, diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 294670386..ad22cbafd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -87,6 +87,15 @@ class BrightcoveIE(InfoExtractor):                  'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',              },          }, +        { +            # playlist test +            # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players +            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', +            'info_dict': { +                'title': 'Sealife', +            }, +            'playlist_mincount': 7, +        },      ]      @classmethod diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 496271be4..d064a28f9 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -42,7 +42,7 @@ class CinemassacreIE(InfoExtractor):          webpage = self._download_webpage(url, display_id)          video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') -        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) +        mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)          if not mobj:              raise ExtractorError('Can\'t extract embed url and video id')          playerdata_url = mobj.group('embed_url') @@ -53,17 +53,22 @@ class CinemassacreIE(InfoExtractor):          video_description = self._html_search_regex(              r'<div class="entry-content">(?P<description>.+?)</div>',              webpage, 'description', flags=re.DOTALL, fatal=False) +        video_thumbnail = self._og_search_thumbnail(webpage)          playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') -        video_thumbnail = self._search_regex( -            r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) -        sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') -        videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url') +        vidurl = self._search_regex( +            r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') +        vidid = self._search_regex( +            r'\'vidid\'\s*:\s*"([^\']+)"', playerdata, 'vidid') +        videoserver = self._html_search_regex( +            r"'videoserver'\s*:\s*'([^']+)'", playerdata, 'videoserver') + +        videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)          videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')          formats = [] -        baseurl = sd_url[:sd_url.rfind('/')+1] +        baseurl = vidurl[:vidurl.rfind('/')+1]          for video in videolist.findall('.//video'):              src = video.get('src')              if not src: diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py index d4227e6eb..2edab90a3 100644 --- a/youtube_dl/extractor/cliphunter.py +++ b/youtube_dl/extractor/cliphunter.py @@ -4,7 +4,6 @@ import json  import re  from .common import InfoExtractor -from ..utils import int_or_none  _translation_table = { @@ -39,9 +38,7 @@ class CliphunterIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_title = self._search_regex( diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index dae40c136..78877b1cf 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import (  class CNNIE(InfoExtractor):      _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/ -        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' +        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(-ap)?|(?=&)))'''      _TESTS = [{          'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 450c7dfd6..e1bd6bb49 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -89,6 +89,10 @@ class InfoExtractor(object):                                   format, irrespective of the file format.                                   -1 for default (order by other properties),                                   -2 or smaller for less than default. +                    * source_preference  Order number for this video source +                                  (quality takes higher priority) +                                 -1 for default (order by other properties), +                                 -2 or smaller for less than default.                      * http_referer  HTTP Referer header value to set.                      * http_method  HTTP method to use for the download.                      * http_headers  A dictionary of additional HTTP headers @@ -238,7 +242,6 @@ class InfoExtractor(object):      def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):          """ Returns a tuple (page content as string, URL handle) """ -          # Strip hashes from the URL (#1038)          if isinstance(url_or_request, (compat_str, str)):              url_or_request = url_or_request.partition('#')[0] @@ -247,6 +250,10 @@ class InfoExtractor(object):          if urlh is False:              assert not fatal              return False +        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) +        return (content, urlh) + +    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):          content_type = urlh.headers.get('Content-Type', '')          webpage_bytes = urlh.read()          m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) @@ -281,6 +288,12 @@ class InfoExtractor(object):              raw_filename = basen + '.dump'              filename = sanitize_filename(raw_filename, restricted=True)              self.to_screen('Saving request to ' + filename) +            # Working around MAX_PATH limitation on Windows (see +            # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) +            if os.name == 'nt': +                absfilepath = os.path.abspath(filename) +                if len(absfilepath) > 259: +                    filename = '\\\\?\\' + absfilepath              with open(filename, 'wb') as outf:                  outf.write(webpage_bytes) @@ -299,7 +312,7 @@ class InfoExtractor(object):                  msg += ' Visit %s for more details' % blocked_iframe              raise ExtractorError(msg, expected=True) -        return (content, urlh) +        return content      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):          """ Returns the data of the page as a string """ @@ -607,12 +620,13 @@ class InfoExtractor(object):                  audio_ext_preference,                  f.get('filesize') if f.get('filesize') is not None else -1,                  f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, +                f.get('source_preference') if f.get('source_preference') is not None else -1,                  f.get('format_id'),              )          formats.sort(key=_formats_key)      def http_scheme(self): -        """ Either "https:" or "https:", depending on the user's preferences """ +        """ Either "http:" or "https:", depending on the user's preferences """          return (              'http:'              if self._downloader.params.get('prefer_insecure', False) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index ffbe4903b..7a7e79360 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -34,6 +34,8 @@ class CondeNastIE(InfoExtractor):      _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())      IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) +    EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed)/.+?' % '|'.join(_SITES.keys()) +      _TEST = {          'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',          'md5': '1921f713ed48aabd715691f774c451f7', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f99888ecc..05b21e872 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -24,6 +24,7 @@ from ..aes import (      aes_cbc_decrypt,      inc,  ) +from .common import InfoExtractor  class CrunchyrollIE(SubtitlesInfoExtractor): @@ -39,6 +40,7 @@ class CrunchyrollIE(SubtitlesInfoExtractor):              'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',              'uploader': 'Yomiuri Telecasting Corporation (YTV)',              'upload_date': '20131013', +            'url': 're:(?!.*&)',          },          'params': {              # rtmp @@ -237,12 +239,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format              streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')              streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) -            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for '+video_format) -            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url') -            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path') +            streamdata = self._download_xml( +                streamdata_req, video_id, +                note='Downloading media info for %s' % video_format) +            video_url = streamdata.find('.//host').text +            video_play_path = streamdata.find('.//file').text              formats.append({                  'url': video_url, -                'play_path':   video_play_path, +                'play_path': video_play_path,                  'ext': 'flv',                  'format': video_format,                  'format_id': video_format, @@ -285,3 +289,40 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              'subtitles':   subtitles,              'formats':     formats,          } + + +class CrunchyrollShowPlaylistIE(InfoExtractor): +    IE_NAME = "crunchyroll:playlist" +    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$' + +    _TESTS = [{ +        'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', +        'info_dict': { +            'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', +            'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi' +        }, +        'playlist_count': 13, +    }] + +    def _real_extract(self, url): +        show_id = self._match_id(url) + +        webpage = self._download_webpage(url, show_id) +        title = self._html_search_regex( +            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>', +            webpage, 'title') +        episode_paths = re.findall( +            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"', +            webpage) +        entries = [ +            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll') +            for ep in episode_paths +        ] +        entries.reverse() + +        return { +            '_type': 'playlist', +            'id': show_id, +            'title': title, +            'entries': entries, +        } diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index c6ab6952e..3c39ca451 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -1,49 +1,48 @@  # encoding: utf-8 -import re +from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import ( -    determine_ext, -)  class FazIE(InfoExtractor): -    IE_NAME = u'faz.net' +    IE_NAME = 'faz.net'      _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'      _TEST = { -        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', -        u'file': u'12610585.mp4', -        u'info_dict': { -            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', -            u'description': u'md5:1453fbf9a0d041d985a47306192ea253', +        'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', +        'info_dict': { +            'id': '12610585', +            'ext': 'mp4', +            'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', +            'description': 'md5:1453fbf9a0d041d985a47306192ea253',          },      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        self.to_screen(video_id) +        video_id = self._match_id(url) +          webpage = self._download_webpage(url, video_id) -        config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage, -            u'config xml url') -        config = self._download_xml(config_xml_url, video_id, -            u'Downloading config xml') +        config_xml_url = self._search_regex( +            r'writeFLV\(\'(.+?)\',', webpage, 'config xml url') +        config = self._download_xml( +            config_xml_url, video_id, 'Downloading config xml')          encodings = config.find('ENCODINGS')          formats = [] -        for code in ['LOW', 'HIGH', 'HQ']: +        for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):              encoding = encodings.find(code)              if encoding is None:                  continue              encoding_url = encoding.find('FILENAME').text              formats.append({                  'url': encoding_url, -                'ext': determine_ext(encoding_url),                  'format_id': code.lower(), +                'quality': pref,              }) +        self._sort_formats(formats) -        descr = self._html_search_regex(r'<p class="Content Copy">(.*?)</p>', webpage, u'description') +        descr = self._html_search_regex( +            r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)          return {              'id': video_id,              'title': self._og_search_title(webpage), diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 0b3374d97..35d7d15e1 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -46,7 +46,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                          f4m_format['preference'] = 1                      formats.extend(f4m_formats)              elif video_url.endswith('.m3u8'): -                formats.extend(self._extract_m3u8_formats(video_url, video_id)) +                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))              elif video_url.startswith('rtmp'):                  formats.append({                      'url': video_url, @@ -58,7 +58,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):                  formats.append({                      'url': video_url,                      'format_id': format_id, -                    'preference': 2, +                    'preference': -1,                  })          self._sort_formats(formats) @@ -93,7 +93,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):      _TESTS = [{          'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', -        'md5': '9cecf35f99c4079c199e9817882a9a1c',          'info_dict': {              'id': '84981923',              'ext': 'flv', diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index d966e8403..ec6d96ada 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -37,7 +37,7 @@ class FunnyOrDieIE(InfoExtractor):          video_id = mobj.group('id')          webpage = self._download_webpage(url, video_id) -        links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage) +        links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)          if not links:              raise ExtractorError('No media links available for %s' % video_id) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dfc2ef4e7..35a7664b2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -28,6 +28,7 @@ from .brightcove import BrightcoveIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE  from .smotri import SmotriIE +from .condenast import CondeNastIE  class GenericIE(InfoExtractor): @@ -324,7 +325,7 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'age_limit': 18,                  'uploader': 'www.handjobhub.com', -                'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub', +                'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',              }          },          # RSS feed @@ -379,6 +380,32 @@ class GenericIE(InfoExtractor):                  'uploader': 'education-portal.com',              },          }, +        { +            'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', +            'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', +            'info_dict': { +                'id': 'uxjb0lwrcz', +                'ext': 'mp4', +                'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', +                'duration': 1715.0, +                'uploader': 'thoughtworks.wistia.com', +            }, +        }, +        # Direct download with broken HEAD +        { +            'url': 'http://ai-radio.org:8000/radio.opus', +            'info_dict': { +                'id': 'radio', +                'ext': 'opus', +                'title': 'radio', +            }, +            'params': { +                'skip_download': True,  # infinite live stream +            }, +            'expected_warnings': [ +                r'501.*Not Implemented' +            ], +        }      ]      def report_following_redirect(self, new_url): @@ -475,7 +502,8 @@ class GenericIE(InfoExtractor):                       'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'                      ) % (url, url), expected=True)              else: -                assert ':' in default_search +                if ':' not in default_search: +                    default_search += ':'                  return self.url_result(default_search + url)          url, smuggled_data = unsmuggle_url(url) @@ -490,14 +518,14 @@ class GenericIE(InfoExtractor):          self.to_screen('%s: Requesting header' % video_id)          head_req = HEADRequest(url) -        response = self._request_webpage( +        head_response = self._request_webpage(              head_req, video_id,              note=False, errnote='Could not send HEAD request to %s' % url,              fatal=False) -        if response is not False: +        if head_response is not False:              # Check for redirect -            new_url = response.geturl() +            new_url = head_response.geturl()              if url != new_url:                  self.report_following_redirect(new_url)                  if force_videoid: @@ -505,34 +533,35 @@ class GenericIE(InfoExtractor):                          new_url, {'force_videoid': force_videoid})                  return self.url_result(new_url) -            # Check for direct link to a video -            content_type = response.headers.get('Content-Type', '') -            m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) -            if m: -                upload_date = response.headers.get('Last-Modified') -                if upload_date: -                    upload_date = unified_strdate(upload_date) -                return { -                    'id': video_id, -                    'title': os.path.splitext(url_basename(url))[0], -                    'formats': [{ -                        'format_id': m.group('format_id'), -                        'url': url, -                        'vcodec': 'none' if m.group('type') == 'audio' else None -                    }], -                    'upload_date': upload_date, -                } +        full_response = None +        if head_response is False: +            full_response = self._request_webpage(url, video_id) +            head_response = full_response + +        # Check for direct link to a video +        content_type = head_response.headers.get('Content-Type', '') +        m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type) +        if m: +            upload_date = unified_strdate( +                head_response.headers.get('Last-Modified')) +            return { +                'id': video_id, +                'title': os.path.splitext(url_basename(url))[0], +                'formats': [{ +                    'format_id': m.group('format_id'), +                    'url': url, +                    'vcodec': 'none' if m.group('type') == 'audio' else None +                }], +                'upload_date': upload_date, +            }          if not self._downloader.params.get('test', False) and not is_intentional:              self._downloader.report_warning('Falling back on generic information extractor.') -        try: +        if full_response: +            webpage = self._webpage_read_content(full_response, url, video_id) +        else:              webpage = self._download_webpage(url, video_id) -        except ValueError: -            # since this is the last-resort InfoExtractor, if -            # this error is thrown, it'll be thrown here -            raise ExtractorError('Failed to download URL: %s' % url) -          self.report_extraction(video_id)          # Is it an RSS feed? @@ -608,13 +637,13 @@ class GenericIE(InfoExtractor):          if mobj:              player_url = unescapeHTML(mobj.group('url'))              surl = smuggle_url(player_url, {'Referer': url}) -            return self.url_result(surl, 'Vimeo') +            return self.url_result(surl)          # Look for embedded (swf embed) Vimeo player          mobj = re.search( -            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) +            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)          if mobj: -            return self.url_result(mobj.group(1), 'Vimeo') +            return self.url_result(mobj.group(1))          # Look for embedded YouTube player          matches = re.findall(r'''(?x) @@ -622,7 +651,8 @@ class GenericIE(InfoExtractor):                  <iframe[^>]+?src=|                  data-video-url=|                  <embed[^>]+?src=| -                embedSWF\(?:\s* +                embedSWF\(?:\s*| +                new\s+SWFObject\(              )              (["\'])                  (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ @@ -651,17 +681,20 @@ class GenericIE(InfoExtractor):          # Look for embedded Wistia player          match = re.search( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) +            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)          if match: +            embed_url = self._proto_relative_url( +                unescapeHTML(match.group('url')))              return {                  '_type': 'url_transparent', -                'url': unescapeHTML(match.group('url')), +                'url': embed_url,                  'ie_key': 'Wistia',                  'uploader': video_uploader,                  'title': video_title,                  'id': video_id,              } -        match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) +             +        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)          if match:              return {                  '_type': 'url_transparent', @@ -847,6 +880,12 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'MLB') +        mobj = re.search( +            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, +            webpage) +        if mobj is not None: +            return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') +          def check_video(vurl):              vpath = compat_urlparse.urlparse(vurl).path              vext = determine_ext(vpath) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py new file mode 100644 index 000000000..9561ed5fb --- /dev/null +++ b/youtube_dl/extractor/glide.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class GlideIE(InfoExtractor): +    IE_DESC = 'Glide mobile video messages (glide.me)' +    _VALID_URL = r'https?://share\.glide\.me/(?P<id>[A-Za-z0-9\-=_+]+)' +    _TEST = { +        'url': 'http://share.glide.me/UZF8zlmuQbe4mr+7dCiQ0w==', +        'md5': '4466372687352851af2d131cfaa8a4c7', +        'info_dict': { +            'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', +            'ext': 'mp4', +            'title': 'Damon Timm\'s Glide message', +            'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        title = self._html_search_regex( +            r'<title>(.*?)</title>', webpage, 'title') +        video_url = self.http_scheme() + self._search_regex( +            r'<source src="(.*?)" type="video/mp4">', webpage, 'video URL') +        thumbnail_url = self._search_regex( +            r'<img id="video-thumbnail" src="(.*?)"', +            webpage, 'thumbnail url', fatal=False) +        thumbnail = ( +            thumbnail_url if thumbnail_url is None +            else self.http_scheme() + thumbnail_url) + +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 07d994b44..fcefe54cd 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -1,13 +1,11 @@  # coding: utf-8  from __future__ import unicode_literals -import datetime  import re +import codecs  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -) +from ..utils import unified_strdate  class GooglePlusIE(InfoExtractor): @@ -19,74 +17,57 @@ class GooglePlusIE(InfoExtractor):          'info_dict': {              'id': 'ZButuJc6CtH',              'ext': 'flv', +            'title': '嘆きの天使 降臨',              'upload_date': '20120613',              'uploader': '井上ヨシマサ', -            'title': '嘆きの天使 降臨',          }      }      def _real_extract(self, url): -        # Extract id from URL -        mobj = re.match(self._VALID_URL, url) - -        video_id = mobj.group('id') +        video_id = self._match_id(url)          # Step 1, Retrieve post webpage to extract further information          webpage = self._download_webpage(url, video_id, 'Downloading entry webpage') -        self.report_extraction(video_id) - -        # Extract update date -        upload_date = self._html_search_regex( +        title = self._og_search_description(webpage).splitlines()[0] +        upload_date = unified_strdate(self._html_search_regex(              r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>                      ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''', -            webpage, 'upload date', fatal=False, flags=re.VERBOSE) -        if upload_date: -            # Convert timestring to a format suitable for filename -            upload_date = datetime.datetime.strptime(upload_date, "%Y-%m-%d") -            upload_date = upload_date.strftime('%Y%m%d') - -        # Extract uploader -        uploader = self._html_search_regex(r'rel\="author".*?>(.*?)</a>', -            webpage, 'uploader', fatal=False) - -        # Extract title -        # Get the first line for title -        video_title = self._og_search_description(webpage).splitlines()[0] +            webpage, 'upload date', fatal=False, flags=re.VERBOSE)) +        uploader = self._html_search_regex( +            r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)          # Step 2, Simulate clicking the image box to launch video          DOMAIN = 'https://plus.google.com/' -        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN), +        video_page = self._search_regex( +            r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),              webpage, 'video page URL')          if not video_page.startswith(DOMAIN):              video_page = DOMAIN + video_page          webpage = self._download_webpage(video_page, video_id, 'Downloading video page') -        # Extract video links all sizes -        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"' -        mobj = re.findall(pattern, webpage) -        if len(mobj) == 0: -            raise ExtractorError('Unable to extract video links') - -        # Sort in resolution -        links = sorted(mobj) +        def unicode_escape(s): +            decoder = codecs.getdecoder('unicode_escape') +            return re.sub( +                r'\\u[0-9a-fA-F]{4,}', +                lambda m: decoder(m.group(0))[0], +                s) -        # Choose the lowest of the sort, i.e. highest resolution -        video_url = links[-1] -        # Only get the url. The resolution part in the tuple has no use anymore -        video_url = video_url[-1] -        # Treat escaped \u0026 style hex -        try: -            video_url = video_url.decode("unicode_escape") -        except AttributeError: # Python 3 -            video_url = bytes(video_url, 'ascii').decode('unicode-escape') +        # Extract video links all sizes +        formats = [{ +            'url': unicode_escape(video_url), +            'ext': 'flv', +            'width': int(width), +            'height': int(height), +        } for width, height, video_url in re.findall( +            r'\d+,(\d+),(\d+),"(https?://redirector\.googlevideo\.com.*?)"', webpage)] +        self._sort_formats(formats)          return {              'id': video_id, -            'url': video_url, +            'title': title,              'uploader': uploader,              'upload_date': upload_date, -            'title': video_title, -            'ext': 'flv', +            'formats': formats,          } diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 45cca1d24..e21e57510 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -46,9 +46,9 @@ class GorillaVidIE(InfoExtractor):          'info_dict': {              'id': '3rso4kdn6f9m',              'ext': 'mp4', -            'title': 'Micro Pig piglets ready on 16th July 2009', +            'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',              'thumbnail': 're:http://.*\.jpg', -        }, +        }      }, {          'url': 'http://movpod.in/0wguyyxi1yca',          'only_matching': True, diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index 5bdd08afa..b6cc15b6f 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -1,37 +1,33 @@  # -*- coding: utf-8 -*- - -import re -import json +from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import determine_ext +  class HarkIE(InfoExtractor): -    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+' +    _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'      _TEST = { -        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', -        u'file': u'mmbzyhkgny.mp3', -        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee', -        u'info_dict': { -            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013", -            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', -            u'duration': 11, +        'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', +        'md5': '6783a58491b47b92c7c1af5a77d4cbee', +        'info_dict': { +            'id': 'mmbzyhkgny', +            'ext': 'mp3', +            'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013', +            'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.', +            'duration': 11,          }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) -        json_url = "http://www.hark.com/clips/%s.json" %(video_id) -        info_json = self._download_webpage(json_url, video_id) -        info = json.loads(info_json) -        final_url = info['url'] +        video_id = self._match_id(url) +        data = self._download_json( +            'http://www.hark.com/clips/%s.json' % video_id, video_id) -        return {'id': video_id, -                'url' : final_url, -                'title': info['name'], -                'ext': determine_ext(final_url), -                'description': info['description'], -                'thumbnail': info['image_original'], -                'duration': info['duration'], -                } +        return { +            'id': video_id, +            'url': data['url'], +            'title': data['name'], +            'description': data.get('description'), +            'thumbnail': data.get('image_original'), +            'duration': data.get('duration'), +        } diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index f97b1e085..d41c0413f 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals  from .common import InfoExtractor  from ..utils import (      get_meta_content, +    int_or_none,      parse_iso8601,  ) @@ -28,20 +29,26 @@ class HeiseIE(InfoExtractor):              'timestamp': 1411812600,              'upload_date': '20140927',              'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', +            'thumbnail': 're:https?://.*\.jpg$',          }      }      def _real_extract(self, url):          video_id = self._match_id(url) -          webpage = self._download_webpage(url, video_id) -        json_url = self._search_regex( -            r'json_url:\s*"([^"]+)"', webpage, 'json URL') -        config = self._download_json(json_url, video_id) + +        container_id = self._search_regex( +            r'<div class="videoplayerjw".*?data-container="([0-9]+)"', +            webpage, 'container ID') +        sequenz_id = self._search_regex( +            r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"', +            webpage, 'sequenz ID') +        data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id) +        doc = self._download_xml(data_url, video_id)          info = {              'id': video_id, -            'thumbnail': config.get('poster'), +            'thumbnail': self._og_search_thumbnail(webpage),              'timestamp': parse_iso8601(get_meta_content('date', webpage)),              'description': self._og_search_description(webpage),          } @@ -49,32 +56,19 @@ class HeiseIE(InfoExtractor):          title = get_meta_content('fulltitle', webpage)          if title:              info['title'] = title -        elif config.get('title'): -            info['title'] = config['title']          else:              info['title'] = self._og_search_title(webpage)          formats = [] -        for t, rs in config['formats'].items(): -            if not rs or not hasattr(rs, 'items'): -                self._downloader.report_warning( -                    'formats: {0}: no resolutions'.format(t)) -                continue - -            for height_str, obj in rs.items(): -                format_id = '{0}_{1}'.format(t, height_str) - -                if not obj or not obj.get('url'): -                    self._downloader.report_warning( -                        'formats: {0}: no url'.format(format_id)) -                    continue - -                formats.append({ -                    'url': obj['url'], -                    'format_id': format_id, -                    'height': self._int(height_str, 'height'), -                }) - +        for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'): +            label = source_node.attrib['label'] +            height = int_or_none(self._search_regex( +                r'^(.*?_)?([0-9]+)p$', label, 'height', default=None)) +            formats.append({ +                'url': source_node.attrib['file'], +                'format_note': label, +                'height': height, +            })          self._sort_formats(formats)          info['formats'] = formats diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index 68684b997..fccc23884 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -28,13 +28,13 @@ class HowStuffWorksIE(InfoExtractor):              }          },          { -            'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm', +            'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',              'info_dict': { -                'id': '553470', -                'display_id': 'deadliest-catch-jakes-farewell-pots', +                'id': '453464', +                'display_id': 'survival-zone-food-and-water-in-the-savanna',                  'ext': 'mp4', -                'title': 'Deadliest Catch: Jake\'s Farewell Pots', -                'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc', +                'title': 'Survival Zone: Food and Water In the Savanna', +                'description': 'md5:7e1c89f6411434970c15fa094170c371',                  'thumbnail': 're:^https?://.*\.jpg$',              },              'params': { diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index 94e7cf790..4ccf6b9b8 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -33,8 +33,7 @@ class HuffPostIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id          data = self._download_json(api_url, video_id)['data'] diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py deleted file mode 100644 index 27017e89f..000000000 --- a/youtube_dl/extractor/justintv.py +++ /dev/null @@ -1,155 +0,0 @@ -from __future__ import unicode_literals - -import itertools -import json -import os -import re - -from .common import InfoExtractor -from ..utils import ( -    compat_str, -    ExtractorError, -    formatSeconds, -) - - -class JustinTVIE(InfoExtractor): -    """Information extractor for justin.tv and twitch.tv""" -    # TODO: One broadcast may be split into multiple videos. The key -    # 'broadcast_id' is the same for all parts, and 'broadcast_part' -    # starts at 1 and increases. Can we treat all parts as one video? - -    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/ -        (?: -            (?P<channelid>[^/]+)| -            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| -            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) -        ) -        /?(?:\#.*)?$ -        """ -    _JUSTIN_PAGE_LIMIT = 100 -    IE_NAME = 'justin.tv' -    IE_DESC = 'justin.tv and twitch.tv' -    _TEST = { -        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360', -        'md5': 'ecaa8a790c22a40770901460af191c9a', -        'info_dict': { -            'id': '296128360', -            'ext': 'flv', -            'upload_date': '20110927', -            'uploader_id': 25114803, -            'uploader': 'thegamedevhub', -            'title': 'Beginner Series - Scripting With Python Pt.1' -        } -    } - -    # Return count of items, list of *valid* items -    def _parse_page(self, url, video_id, counter): -        info_json = self._download_webpage( -            url, video_id, -            'Downloading video info JSON on page %d' % counter, -            'Unable to download video info JSON %d' % counter) - -        response = json.loads(info_json) -        if type(response) != list: -            error_text = response.get('error', 'unknown error') -            raise ExtractorError('Justin.tv API: %s' % error_text) -        info = [] -        for clip in response: -            video_url = clip['video_file_url'] -            if video_url: -                video_extension = os.path.splitext(video_url)[1][1:] -                video_date = re.sub('-', '', clip['start_time'][:10]) -                video_uploader_id = clip.get('user_id', clip.get('channel_id')) -                video_id = clip['id'] -                video_title = clip.get('title', video_id) -                info.append({ -                    'id': compat_str(video_id), -                    'url': video_url, -                    'title': video_title, -                    'uploader': clip.get('channel_name', video_uploader_id), -                    'uploader_id': video_uploader_id, -                    'upload_date': video_date, -                    'ext': video_extension, -                }) -        return (len(response), info) - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) - -        api_base = 'http://api.justin.tv' -        paged = False -        if mobj.group('channelid'): -            paged = True -            video_id = mobj.group('channelid') -            api = api_base + '/channel/archives/%s.json' % video_id -        elif mobj.group('chapterid'): -            chapter_id = mobj.group('chapterid') - -            webpage = self._download_webpage(url, chapter_id) -            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) -            if not m: -                raise ExtractorError('Cannot find archive of a chapter') -            archive_id = m.group(1) - -            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id -            doc = self._download_xml( -                api, chapter_id, -                note='Downloading chapter information', -                errnote='Chapter information download failed') -            for a in doc.findall('.//archive'): -                if archive_id == a.find('./id').text: -                    break -            else: -                raise ExtractorError('Could not find chapter in chapter information') - -            video_url = a.find('./video_file_url').text -            video_ext = video_url.rpartition('.')[2] or 'flv' - -            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id -            chapter_info = self._download_json( -                chapter_api_url, 'c' + chapter_id, -                note='Downloading chapter metadata', -                errnote='Download of chapter metadata failed') - -            bracket_start = int(doc.find('.//bracket_start').text) -            bracket_end = int(doc.find('.//bracket_end').text) - -            # TODO determine start (and probably fix up file) -            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 -            #video_url += '?start=' + TODO:start_timestamp -            # bracket_start is 13290, but we want 51670615 -            self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' -                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) - -            info = { -                'id': 'c' + chapter_id, -                'url': video_url, -                'ext': video_ext, -                'title': chapter_info['title'], -                'thumbnail': chapter_info['preview'], -                'description': chapter_info['description'], -                'uploader': chapter_info['channel']['display_name'], -                'uploader_id': chapter_info['channel']['name'], -            } -            return info -        else: -            video_id = mobj.group('videoid') -            api = api_base + '/broadcast/by_archive/%s.json' % video_id - -        entries = [] -        offset = 0 -        limit = self._JUSTIN_PAGE_LIMIT -        for counter in itertools.count(1): -            page_url = api + ('?offset=%d&limit=%d' % (offset, limit)) -            page_count, page_info = self._parse_page( -                page_url, video_id, counter) -            entries.extend(page_info) -            if not paged or page_count != limit: -                break -            offset += limit -        return { -            '_type': 'playlist', -            'id': video_id, -            'entries': entries, -        } diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 56a76380c..827091e60 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -1,8 +1,6 @@  # encoding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor @@ -21,22 +19,17 @@ class KickStarterIE(InfoExtractor):      }, {          'note': 'Embedded video (not using the native kickstarter video service)',          'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178', -        'playlist': [ -            { -                'info_dict': { -                    'id': '78704821', -                    'ext': 'mp4', -                    'uploader_id': 'pebble', -                    'uploader': 'Pebble Technology', -                    'title': 'Pebble iOS Notifications', -                } -            } -        ], +        'info_dict': { +            'id': '78704821', +            'ext': 'mp4', +            'uploader_id': 'pebble', +            'uploader': 'Pebble Technology', +            'title': 'Pebble iOS Notifications', +        }      }]      def _real_extract(self, url): -        m = re.match(self._VALID_URL, url) -        video_id = m.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          title = self._html_search_regex( diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 5341ac773..8a73ecfa0 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -34,7 +34,7 @@ class KontrTubeIE(InfoExtractor):          video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')          thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)          title = self._html_search_regex( -            r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title') +            r'<title>(.+?)</title>', webpage, 'video title')          description = self._html_search_meta('description', webpage, 'video description')          mobj = re.search( diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 516147417..363a12ad0 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -190,7 +190,8 @@ class LivestreamOriginalIE(InfoExtractor):              'id': video_id,              'title': item.find('title').text,              'url': 'rtmp://extondemand.livestream.com/ondemand', -            'play_path': 'mp4:trans/dv15/mogulus-{0}.mp4'.format(path), +            'play_path': 'trans/dv15/mogulus-{0}'.format(path), +            'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque',              'ext': 'flv',              'thumbnail': thumbnail_url,          } diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index fca0bfef0..db5df4078 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -22,7 +22,7 @@ class LRTIE(InfoExtractor):              'id': '54391',              'ext': 'mp4',              'title': 'Septynios Kauno dienos', -            'description': 'Kauno miesto ir apskrities naujienos', +            'description': 'md5:24d84534c7dc76581e59f5689462411a',              'duration': 1783,          },          'params': { diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 979f3d692..6691521e5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -6,6 +6,7 @@ import json  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse, +    compat_urlparse,      get_element_by_attribute,      parse_duration,      strip_jsonp, @@ -39,13 +40,21 @@ class MiTeleIE(InfoExtractor):          ).replace('\'', '"')          embed_data = json.loads(embed_data_json) -        info_url = embed_data['flashvars']['host'] +        domain = embed_data['mediaUrl'] +        if not domain.startswith('http'): +            # only happens in telecinco.es videos +            domain = 'http://' + domain +        info_url = compat_urlparse.urljoin( +            domain, +            compat_urllib_parse.unquote(embed_data['flashvars']['host']) +        )          info_el = self._download_xml(info_url, episode).find('./video/info')          video_link = info_el.find('videoUrl/link').text          token_query = compat_urllib_parse.urlencode({'id': video_link})          token_info = self._download_json( -            'http://token.mitele.es/?' + token_query, episode, +            embed_data['flashvars']['ov_tk'] + '?' + token_query, +            episode,              transform_source=strip_jsonp          ) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 520f27fca..bb8937c4d 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -33,22 +33,22 @@ class MixcloudIE(InfoExtractor):          },      } -    def check_urls(self, url_list): -        """Returns 1st active url from list""" -        for url in url_list: +    def _get_url(self, track_id, template_url): +        server_count = 30 +        for i in range(server_count): +            url = template_url % i              try:                  # We only want to know if the request succeed                  # don't download the whole file -                self._request_webpage(HEADRequest(url), None, False) +                self._request_webpage( +                    HEADRequest(url), track_id, +                    'Checking URL %d/%d ...' % (i + 1, server_count + 1))                  return url              except ExtractorError: -                url = None +                pass          return None -    def _get_url(self, template_url): -        return self.check_urls(template_url % i for i in range(30)) -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          uploader = mobj.group(1) @@ -61,16 +61,16 @@ class MixcloudIE(InfoExtractor):              r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')          song_url = preview_url.replace('/previews/', '/c/originals/')          template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) -        final_song_url = self._get_url(template_url) +        final_song_url = self._get_url(track_id, template_url)          if final_song_url is None:              self.to_screen('Trying with m4a extension')              template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') -            final_song_url = self._get_url(template_url) +            final_song_url = self._get_url(track_id, template_url)          if final_song_url is None:              raise ExtractorError('Unable to extract track url')          PREFIX = ( -            r'<div class="cloudcast-play-button-container"' +            r'<div class="cloudcast-play-button-container[^"]*?"'              r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')          title = self._html_search_regex(              PREFIX + r'm-title="([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 6229b2173..3621ff99e 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -5,20 +5,20 @@ import re  from .common import InfoExtractor  from ..utils import ( -    int_or_none, +    str_to_int,      unified_strdate,  )  class MotherlessIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)' +    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'      _TESTS = [          {              'url': 'http://motherless.com/AC3FFE1', -            'md5': '5527fef81d2e529215dad3c2d744a7d9', +            'md5': '310f62e325a9fafe64f68c0bccb6e75f',              'info_dict': {                  'id': 'AC3FFE1', -                'ext': 'flv', +                'ext': 'mp4',                  'title': 'Fucked in the ass while playing PS3',                  'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],                  'upload_date': '20100913', @@ -40,33 +40,51 @@ class MotherlessIE(InfoExtractor):                  'thumbnail': 're:http://.*\.jpg',                  'age_limit': 18,              } +        }, +        { +            'url': 'http://motherless.com/g/cosplay/633979F', +            'md5': '0b2a43f447a49c3e649c93ad1fafa4a0', +            'info_dict': { +                'id': '633979F', +                'ext': 'mp4', +                'title': 'Turtlette', +                'categories': ['superheroine heroine  superher'], +                'upload_date': '20140827', +                'uploader_id': 'shade0230', +                'thumbnail': 're:http://.*\.jpg', +                'age_limit': 18, +            }          }      ] -    def _real_extract(self,url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +    def _real_extract(self, url): +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') -         -        video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url') +        title = self._html_search_regex( +            r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') +        video_url = self._html_search_regex( +            r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')          age_limit = self._rta_search(webpage) - -        view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count') +        view_count = str_to_int(self._html_search_regex( +            r'<strong>Views</strong>\s+([^<]+)<', +            webpage, 'view count', fatal=False)) +        like_count = str_to_int(self._html_search_regex( +            r'<strong>Favorited</strong>\s+([^<]+)<', +            webpage, 'like count', fatal=False)) -        upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date') +        upload_date = self._html_search_regex( +            r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')          if 'Ago' in upload_date:              days = int(re.search(r'([0-9]+)', upload_date).group(1))              upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')          else:              upload_date = unified_strdate(upload_date) -        like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count') -          comment_count = webpage.count('class="media-comment-contents"') -        uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id') +        uploader_id = self._html_search_regex( +            r'"thumb-member-username">\s+<a href="/m/([^"]+)"', +            webpage, 'uploader_id')          categories = self._html_search_meta('keywords', webpage)          if categories: @@ -79,8 +97,8 @@ class MotherlessIE(InfoExtractor):              'uploader_id': uploader_id,              'thumbnail': self._og_search_thumbnail(webpage),              'categories': categories, -            'view_count': int_or_none(view_count.replace(',', '')), -            'like_count': int_or_none(like_count.replace(',', '')), +            'view_count': view_count, +            'like_count': like_count,              'comment_count': comment_count,              'age_limit': age_limit,              'url': video_url, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index e75ab7c39..7b5449031 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -26,8 +26,7 @@ class NBCIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')          if theplatform_url.startswith('//'): @@ -57,7 +56,7 @@ class NBCNewsIE(InfoExtractor):              'md5': 'b2421750c9f260783721d898f4c42063',              'info_dict': {                  'id': 'I1wpAI_zmhsQ', -                'ext': 'flv', +                'ext': 'mp4',                  'title': 'How Twitter Reacted To The Snowden Interview',                  'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',              }, @@ -97,6 +96,8 @@ class NBCNewsIE(InfoExtractor):              ]              for base_url in base_urls: +                if not base_url: +                    continue                  playlist_url = base_url + '?form=MPXNBCNewsAPI'                  all_videos = self._download_json(playlist_url, title)['videos'] diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 94d5ba982..add4b3e5d 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -18,16 +18,16 @@ class NDRIE(InfoExtractor):      _TESTS = [          { -            'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html', -            'md5': '4a4eeafd17c3058b65f0c8f091355855', +            'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html', +            'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c',              'note': 'Video file',              'info_dict': { -                'id': '325', +                'id': '25866',                  'ext': 'mp4', -                'title': 'Blaue Bohnen aus Blocken', -                'description': 'md5:190d71ba2ccddc805ed01547718963bc', -                'duration': 1715, -            }, +                'title': 'Kartoffeltage in der Lewitz', +                'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8', +                'duration': 166, +            }          },          {              'url': 'http://www.ndr.de/info/audio51535.html', diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 072d9cf8e..82af6e330 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -7,7 +7,6 @@ from .common import InfoExtractor  from ..utils import (      compat_urlparse,      compat_urllib_parse, -    determine_ext,      unified_strdate,  ) @@ -22,21 +21,23 @@ class NHLBaseInfoExtractor(InfoExtractor):          self.report_extraction(video_id)          initial_video_url = info['publishPoint'] -        data = compat_urllib_parse.urlencode({ -            'type': 'fvod', -            'path': initial_video_url.replace('.mp4', '_sd.mp4'), -        }) -        path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data -        path_doc = self._download_xml( -            path_url, video_id, 'Downloading final video url') -        video_url = path_doc.find('path').text +        if info['formats'] == '1': +            data = compat_urllib_parse.urlencode({ +                'type': 'fvod', +                'path': initial_video_url.replace('.mp4', '_sd.mp4'), +            }) +            path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data +            path_doc = self._download_xml( +                path_url, video_id, 'Downloading final video url') +            video_url = path_doc.find('path').text +        else: +           video_url = initial_video_url          join = compat_urlparse.urljoin          return {              'id': video_id,              'title': info['name'],              'url': video_url, -            'ext': determine_ext(video_url),              'description': info['description'],              'duration': int(info['duration']),              'thumbnail': join(join(video_url, '/u/'), info['bigImage']), @@ -46,10 +47,11 @@ class NHLBaseInfoExtractor(InfoExtractor):  class NHLIE(NHLBaseInfoExtractor):      IE_NAME = 'nhl.com' -    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)' +    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'      _TESTS = [{          'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', +        'md5': 'db704a4ea09e8d3988c85e36cc892d09',          'info_dict': {              'id': '453614',              'ext': 'mp4', @@ -59,6 +61,17 @@ class NHLIE(NHLBaseInfoExtractor):              'upload_date': '20131006',          },      }, { +        'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h', +        'md5': 'd22e82bc592f52d37d24b03531ee9696', +        'info_dict': { +            'id': '2014020024-628-h', +            'ext': 'mp4', +            'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)', +            'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014', +            'duration': 0, +            'upload_date': '20141011', +        }, +    }, {          'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',          'only_matching': True,      }] diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index c0c139b5d..7b85589b7 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -39,18 +39,17 @@ class NiconicoIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'      _NETRC_MACHINE = 'niconico' -    # Determine whether the downloader uses authentication to download video -    _AUTHENTICATE = False +    # Determine whether the downloader used authentication to download video +    _AUTHENTICATED = False      def _real_initialize(self): -        if self._downloader.params.get('username', None) is not None: -            self._AUTHENTICATE = True - -        if self._AUTHENTICATE: -            self._login() +        self._login()      def _login(self):          (username, password) = self._get_login_info() +        # No authentication to be performed +        if not username: +            return True          # Log in          login_form_strs = { @@ -68,6 +67,8 @@ class NiconicoIE(InfoExtractor):          if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:              self._downloader.report_warning('unable to log in: bad username or password')              return False +        # Successful login +        self._AUTHENTICATED = True          return True      def _real_extract(self, url): @@ -82,7 +83,7 @@ class NiconicoIE(InfoExtractor):              'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,              note='Downloading video info page') -        if self._AUTHENTICATE: +        if self._AUTHENTICATED:              # Get flv info              flv_info_webpage = self._download_webpage(                  'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 8f140d626..6118ed5c2 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -80,8 +80,14 @@ class PBSIE(InfoExtractor):                  'thumbnail': 're:^https?://.*\.jpg$',                  'upload_date': '20140122',              } +        }, +        { +            'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/', +            'info_dict': { +                'id': 'united-states-of-secrets', +            }, +            'playlist_count': 2,          } -      ]      def _extract_webpage(self, url): @@ -96,6 +102,12 @@ class PBSIE(InfoExtractor):                  r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',                  webpage, 'upload date', default=None)) +            # tabbed frontline videos +            tabbed_videos = re.findall( +                r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage) +            if tabbed_videos: +                return tabbed_videos, presumptive_id, upload_date +              MEDIA_ID_REGEXES = [                  r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed                  r'class="coveplayerid">([^<]+)<',                       # coveplayer @@ -130,6 +142,12 @@ class PBSIE(InfoExtractor):      def _real_extract(self, url):          video_id, display_id, upload_date = self._extract_webpage(url) +        if isinstance(video_id, list): +            entries = [self.url_result( +                'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id) +                for vid_id in video_id] +            return self.playlist_result(entries, display_id) +          info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id          info = self._download_json(info_url, display_id) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 4118ee956..618e8f5dd 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -16,13 +16,14 @@ from ..aes import (  class PornHubIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))' +    _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'      _TEST = {          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', -        'file': '648719015.mp4',          'md5': '882f488fa1f0026f023f33576004a2ed',          'info_dict': { -            "uploader": "BABES-COM", +            'id': '648719015', +            'ext': 'mp4', +            "uploader": "Babes",              "title": "Seductive Indian beauty strips down and fingers her pink pussy",              "age_limit": 18          } @@ -35,9 +36,7 @@ class PornHubIE(InfoExtractor):          return count      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') -        url = 'http://www.' + mobj.group('url') +        video_id = self._match_id(url)          req = compat_urllib_request.Request(url)          req.add_header('Cookie', 'age_verified=1') @@ -45,7 +44,7 @@ class PornHubIE(InfoExtractor):          video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')          video_uploader = self._html_search_regex( -            r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', +            r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',              webpage, 'uploader', fatal=False)          thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)          if thumbnail: diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 463e85501..7fcde086c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -14,7 +14,6 @@ from ..utils import (  class PromptFileIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)' -    _FILE_NOT_FOUND_REGEX = r'<div.+id="not_found_msg".+>.+</div>[^-]'      _TEST = {          'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',          'md5': 'd1451b6302da7215485837aaea882c4c', @@ -27,11 +26,10 @@ class PromptFileIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: +        if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:              raise ExtractorError('Video %s does not exist' % video_id,                                   expected=True) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index a45884b25..1a41cbe40 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -81,7 +81,7 @@ class RTLnowIE(InfoExtractor):                  'id': '99205',                  'ext': 'flv',                  'title': 'Medicopter 117 - Angst!', -                'description': 'md5:895b1df01639b5f61a04fc305a5cb94d', +                'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',                  'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',                  'upload_date': '20080928',                  'duration': 2691, diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 55b58e5e6..0e470e73f 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -1,8 +1,6 @@  # -*- coding: utf-8 -*-  from __future__ import unicode_literals -import re -  from .common import InfoExtractor @@ -21,19 +19,20 @@ class RUHDIE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          video_url = self._html_search_regex(              r'<param name="src" value="([^"]+)"', webpage, 'video url')          title = self._html_search_regex( -            r'<title>([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!</title>', webpage, 'title') +            r'<title>([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!</title>', +            webpage, 'title')          description = self._html_search_regex( -            r'(?s)<div id="longdesc">(.+?)<span id="showlink">', webpage, 'description', fatal=False) +            r'(?s)<div id="longdesc">(.+?)<span id="showlink">', +            webpage, 'description', fatal=False)          thumbnail = self._html_search_regex( -            r'<param name="previewImage" value="([^"]+)"', webpage, 'thumbnail', fatal=False) +            r'<param name="previewImage" value="([^"]+)"', +            webpage, 'thumbnail', fatal=False)          if thumbnail:              thumbnail = 'http://www.ruhd.ru' + thumbnail diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py new file mode 100644 index 000000000..c833fc8ee --- /dev/null +++ b/youtube_dl/extractor/sexykarma.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate, +    parse_duration, +    int_or_none, +) + + +class SexyKarmaIE(InfoExtractor): +    IE_DESC = 'Sexy Karma and Watch Indian Porn' +    _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' +    _TESTS = [{ +        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html', +        'md5': 'b9798e7d1ef1765116a8f516c8091dbd', +        'info_dict': { +            'id': 'yHI70cOyIHt', +            'display_id': 'taking-a-quick-pee', +            'ext': 'mp4', +            'title': 'Taking a quick pee.', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'wildginger7', +            'upload_date': '20141007', +            'duration': 22, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }, { +        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', +        'md5': 'dd216c68d29b49b12842b9babe762a5d', +        'info_dict': { +            'id': '8Id6EZPbuHf', +            'display_id': 'pot-pixie-tribute', +            'ext': 'mp4', +            'title': 'pot_pixie tribute', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'banffite', +            'upload_date': '20141013', +            'duration': 16, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }, { +        'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', +        'md5': '9afb80675550406ed9a63ac2819ef69d', +        'info_dict': { +            'id': 'dW2mtctxJfs', +            'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', +            'ext': 'mp4', +            'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', +            'thumbnail': 're:^https?://.*\.jpg$', +            'uploader': 'Don', +            'upload_date': '20140213', +            'duration': 83, +            'view_count': int, +            'comment_count': int, +            'categories': list, +        } +    }] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') + +        webpage = self._download_webpage(url, display_id) + +        video_url = self._html_search_regex( +            r"url: escape\('([^']+)'\)", webpage, 'url') + +        title = self._html_search_regex( +            r'<h2 class="he2"><span>(.*?)</span>', +            webpage, 'title') +        thumbnail = self._html_search_regex( +            r'<span id="container"><img\s+src="([^"]+)"', +            webpage, 'thumbnail', fatal=False) + +        uploader = self._html_search_regex( +            r'class="aupa">\s*(.*?)</a>', +            webpage, 'uploader') +        upload_date = unified_strdate(self._html_search_regex( +            r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False)) + +        duration = parse_duration(self._search_regex( +            r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', +            webpage, 'duration', fatal=False)) + +        view_count = int_or_none(self._search_regex( +            r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', +            webpage, 'view count', fatal=False)) +        comment_count = int_or_none(self._search_regex( +            r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', +            webpage, 'comment count', fatal=False)) + +        categories = re.findall( +            r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', +            webpage) + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': video_url, +            'title': title, +            'thumbnail': thumbnail, +            'uploader': uploader, +            'upload_date': upload_date, +            'duration': duration, +            'view_count': view_count, +            'comment_count': comment_count, +            'categories': categories, +        } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 4719ba45c..54256e1a2 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -40,14 +40,15 @@ class SoundcloudIE(InfoExtractor):      _TESTS = [          {              'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', -            'file': '62986583.mp3',              'md5': 'ebef0a451b909710ed1d7787dddbf0d7',              'info_dict': { -                "upload_date": "20121011", -                "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", -                "uploader": "E.T. ExTerrestrial Music", -                "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1", -                "duration": 143, +                'id': '62986583', +                'ext': 'mp3', +                'upload_date': '20121011', +                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', +                'uploader': 'E.T. ExTerrestrial Music', +                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', +                'duration': 143,              }          },          # not streamable song @@ -103,7 +104,7 @@ class SoundcloudIE(InfoExtractor):                  'id': '128590877',                  'ext': 'mp3',                  'title': 'Bus Brakes', -                'description': 'md5:0170be75dd395c96025d210d261c784e', +                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',                  'uploader': 'oddsamples',                  'upload_date': '20140109',                  'duration': 17, @@ -140,6 +141,7 @@ class SoundcloudIE(InfoExtractor):              'description': info['description'],              'thumbnail': thumbnail,              'duration': int_or_none(info.get('duration'), 1000), +            'webpage_url': info.get('permalink_url'),          }          formats = []          if info.get('downloadable', False): diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 19cc976e3..becdf658f 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -7,7 +7,6 @@ from .common import InfoExtractor  from ..utils import (      parse_duration,      parse_iso8601, -    int_or_none,  ) @@ -26,7 +25,6 @@ class SportBoxIE(InfoExtractor):                  'timestamp': 1411896237,                  'upload_date': '20140928',                  'duration': 4846, -                'view_count': int,              },              'params': {                  # m3u8 download @@ -65,8 +63,6 @@ class SportBoxIE(InfoExtractor):              r'<span itemprop="uploadDate">([^<]+)</span>', webpage, 'timestamp', fatal=False))          duration = parse_duration(self._html_search_regex(              r'<meta itemprop="duration" content="PT([^"]+)">', webpage, 'duration', fatal=False)) -        view_count = int_or_none(self._html_search_regex( -            r'<span>Просмотров: (\d+)</span>', player, 'view count', fatal=False))          return {              'id': video_id, @@ -76,6 +72,5 @@ class SportBoxIE(InfoExtractor):              'thumbnail': thumbnail,              'timestamp': timestamp,              'duration': duration, -            'view_count': view_count,              'formats': formats,          } diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py new file mode 100644 index 000000000..d92d14d65 --- /dev/null +++ b/youtube_dl/extractor/srmediathek.py @@ -0,0 +1,43 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import js_to_json + + +class SRMediathekIE(InfoExtractor): +    IE_DESC = 'Süddeutscher Rundfunk' +    _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' + +    _TEST = { +        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455', +        'info_dict': { +            'id': '28455', +            'ext': 'mp4', +            'title': 'sportarena (26.10.2014)', +            'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ', +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        urls = json.loads(js_to_json(self._search_regex( +            r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs'))) +        formats = [{'url': url} for url in urls] +        self._sort_formats(formats) + +        title = json.loads(js_to_json(self._search_regex( +            r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0] + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'description': self._og_search_description(webpage), +            'thumbnail': self._og_search_thumbnail(webpage), +        } diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index f76b6e2b2..5ca079f88 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -10,7 +10,6 @@ class SyfyIE(InfoExtractor):      _TESTS = [{          'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', -        'md5': 'e07de1d52c7278adbb9b9b1c93a66849',          'info_dict': {              'id': 'NmqMrGnXvmO1',              'ext': 'flv', diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d5e28efad..cd4af96fd 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -65,6 +65,22 @@ class TEDIE(SubtitlesInfoExtractor):              'title': 'Who are the hackers?',          },          'playlist_mincount': 6, +    }, { +        # contains a youtube video +        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything', +        'add_ie': ['Youtube'], +        'info_dict': { +            'id': '_ZG8HBuDjgc', +            'ext': 'mp4', +            'title': 'Douglas Adams: Parrots the Universe and Everything', +            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af', +            'uploader': 'University of California Television (UCTV)', +            'uploader_id': 'UCtelevision', +            'upload_date': '20080522', +        }, +        'params': { +            'skip_download': True, +        },      }]      _NATIVE_FORMATS = { @@ -114,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):          talk_info = self._extract_info(webpage)['talks'][0] +        if talk_info.get('external') is not None: +            self.to_screen('Found video from %s' % talk_info['external']['service']) +            return { +                '_type': 'url', +                'url': talk_info['external']['uri'], +            } +          formats = [{              'url': format_url,              'format_id': format_id, diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py new file mode 100644 index 000000000..db9788c18 --- /dev/null +++ b/youtube_dl/extractor/telecinco.py @@ -0,0 +1,19 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .mitele import MiTeleIE + + +class TelecincoIE(MiTeleIE): +    IE_NAME = 'telecinco.es' +    _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<episode>.*?)\.html' + +    _TEST = { +        'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', +        'info_dict': { +            'id': 'MDSVID20141015_0058', +            'ext': 'mp4', +            'title': 'Con Martín Berasategui, hacer un bacalao al ...', +            'duration': 662, +        }, +    } diff --git a/youtube_dl/extractor/theonion.py b/youtube_dl/extractor/theonion.py new file mode 100644 index 000000000..b65d8e03f --- /dev/null +++ b/youtube_dl/extractor/theonion.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class TheOnionIE(InfoExtractor): +    _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' +    _TEST = { +        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', +        'md5': '19eaa9a39cf9b9804d982e654dc791ee', +        'info_dict': { +            'id': '2133', +            'ext': 'mp4', +            'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image', +            'description': 'md5:cc12448686b5600baae9261d3e180910', +            'thumbnail': 're:^https?://.*\.jpg\?\d+$', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        article_id = mobj.group('article_id') + +        webpage = self._download_webpage(url, article_id) + +        video_id = self._search_regex( +            r'"videoId":\s(\d+),', webpage, 'video ID') +        title = self._og_search_title(webpage) +        description = self._og_search_description(webpage) +        thumbnail = self._og_search_thumbnail(webpage) + +        sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) +        if not sources: +            raise ExtractorError( +                'No sources found for video %s' % video_id, expected=True) + +        formats = [] +        for src, type_ in sources: +            if type_ == 'video/mp4': +                formats.append({ +                    'format_id': 'mp4_sd', +                    'preference': 1, +                    'url': src, +                }) +            elif type_ == 'video/webm': +                formats.append({ +                    'format_id': 'webm_sd', +                    'preference': 0, +                    'url': src, +                }) +            elif type_ == 'application/x-mpegURL': +                formats.extend( +                    self._extract_m3u8_formats(src, video_id, preference=-1)) +            else: +                self.report_warning( +                    'Encountered unexpected format: %s' % type_) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'thumbnail': thumbnail, +            'description': description, +        } diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0be793b1c..a04925633 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -6,6 +6,7 @@ import json  from .common import InfoExtractor  from ..utils import (      compat_str, +    determine_ext,      ExtractorError,      xpath_with_ns,  ) @@ -34,10 +35,21 @@ class ThePlatformIE(InfoExtractor):              'skip_download': True,          },      } +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        if mobj.group('config'): +            config_url = url+ '&form=json' +            config_url = config_url.replace('swf/', 'config/') +            config_url = config_url.replace('onsite/', 'onsite/config/') +            config = self._download_json(config_url, video_id, 'Downloading config') +            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' +        else: +            smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' +                'format=smil&mbr=true'.format(video_id)) -    def _get_info(self, video_id, smil_url): -        meta = self._download_xml(smil_url, video_id) +        meta = self._download_xml(smil_url, video_id)          try:              error_msg = next(                  n.attrib['abstract'] @@ -89,10 +101,14 @@ class ThePlatformIE(InfoExtractor):                  for f in switch.findall(_x('smil:video')):                      attr = f.attrib                      vbr = int(attr['system-bitrate']) // 1000 +                    ext = determine_ext(attr['src']) +                    if ext == 'once': +                        ext = 'mp4'                      formats.append({                          'format_id': compat_str(vbr),                          'url': attr['src'],                          'vbr': vbr, +                        'ext': ext,                      })              self._sort_formats(formats) @@ -104,17 +120,3 @@ class ThePlatformIE(InfoExtractor):              'thumbnail': info['defaultThumbnailUrl'],              'duration': info['duration']//1000,          } -         -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        if mobj.group('config'): -            config_url = url+ '&form=json' -            config_url = config_url.replace('swf/', 'config/') -            config_url = config_url.replace('onsite/', 'onsite/config/') -            config = self._download_json(config_url, video_id, 'Downloading config') -            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' -        else: -            smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' -                'format=smil&mbr=true'.format(video_id)) -        return self._get_info(video_id, smil_url) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 306fe8974..40c53ff17 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -4,9 +4,6 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -)  class TumblrIE(InfoExtractor): @@ -18,7 +15,7 @@ class TumblrIE(InfoExtractor):              'id': '54196191430',              'ext': 'mp4',              'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', -            'description': 'md5:dfac39636969fe6bf1caa2d50405f069', +            'description': 'md5:37db8211e40b50c7c44e95da14f630b7',              'thumbnail': 're:http://.*\.jpg',          }      }, { @@ -27,7 +24,7 @@ class TumblrIE(InfoExtractor):          'info_dict': {              'id': '90208453769',              'ext': 'mp4', -            'title': '5SOS STRUM ;)', +            'title': '5SOS STRUM ;]',              'description': 'md5:dba62ac8639482759c8eb10ce474586a',              'thumbnail': 're:http://.*\.jpg',          } @@ -41,18 +38,12 @@ class TumblrIE(InfoExtractor):          url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)          webpage = self._download_webpage(url, video_id) -        re_video = r'src=\\x22(?P<video_url>http://%s\.tumblr\.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id) -        video = re.search(re_video, webpage) -        if video is None: -            raise ExtractorError('Unable to extract video') -        video_url = video.group('video_url') -        ext = video.group('ext') - -        video_thumbnail = self._search_regex( -            r'posters.*?\[\\x22(.*?)\\x22', -            webpage, 'thumbnail', fatal=False)  # We pick the first poster -        if video_thumbnail: -            video_thumbnail = video_thumbnail.replace('\\\\/', '/') +        iframe_url = self._search_regex( +            r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', +            webpage, 'iframe url') +        iframe = self._download_webpage(iframe_url, video_id) +        video_url = self._search_regex(r'<source src="([^"]+)"', +            iframe, 'video url')          # The only place where you can get a title, it's not complete,          # but searching in other places doesn't work for all videos @@ -62,9 +53,9 @@ class TumblrIE(InfoExtractor):          return {              'id': video_id, -             'url': video_url, -             'title': video_title, -             'description': self._html_search_meta('description', webpage), -             'thumbnail': video_thumbnail, -             'ext': ext, +            'url': video_url, +            'ext': 'mp4', +            'title': video_title, +            'description': self._og_search_description(webpage), +            'thumbnail': self._og_search_thumbnail(webpage),          } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py new file mode 100644 index 000000000..36aa1ad6e --- /dev/null +++ b/youtube_dl/extractor/twitch.py @@ -0,0 +1,187 @@ +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    parse_iso8601, +) + + +class TwitchIE(InfoExtractor): +    # TODO: One broadcast may be split into multiple videos. The key +    # 'broadcast_id' is the same for all parts, and 'broadcast_part' +    # starts at 1 and increases. Can we treat all parts as one video? +    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?twitch\.tv/ +        (?: +            (?P<channelid>[^/]+)| +            (?:(?:[^/]+)/b/(?P<videoid>[^/]+))| +            (?:(?:[^/]+)/c/(?P<chapterid>[^/]+)) +        ) +        /?(?:\#.*)?$ +        """ +    _PAGE_LIMIT = 100 +    _API_BASE = 'https://api.twitch.tv' +    _TESTS = [{ +        'url': 'http://www.twitch.tv/riotgames/b/577357806', +        'info_dict': { +            'id': 'a577357806', +            'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG', +        }, +        'playlist_mincount': 12, +    }, { +        'url': 'http://www.twitch.tv/acracingleague/c/5285812', +        'info_dict': { +            'id': 'c5285812', +            'title': 'ACRL Off Season - Sports Cars @ Nordschleife', +        }, +        'playlist_mincount': 3, +    }, { +        'url': 'http://www.twitch.tv/vanillatv', +        'info_dict': { +            'id': 'vanillatv', +            'title': 'VanillaTV', +        }, +        'playlist_mincount': 412, +    }] + +    def _handle_error(self, response): +        if not isinstance(response, dict): +            return +        error = response.get('error') +        if error: +            raise ExtractorError( +                '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), +                expected=True) + +    def _download_json(self, url, video_id, note='Downloading JSON metadata'): +        response = super(TwitchIE, self)._download_json(url, video_id, note) +        self._handle_error(response) +        return response + +    def _extract_media(self, item, item_id): +        ITEMS = { +            'a': 'video', +            'c': 'chapter', +        } +        info = self._extract_info(self._download_json( +            '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, +            'Downloading %s info JSON' % ITEMS[item])) +        response = self._download_json( +            '%s/api/videos/%s%s' % (self._API_BASE, item, item_id), item_id, +            'Downloading %s playlist JSON' % ITEMS[item]) +        entries = [] +        chunks = response['chunks'] +        qualities = list(chunks.keys()) +        for num, fragment in enumerate(zip(*chunks.values()), start=1): +            formats = [] +            for fmt_num, fragment_fmt in enumerate(fragment): +                format_id = qualities[fmt_num] +                fmt = { +                    'url': fragment_fmt['url'], +                    'format_id': format_id, +                    'quality': 1 if format_id == 'live' else 0, +                } +                m = re.search(r'^(?P<height>\d+)[Pp]', format_id) +                if m: +                    fmt['height'] = int(m.group('height')) +                formats.append(fmt) +            self._sort_formats(formats) +            entry = dict(info) +            entry['id'] = '%s_%d' % (entry['id'], num) +            entry['title'] = '%s part %d' % (entry['title'], num) +            entry['formats'] = formats +            entries.append(entry) +        return self.playlist_result(entries, info['id'], info['title']) + +    def _extract_info(self, info): +        return { +            'id': info['_id'], +            'title': info['title'], +            'description': info['description'], +            'duration': info['length'], +            'thumbnail': info['preview'], +            'uploader': info['channel']['display_name'], +            'uploader_id': info['channel']['name'], +            'timestamp': parse_iso8601(info['recorded_at']), +            'view_count': info['views'], +        } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        if mobj.group('chapterid'): +            return self._extract_media('c', mobj.group('chapterid')) + +            """ +            webpage = self._download_webpage(url, chapter_id) +            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage) +            if not m: +                raise ExtractorError('Cannot find archive of a chapter') +            archive_id = m.group(1) + +            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id +            doc = self._download_xml( +                api, chapter_id, +                note='Downloading chapter information', +                errnote='Chapter information download failed') +            for a in doc.findall('.//archive'): +                if archive_id == a.find('./id').text: +                    break +            else: +                raise ExtractorError('Could not find chapter in chapter information') + +            video_url = a.find('./video_file_url').text +            video_ext = video_url.rpartition('.')[2] or 'flv' + +            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id +            chapter_info = self._download_json( +                chapter_api_url, 'c' + chapter_id, +                note='Downloading chapter metadata', +                errnote='Download of chapter metadata failed') + +            bracket_start = int(doc.find('.//bracket_start').text) +            bracket_end = int(doc.find('.//bracket_end').text) + +            # TODO determine start (and probably fix up file) +            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457 +            #video_url += '?start=' + TODO:start_timestamp +            # bracket_start is 13290, but we want 51670615 +            self._downloader.report_warning('Chapter detected, but we can just download the whole file. ' +                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end))) + +            info = { +                'id': 'c' + chapter_id, +                'url': video_url, +                'ext': video_ext, +                'title': chapter_info['title'], +                'thumbnail': chapter_info['preview'], +                'description': chapter_info['description'], +                'uploader': chapter_info['channel']['display_name'], +                'uploader_id': chapter_info['channel']['name'], +            } +            return info +            """ +        elif mobj.group('videoid'): +            return self._extract_media('a', mobj.group('videoid')) +        elif mobj.group('channelid'): +            channel_id = mobj.group('channelid') +            info = self._download_json( +                '%s/kraken/channels/%s' % (self._API_BASE, channel_id), +                channel_id, 'Downloading channel info JSON') +            channel_name = info.get('display_name') or info.get('name') +            entries = [] +            offset = 0 +            limit = self._PAGE_LIMIT +            for counter in itertools.count(1): +                response = self._download_json( +                    '%s/kraken/channels/%s/videos/?offset=%d&limit=%d' +                    % (self._API_BASE, channel_id, offset, limit), +                    channel_id, 'Downloading channel videos JSON page %d' % counter) +                videos = response['videos'] +                if not videos: +                    break +                entries.extend([self.url_result(video['url'], 'Twitch') for video in videos]) +                offset += limit +            return self.playlist_result(entries, channel_id, channel_name) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 994b60a76..cee1ea8f6 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -72,7 +72,7 @@ class UstreamChannelIE(InfoExtractor):          'info_dict': {              'id': '10874166',          }, -        'playlist_mincount': 54, +        'playlist_mincount': 17,      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 964470070..d3fa70e0e 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor):              'info_dict': {                  'id': '84196',                  'ext': 'mp4', -                'title': 'Hevnen er søt episode 10: Abu', +                'title': 'Hevnen er søt episode 1:10 - Abu',                  'description': 'md5:e25e4badb5f544b04341e14abdc72234',                  'thumbnail': 're:^https?://.*\.jpg',                  'duration': 648.000, @@ -67,9 +67,7 @@ class VGTVIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          data = self._download_json(              'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id,              video_id, 'Downloading media JSON') diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 9328ef4a2..0faa729c6 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -1,55 +1,85 @@ -import json -import re +from __future__ import unicode_literals  from .common import InfoExtractor +from ..utils import ( +    float_or_none, +    int_or_none, +)  class ViddlerIE(InfoExtractor): -    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'      _TEST = { -        u"url": u"http://www.viddler.com/v/43903784", -        u'file': u'43903784.mp4', -        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9', -        u'info_dict': { -            u"title": u"Video Made Easy", -            u"uploader": u"viddler", -            u"duration": 100.89, +        "url": "http://www.viddler.com/v/43903784", +        'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', +        'info_dict': { +            'id': '43903784', +            'ext': 'mp4', +            "title": "Video Made Easy", +            'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ', +            "uploader": "viddler", +            'timestamp': 1335371429, +            'upload_date': '20120425', +            "duration": 100.89, +            'thumbnail': 're:^https?://.*\.jpg$', +            'view_count': int, +            'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],          }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        embed_url = mobj.group('domain') + u'/embed/' + video_id -        webpage = self._download_webpage(embed_url, video_id) - -        video_sources_code = self._search_regex( -            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs') -        video_sources = json.loads(video_sources_code.replace("'", '"')) - -        formats = [{ -            'url': video_url, -            'format': format_id, -        } for video_url, format_id in video_sources.items()] - -        title = self._html_search_regex( -            r"title\s*:\s*'([^']*)'", webpage, u'title') -        uploader = self._html_search_regex( -            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False) -        duration_s = self._html_search_regex( -            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False) -        duration = float(duration_s) if duration_s else None -        thumbnail = self._html_search_regex( -            r"thumbnail\s*:\s*'([^']*)'", -            webpage, u'thumbnail', fatal=False) +        video_id = self._match_id(url) + +        json_url = ( +            'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % +            video_id) +        data = self._download_json(json_url, video_id)['video'] + +        formats = [] +        for filed in data['files']: +            if filed.get('status', 'ready') != 'ready': +                continue +            f = { +                'format_id': filed['profile_id'], +                'format_note': filed['profile_name'], +                'url': self._proto_relative_url(filed['url']), +                'width': int_or_none(filed.get('width')), +                'height': int_or_none(filed.get('height')), +                'filesize': int_or_none(filed.get('size')), +                'ext': filed.get('ext'), +                'source_preference': -1, +            } +            formats.append(f) + +            if filed.get('cdn_url'): +                f = f.copy() +                f['url'] = self._proto_relative_url(filed['cdn_url']) +                f['format_id'] = filed['profile_id'] + '-cdn' +                f['source_preference'] = 1 +                formats.append(f) + +            if filed.get('html5_video_source'): +                f = f.copy() +                f['url'] = self._proto_relative_url( +                    filed['html5_video_source']) +                f['format_id'] = filed['profile_id'] + '-html5' +                f['source_preference'] = 0 +                formats.append(f) +        self._sort_formats(formats) + +        categories = [ +            t.get('text') for t in data.get('tags', []) if 'text' in t]          return {              '_type': 'video',              'id': video_id, -            'title': title, -            'thumbnail': thumbnail, -            'uploader': uploader, -            'duration': duration, +            'title': data['title'],              'formats': formats, +            'description': data.get('description'), +            'timestamp': int_or_none(data.get('upload_time')), +            'thumbnail': self._proto_relative_url(data.get('thumbnail_url')), +            'uploader': data.get('author'), +            'duration': float_or_none(data.get('length')), +            'view_count': int_or_none(data.get('view_count')), +            'categories': categories,          } diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py new file mode 100644 index 000000000..669979e13 --- /dev/null +++ b/youtube_dl/extractor/vidzi.py @@ -0,0 +1,33 @@ +#coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VidziIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)' +    _TEST = { +        'url': 'http://vidzi.tv/cghql9yq6emu.html', +        'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', +        'info_dict': { +            'id': 'cghql9yq6emu', +            'ext': 'mp4', +            'title': 'youtube-dl test video  1\\\\2\'3/4<5\\\\6ä7↭', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +         +        webpage = self._download_webpage(url, video_id) +        video_url = self._html_search_regex( +            r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url') +        title = self._html_search_regex( +            r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') +         +        return { +            'id': video_id, +            'title': title, +            'url': video_url, +        } +        
\ No newline at end of file diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index d2c36b58a..d9cad0ea5 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -8,13 +8,11 @@ import itertools  from .common import InfoExtractor  from .subtitles import SubtitlesInfoExtractor  from ..utils import ( -    clean_html,      compat_HTTPError,      compat_urllib_parse,      compat_urllib_request,      compat_urlparse,      ExtractorError, -    get_element_by_attribute,      InAdvancePagedList,      int_or_none,      RegexNotFoundError, @@ -56,7 +54,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):      # _VALID_URL matches Vimeo URLs      _VALID_URL = r'''(?x) -        (?P<proto>(?:https?:)?//)? +        https?://          (?:(?:www|(?P<player>player))\.)?          vimeo(?P<pro>pro)?\.com/          (?!channels/[^/?#]+/?(?:$|[?#])|album/) @@ -157,6 +155,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):                  'duration': 62,              }          }, +        { +            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ +            'url': 'https://player.vimeo.com/video/98044508', +            'note': 'The js code contains assignments to the same variable as the config', +            'info_dict': { +                'id': '98044508', +                'ext': 'mp4', +                'title': 'Pier Solar OUYA Official Trailer', +                'uploader': 'Tulio Gonçalves', +                'uploader_id': 'user28849593', +            }, +        },      ]      def _verify_video_password(self, url, video_id, webpage): @@ -244,7 +254,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):                  # We try to find out to which variable is assigned the config dic                  m_variable_name = re.search('(\w)\.video\.id', webpage)                  if m_variable_name is not None: -                    config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1)) +                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))                  else:                      config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']                  config = self._search_regex(config_re, webpage, 'info section', @@ -502,7 +512,7 @@ class VimeoReviewIE(InfoExtractor):          'info_dict': {              'id': '91613211',              'ext': 'mp4', -            'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn', +            'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',              'uploader': 'DevWeek Events',              'duration': 2773,              'thumbnail': 're:^https?://.*\.jpg$', diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index e7754158d..42995226e 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -70,7 +70,7 @@ class VineUserIE(InfoExtractor):          'info_dict': {              'id': 'Visa',          }, -        'playlist_mincount': 47, +        'playlist_mincount': 46,      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py new file mode 100644 index 000000000..57ef8dc30 --- /dev/null +++ b/youtube_dl/extractor/vrt.py @@ -0,0 +1,95 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import float_or_none + + +class VRTIE(InfoExtractor): +    _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*' +    _TESTS = [ +        # deredactie.be +        { +            'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL', +            'md5': '4cebde1eb60a53782d4f3992cbd46ec8', +            'info_dict': { +                'id': '2129880', +                'ext': 'flv', +                'title': 'Het journaal L - 25/10/14', +                'description': None, +                'timestamp': 1414271750.949, +                'upload_date': '20141025', +                'duration': 929, +            } +        }, +        # sporza.be +        { +            'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time', +            'md5': '11f53088da9bf8e7cfc42456697953ff', +            'info_dict': { +                'id': '2124639', +                'ext': 'flv', +                'title': 'Bekijk Extra Time van 20 oktober', +                'description': 'md5:83ac5415a4f1816c6a93f8138aef2426', +                'timestamp': 1413835980.560, +                'upload_date': '20141020', +                'duration': 3238, +            }   +        }, +        # cobra.be +        { +            'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari', +            'md5': '78a2b060a5083c4f055449a72477409d', +            'info_dict': { +                'id': '2126050', +                'ext': 'flv', +                'title': 'Bret Easton Ellis in Café Corsari', +                'description': 'md5:f699986e823f32fd6036c1855a724ee9', +                'timestamp': 1413967500.494, +                'upload_date': '20141022', +                'duration': 661, +            } +        }, +    ] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        video_id = self._search_regex( +            r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False) + +        formats = [] +        mobj = re.search( +            r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"', +            webpage) +        if mobj: +            formats.extend(self._extract_m3u8_formats( +                '%s/%s' % (mobj.group('server'), mobj.group('path')), +                video_id, 'mp4')) +        mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage) +        if mobj: +            formats.extend(self._extract_f4m_formats( +                '%s/manifest.f4m' % mobj.group('src'), video_id)) +        self._sort_formats(formats) + +        title = self._og_search_title(webpage) +        description = self._og_search_description(webpage, default=None) +        thumbnail = self._og_search_thumbnail(webpage) +        timestamp = float_or_none(self._search_regex( +            r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000) +        duration = float_or_none(self._search_regex( +            r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'timestamp': timestamp, +            'duration': duration, +            'formats': formats, +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 273d93d9e..c3bb9b2cf 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -20,7 +20,7 @@ class XTubeIE(InfoExtractor):              'id': 'kVTUy_G222_',              'ext': 'mp4',              'title': 'strange erotica', -            'description': 'surreal gay themed erotica...almost an ET kind of thing', +            'description': 'http://www.xtube.com an ET kind of thing',              'uploader': 'greenshowers',              'duration': 450,              'age_limit': 18, diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 944d7da38..9cd7989cc 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -13,7 +13,6 @@ class YnetIE(InfoExtractor):      _TESTS = [          {              'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', -            'md5': '4b29cb57c3dddd57642b3f051f535b07',              'info_dict': {                  'id': 'L-11659-99244',                  'ext': 'flv', @@ -22,7 +21,6 @@ class YnetIE(InfoExtractor):              }          }, {              'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', -            'md5': '8194c2ea221e9a639cac96b6b0753dc5',              'info_dict': {                  'id': 'L-8859-84418',                  'ext': 'flv', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9041cfa87..40fe4662a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -185,14 +185,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):          self._download_webpage(              req, None, -            note='Confirming age', errnote='Unable to confirm age') -        return True +            note='Confirming age', errnote='Unable to confirm age', +            fatal=False)      def _real_initialize(self):          if self._downloader is None:              return -        if not self._set_language(): -            return +        if self._get_login_info()[0] is not None: +            if not self._set_language(): +                return          if not self._login():              return          self._confirm_age() @@ -286,6 +287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},          '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},          '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, +        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},          '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, @@ -938,7 +940,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')          # Look for the DASH manifest -        if (self._downloader.params.get('youtube_include_dash_manifest', False)): +        if self._downloader.params.get('youtube_include_dash_manifest', True):              try:                  # The DASH manifest used needs to be the one from the original video_webpage.                  # The one found in get_video_info seems to be using different signatures. @@ -1055,7 +1057,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'note': 'issue #673',          'url': 'PLBB231211A4F62143',          'info_dict': { -            'title': 'Team Fortress 2 (Class-based LP)', +            'title': '[OLD]Team Fortress 2 (Class-based LP)',          },          'playlist_mincount': 26,      }, { diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f651337ad..98e20d549 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -6,6 +6,8 @@ import shlex  import sys  from .utils import ( +    compat_expanduser, +    compat_getenv,      get_term_width,      write_string,  ) @@ -27,19 +29,19 @@ def parseOpts(overrideArguments=None):          return res      def _readUserConf(): -        xdg_config_home = os.environ.get('XDG_CONFIG_HOME') +        xdg_config_home = compat_getenv('XDG_CONFIG_HOME')          if xdg_config_home:              userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')              if not os.path.isfile(userConfFile):                  userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')          else: -            userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config') +            userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')              if not os.path.isfile(userConfFile): -                userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') +                userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')          userConf = _readOptions(userConfFile, None)          if userConf is None: -            appdata_dir = os.environ.get('appdata') +            appdata_dir = compat_getenv('appdata')              if appdata_dir:                  userConf = _readOptions(                      os.path.join(appdata_dir, 'youtube-dl', 'config'), @@ -51,11 +53,11 @@ def parseOpts(overrideArguments=None):          if userConf is None:              userConf = _readOptions( -                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'), +                os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),                  default=None)          if userConf is None:              userConf = _readOptions( -                os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'), +                os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),                  default=None)          if userConf is None: @@ -75,7 +77,8 @@ def parseOpts(overrideArguments=None):          if len(opts) > 1:              opts.insert(1, ', ') -        if option.takes_value(): opts.append(' %s' % option.metavar) +        if option.takes_value(): +            opts.append(' %s' % option.metavar)          return "".join(opts) @@ -87,68 +90,69 @@ def parseOpts(overrideArguments=None):          for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:              try:                  i = opts.index(private_opt) -                opts[i+1] = 'PRIVATE' +                opts[i + 1] = 'PRIVATE'              except ValueError:                  pass          return opts -    max_width = 80 -    max_help_position = 80 -      # No need to wrap help messages if we're on a wide console      columns = get_term_width() -    if columns: max_width = columns +    max_width = columns if columns else 80 +    max_help_position = 80      fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)      fmt.format_option_strings = _format_option_string      kw = { -        'version'   : __version__, -        'formatter' : fmt, -        'usage' : '%prog [options] url [url...]', -        'conflict_handler' : 'resolve', +        'version': __version__, +        'formatter': fmt, +        'usage': '%prog [options] url [url...]', +        'conflict_handler': 'resolve',      }      parser = optparse.OptionParser(**kw) -    # option groups -    general        = optparse.OptionGroup(parser, 'General Options') -    selection      = optparse.OptionGroup(parser, 'Video Selection') -    authentication = optparse.OptionGroup(parser, 'Authentication Options') -    video_format   = optparse.OptionGroup(parser, 'Video Format Options') -    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options') -    downloader     = optparse.OptionGroup(parser, 'Download Options') -    postproc       = optparse.OptionGroup(parser, 'Post-processing Options') -    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') -    workarounds    = optparse.OptionGroup(parser, 'Workarounds') -    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - -    general.add_option('-h', '--help', -            action='help', help='print this help text and exit') -    general.add_option('-v', '--version', -            action='version', help='print program version and exit') -    general.add_option('-U', '--update', -            action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') -    general.add_option('-i', '--ignore-errors', -            action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False) -    general.add_option('--abort-on-error', -            action='store_false', dest='ignoreerrors', -            help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') -    general.add_option('--dump-user-agent', -            action='store_true', dest='dump_user_agent', -            help='display the current browser identification', default=False) -    general.add_option('--list-extractors', -            action='store_true', dest='list_extractors', -            help='List all supported extractors and the URLs they would handle', default=False) -    general.add_option('--extractor-descriptions', -            action='store_true', dest='list_extractor_descriptions', -            help='Output descriptions of all supported extractors', default=False) +    general = optparse.OptionGroup(parser, 'General Options') +    general.add_option( +        '-h', '--help', +        action='help', +        help='print this help text and exit') +    general.add_option( +        '-v', '--version', +        action='version', +        help='print program version and exit') +    general.add_option( +        '-U', '--update', +        action='store_true', dest='update_self', +        help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') +    general.add_option( +        '-i', '--ignore-errors', +        action='store_true', dest='ignoreerrors', default=False, +        help='continue on download errors, for example to skip unavailable videos in a playlist') +    general.add_option( +        '--abort-on-error', +        action='store_false', dest='ignoreerrors', +        help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') +    general.add_option( +        '--dump-user-agent', +        action='store_true', dest='dump_user_agent', default=False, +        help='display the current browser identification')      general.add_option( -        '--proxy', dest='proxy', default=None, metavar='URL', +        '--list-extractors', +        action='store_true', dest='list_extractors', default=False, +        help='List all supported extractors and the URLs they would handle') +    general.add_option( +        '--extractor-descriptions', +        action='store_true', dest='list_extractor_descriptions', default=False, +        help='Output descriptions of all supported extractors') +    general.add_option( +        '--proxy', dest='proxy', +        default=None, metavar='URL',          help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')      general.add_option( -        '--socket-timeout', dest='socket_timeout', -        type=float, default=None, help=u'Time to wait before giving up, in seconds') +        '--socket-timeout', +        dest='socket_timeout', type=float, default=None, +        help='Time to wait before giving up, in seconds')      general.add_option(          '--default-search',          dest='default_search', metavar='PREFIX', @@ -157,7 +161,13 @@ def parseOpts(overrideArguments=None):          '--ignore-config',          action='store_true',          help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') +    general.add_option( +        '--flat-playlist', +        action='store_const', dest='extract_flat', const='in_playlist', +        default=False, +        help='Do not extract the videos of a playlist, only list them.') +    selection = optparse.OptionGroup(parser, 'Video Selection')      selection.add_option(          '--playlist-start',          dest='playliststart', metavar='NUMBER', default=1, type=int, @@ -166,245 +176,375 @@ def parseOpts(overrideArguments=None):          '--playlist-end',          dest='playlistend', metavar='NUMBER', default=None, type=int,          help='playlist video to end at (default is last)') -    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') -    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') -    selection.add_option('--max-downloads', metavar='NUMBER', -                         dest='max_downloads', type=int, default=None, -                         help='Abort after downloading NUMBER files') -    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) -    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) -    selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)      selection.add_option( -        '--datebefore', metavar='DATE', dest='datebefore', default=None, +        '--match-title', +        dest='matchtitle', metavar='REGEX', +        help='download only matching titles (regex or caseless sub-string)') +    selection.add_option( +        '--reject-title', +        dest='rejecttitle', metavar='REGEX', +        help='skip download for matching titles (regex or caseless sub-string)') +    selection.add_option( +        '--max-downloads', +        dest='max_downloads', metavar='NUMBER', type=int, default=None, +        help='Abort after downloading NUMBER files') +    selection.add_option( +        '--min-filesize', +        metavar='SIZE', dest='min_filesize', default=None, +        help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)') +    selection.add_option( +        '--max-filesize', +        metavar='SIZE', dest='max_filesize', default=None, +        help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)') +    selection.add_option( +        '--date', +        metavar='DATE', dest='date', default=None, +        help='download only videos uploaded in this date') +    selection.add_option( +        '--datebefore', +        metavar='DATE', dest='datebefore', default=None,          help='download only videos uploaded on or before this date (i.e. inclusive)')      selection.add_option( -        '--dateafter', metavar='DATE', dest='dateafter', default=None, +        '--dateafter', +        metavar='DATE', dest='dateafter', default=None,          help='download only videos uploaded on or after this date (i.e. inclusive)')      selection.add_option( -        '--min-views', metavar='COUNT', dest='min_views', -        default=None, type=int, -        help="Do not download any videos with less than COUNT views",) +        '--min-views', +        metavar='COUNT', dest='min_views', default=None, type=int, +        help='Do not download any videos with less than COUNT views',)      selection.add_option( -        '--max-views', metavar='COUNT', dest='max_views', -        default=None, type=int, -        help="Do not download any videos with more than COUNT views",) -    selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) -    selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', -                         help='download only videos suitable for the given age', -                         default=None, type=int) -    selection.add_option('--download-archive', metavar='FILE', -                         dest='download_archive', -                         help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.') +        '--max-views', +        metavar='COUNT', dest='max_views', default=None, type=int, +        help='Do not download any videos with more than COUNT views')      selection.add_option( -        '--include-ads', dest='include_ads', -        action='store_true', -        help='Download advertisements as well (experimental)') +        '--no-playlist', +        action='store_true', dest='noplaylist', default=False, +        help='download only the currently playing video') +    selection.add_option( +        '--age-limit', +        metavar='YEARS', dest='age_limit', default=None, type=int, +        help='download only videos suitable for the given age') +    selection.add_option( +        '--download-archive', metavar='FILE', +        dest='download_archive', +        help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')      selection.add_option( -        '--youtube-include-dash-manifest', action='store_true', -        dest='youtube_include_dash_manifest', default=False, -        help='Try to download the DASH manifest on YouTube videos (experimental)') - -    authentication.add_option('-u', '--username', -            dest='username', metavar='USERNAME', help='account username') -    authentication.add_option('-p', '--password', -            dest='password', metavar='PASSWORD', help='account password') -    authentication.add_option('-2', '--twofactor', -            dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code') -    authentication.add_option('-n', '--netrc', -            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) -    authentication.add_option('--video-password', -            dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)') - - -    video_format.add_option('-f', '--format', -            action='store', dest='format', metavar='FORMAT', default=None, -            help='video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as  -f  136/137/mp4/bestvideo,140/m4a/bestaudio') -    video_format.add_option('--all-formats', -            action='store_const', dest='format', help='download all available video formats', const='all') -    video_format.add_option('--prefer-free-formats', -            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') -    video_format.add_option('--max-quality', -            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') -    video_format.add_option('-F', '--list-formats', -            action='store_true', dest='listformats', help='list all available formats') - -    subtitles.add_option('--write-sub', '--write-srt', -            action='store_true', dest='writesubtitles', -            help='write subtitle file', default=False) -    subtitles.add_option('--write-auto-sub', '--write-automatic-sub', -            action='store_true', dest='writeautomaticsub', -            help='write automatic subtitle file (youtube only)', default=False) -    subtitles.add_option('--all-subs', -            action='store_true', dest='allsubtitles', -            help='downloads all the available subtitles of the video', default=False) -    subtitles.add_option('--list-subs', -            action='store_true', dest='listsubtitles', -            help='lists all available subtitles for the video', default=False) -    subtitles.add_option('--sub-format', -            action='store', dest='subtitlesformat', metavar='FORMAT', -            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') -    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', -            action='callback', dest='subtitleslangs', metavar='LANGS', type='str', -            default=[], callback=_comma_separated_values_options_callback, -            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') - -    downloader.add_option('-r', '--rate-limit', -            dest='ratelimit', metavar='LIMIT', help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') -    downloader.add_option('-R', '--retries', -            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) -    downloader.add_option('--buffer-size', -            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16K) (default is %default)', default="1024") -    downloader.add_option('--no-resize-buffer', -            action='store_true', dest='noresizebuffer', -            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) -    downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP) +        '--include-ads', +        dest='include_ads', action='store_true', +        help='Download advertisements as well (experimental)') +    authentication = optparse.OptionGroup(parser, 'Authentication Options') +    authentication.add_option( +        '-u', '--username', +        dest='username', metavar='USERNAME', +        help='login with this account ID') +    authentication.add_option( +        '-p', '--password', +        dest='password', metavar='PASSWORD', +        help='account password') +    authentication.add_option( +        '-2', '--twofactor', +        dest='twofactor', metavar='TWOFACTOR', +        help='two-factor auth code') +    authentication.add_option( +        '-n', '--netrc', +        action='store_true', dest='usenetrc', default=False, +        help='use .netrc authentication data') +    authentication.add_option( +        '--video-password', +        dest='videopassword', metavar='PASSWORD', +        help='video password (vimeo, smotri)') + +    video_format = optparse.OptionGroup(parser, 'Video Format Options') +    video_format.add_option( +        '-f', '--format', +        action='store', dest='format', metavar='FORMAT', default=None, +        help='video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as  -f  136/137/mp4/bestvideo,140/m4a/bestaudio') +    video_format.add_option( +        '--all-formats', +        action='store_const', dest='format', const='all', +        help='download all available video formats') +    video_format.add_option( +        '--prefer-free-formats', +        action='store_true', dest='prefer_free_formats', default=False, +        help='prefer free video formats unless a specific one is requested') +    video_format.add_option( +        '--max-quality', +        action='store', dest='format_limit', metavar='FORMAT', +        help='highest quality format to download') +    video_format.add_option( +        '-F', '--list-formats', +        action='store_true', dest='listformats', +        help='list all available formats') +    video_format.add_option( +        '--youtube-include-dash-manifest', +        action='store_true', dest='youtube_include_dash_manifest', default=True, +        help=optparse.SUPPRESS_HELP) +    video_format.add_option( +        '--youtube-skip-dash-manifest', +        action='store_false', dest='youtube_include_dash_manifest', +        help='Do not download the DASH manifest on YouTube videos') + +    subtitles = optparse.OptionGroup(parser, 'Subtitle Options') +    subtitles.add_option( +        '--write-sub', '--write-srt', +        action='store_true', dest='writesubtitles', default=False, +        help='write subtitle file') +    subtitles.add_option( +        '--write-auto-sub', '--write-automatic-sub', +        action='store_true', dest='writeautomaticsub', default=False, +        help='write automatic subtitle file (youtube only)') +    subtitles.add_option( +        '--all-subs', +        action='store_true', dest='allsubtitles', default=False, +        help='downloads all the available subtitles of the video') +    subtitles.add_option( +        '--list-subs', +        action='store_true', dest='listsubtitles', default=False, +        help='lists all available subtitles for the video') +    subtitles.add_option( +        '--sub-format', +        action='store', dest='subtitlesformat', metavar='FORMAT', default='srt', +        help='subtitle format (default=srt) ([sbv/vtt] youtube only)') +    subtitles.add_option( +        '--sub-lang', '--sub-langs', '--srt-lang', +        action='callback', dest='subtitleslangs', metavar='LANGS', type='str', +        default=[], callback=_comma_separated_values_options_callback, +        help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + +    downloader = optparse.OptionGroup(parser, 'Download Options') +    downloader.add_option( +        '-r', '--rate-limit', +        dest='ratelimit', metavar='LIMIT', +        help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') +    downloader.add_option( +        '-R', '--retries', +        dest='retries', metavar='RETRIES', default=10, +        help='number of retries (default is %default)') +    downloader.add_option( +        '--buffer-size', +        dest='buffersize', metavar='SIZE', default='1024', +        help='size of download buffer (e.g. 1024 or 16K) (default is %default)') +    downloader.add_option( +        '--no-resize-buffer', +        action='store_true', dest='noresizebuffer', default=False, +        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') +    downloader.add_option( +        '--test', +        action='store_true', dest='test', default=False, +        help=optparse.SUPPRESS_HELP) + +    workarounds = optparse.OptionGroup(parser, 'Workarounds')      workarounds.add_option( -        '--encoding', dest='encoding', metavar='ENCODING', +        '--encoding', +        dest='encoding', metavar='ENCODING',          help='Force the specified encoding (experimental)')      workarounds.add_option( -        '--no-check-certificate', action='store_true', -        dest='no_check_certificate', default=False, +        '--no-check-certificate', +        action='store_true', dest='no_check_certificate', default=False,          help='Suppress HTTPS certificate validation.')      workarounds.add_option( -        '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', +        '--prefer-insecure', +        '--prefer-unsecure', action='store_true', dest='prefer_insecure',          help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')      workarounds.add_option( -        '--user-agent', metavar='UA', -        dest='user_agent', help='specify a custom user agent') +        '--user-agent', +        metavar='UA', dest='user_agent', +        help='specify a custom user agent')      workarounds.add_option( -        '--referer', metavar='REF', -        dest='referer', default=None, +        '--referer', +        metavar='URL', dest='referer', default=None,          help='specify a custom referer, use if the video access is restricted to one domain',      )      workarounds.add_option( -        '--add-header', metavar='FIELD:VALUE', -        dest='headers', action='append', +        '--add-header', +        metavar='FIELD:VALUE', dest='headers', action='append',          help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',      )      workarounds.add_option( -        '--bidi-workaround', dest='bidi_workaround', action='store_true', -        help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') +        '--bidi-workaround', +        dest='bidi_workaround', action='store_true', +        help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') -    verbosity.add_option('-q', '--quiet', -            action='store_true', dest='quiet', help='activates quiet mode', default=False) +    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') +    verbosity.add_option( +        '-q', '--quiet', +        action='store_true', dest='quiet', default=False, +        help='activates quiet mode')      verbosity.add_option(          '--no-warnings',          dest='no_warnings', action='store_true', default=False,          help='Ignore warnings') -    verbosity.add_option('-s', '--simulate', -            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) -    verbosity.add_option('--skip-download', -            action='store_true', dest='skip_download', help='do not download the video', default=False) -    verbosity.add_option('-g', '--get-url', -            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) -    verbosity.add_option('-e', '--get-title', -            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) -    verbosity.add_option('--get-id', -            action='store_true', dest='getid', help='simulate, quiet but print id', default=False) -    verbosity.add_option('--get-thumbnail', -            action='store_true', dest='getthumbnail', -            help='simulate, quiet but print thumbnail URL', default=False) -    verbosity.add_option('--get-description', -            action='store_true', dest='getdescription', -            help='simulate, quiet but print video description', default=False) -    verbosity.add_option('--get-duration', -            action='store_true', dest='getduration', -            help='simulate, quiet but print video length', default=False) -    verbosity.add_option('--get-filename', -            action='store_true', dest='getfilename', -            help='simulate, quiet but print output filename', default=False) -    verbosity.add_option('--get-format', -            action='store_true', dest='getformat', -            help='simulate, quiet but print output format', default=False) -    verbosity.add_option('-j', '--dump-json', -            action='store_true', dest='dumpjson', -            help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False) -    verbosity.add_option('--newline', -            action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) -    verbosity.add_option('--no-progress', -            action='store_true', dest='noprogress', help='do not print progress bar', default=False) -    verbosity.add_option('--console-title', -            action='store_true', dest='consoletitle', -            help='display progress in console titlebar', default=False) -    verbosity.add_option('-v', '--verbose', -            action='store_true', dest='verbose', help='print various debugging information', default=False) -    verbosity.add_option('--dump-intermediate-pages', -            action='store_true', dest='dump_intermediate_pages', default=False, -            help='print downloaded pages to debug problems (very verbose)') -    verbosity.add_option('--write-pages', -            action='store_true', dest='write_pages', default=False, -            help='Write downloaded intermediary pages to files in the current directory to debug problems') -    verbosity.add_option('--youtube-print-sig-code', -            action='store_true', dest='youtube_print_sig_code', default=False, -            help=optparse.SUPPRESS_HELP) -    verbosity.add_option('--print-traffic', -            dest='debug_printtraffic', action='store_true', default=False, -            help='Display sent and read HTTP traffic') - - -    filesystem.add_option('-a', '--batch-file', -            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') -    filesystem.add_option('--id', -            action='store_true', dest='useid', help='use only video ID in file name', default=False) -    filesystem.add_option('-A', '--auto-number', -            action='store_true', dest='autonumber', -            help='number downloaded files starting from 00000', default=False) -    filesystem.add_option('-o', '--output', -            dest='outtmpl', metavar='TEMPLATE', -            help=('output filename template. Use %(title)s to get the title, ' -                  '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' -                  '%(autonumber)s to get an automatically incremented number, ' -                  '%(ext)s for the filename extension, ' -                  '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' -                  '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' -                  '%(upload_date)s for the upload date (YYYYMMDD), ' -                  '%(extractor)s for the provider (youtube, metacafe, etc), ' -                  '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' -                  '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' -                  '%(height)s and %(width)s for the width and height of the video format. ' -                  '%(resolution)s for a textual description of the resolution of the video format. ' -                  'Use - to output to stdout. Can also be used to download to a different directory, ' -                  'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) -    filesystem.add_option('--autonumber-size', -            dest='autonumber_size', metavar='NUMBER', -            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') -    filesystem.add_option('--restrict-filenames', -            action='store_true', dest='restrictfilenames', -            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) -    filesystem.add_option('-t', '--title', -            action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False) -    filesystem.add_option('-l', '--literal', -            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) -    filesystem.add_option('-w', '--no-overwrites', -            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) -    filesystem.add_option('-c', '--continue', -            action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True) -    filesystem.add_option('--no-continue', -            action='store_false', dest='continue_dl', -            help='do not resume partially downloaded files (restart from beginning)') -    filesystem.add_option('--no-part', -            action='store_true', dest='nopart', help='do not use .part files', default=False) -    filesystem.add_option('--no-mtime', -            action='store_false', dest='updatetime', -            help='do not use the Last-modified header to set the file modification time', default=True) -    filesystem.add_option('--write-description', -            action='store_true', dest='writedescription', -            help='write video description to a .description file', default=False) -    filesystem.add_option('--write-info-json', -            action='store_true', dest='writeinfojson', -            help='write video metadata to a .info.json file', default=False) -    filesystem.add_option('--write-annotations', -            action='store_true', dest='writeannotations', -            help='write video annotations to a .annotation file', default=False) -    filesystem.add_option('--write-thumbnail', -            action='store_true', dest='writethumbnail', -            help='write thumbnail image to disk', default=False) -    filesystem.add_option('--load-info', -            dest='load_info_filename', metavar='FILE', -            help='json file containing the video information (created with the "--write-json" option)') -    filesystem.add_option('--cookies', -            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') +    verbosity.add_option( +        '-s', '--simulate', +        action='store_true', dest='simulate', default=False, +        help='do not download the video and do not write anything to disk',) +    verbosity.add_option( +        '--skip-download', +        action='store_true', dest='skip_download', default=False, +        help='do not download the video',) +    verbosity.add_option( +        '-g', '--get-url', +        action='store_true', dest='geturl', default=False, +        help='simulate, quiet but print URL') +    verbosity.add_option( +        '-e', '--get-title', +        action='store_true', dest='gettitle', default=False, +        help='simulate, quiet but print title') +    verbosity.add_option( +        '--get-id', +        action='store_true', dest='getid', default=False, +        help='simulate, quiet but print id') +    verbosity.add_option( +        '--get-thumbnail', +        action='store_true', dest='getthumbnail', default=False, +        help='simulate, quiet but print thumbnail URL') +    verbosity.add_option( +        '--get-description', +        action='store_true', dest='getdescription', default=False, +        help='simulate, quiet but print video description') +    verbosity.add_option( +        '--get-duration', +        action='store_true', dest='getduration', default=False, +        help='simulate, quiet but print video length') +    verbosity.add_option( +        '--get-filename', +        action='store_true', dest='getfilename', default=False, +        help='simulate, quiet but print output filename') +    verbosity.add_option( +        '--get-format', +        action='store_true', dest='getformat', default=False, +        help='simulate, quiet but print output format') +    verbosity.add_option( +        '-j', '--dump-json', +        action='store_true', dest='dumpjson', default=False, +        help='simulate, quiet but print JSON information. See --output for a description of available keys.') +    verbosity.add_option( +        '-J', '--dump-single-json', +        action='store_true', dest='dump_single_json', default=False, +        help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') +    verbosity.add_option( +        '--newline', +        action='store_true', dest='progress_with_newline', default=False, +        help='output progress bar as new lines') +    verbosity.add_option( +        '--no-progress', +        action='store_true', dest='noprogress', default=False, +        help='do not print progress bar') +    verbosity.add_option( +        '--console-title', +        action='store_true', dest='consoletitle', default=False, +        help='display progress in console titlebar') +    verbosity.add_option( +        '-v', '--verbose', +        action='store_true', dest='verbose', default=False, +        help='print various debugging information') +    verbosity.add_option( +        '--dump-intermediate-pages', +        action='store_true', dest='dump_intermediate_pages', default=False, +        help='print downloaded pages to debug problems (very verbose)') +    verbosity.add_option( +        '--write-pages', +        action='store_true', dest='write_pages', default=False, +        help='Write downloaded intermediary pages to files in the current directory to debug problems') +    verbosity.add_option( +        '--youtube-print-sig-code', +        action='store_true', dest='youtube_print_sig_code', default=False, +        help=optparse.SUPPRESS_HELP) +    verbosity.add_option( +        '--print-traffic', +        dest='debug_printtraffic', action='store_true', default=False, +        help='Display sent and read HTTP traffic') + +    filesystem = optparse.OptionGroup(parser, 'Filesystem Options') +    filesystem.add_option( +        '-a', '--batch-file', +        dest='batchfile', metavar='FILE', +        help='file containing URLs to download (\'-\' for stdin)') +    filesystem.add_option( +        '--id', default=False, +        action='store_true', dest='useid', help='use only video ID in file name') +    filesystem.add_option( +        '-A', '--auto-number', +        action='store_true', dest='autonumber', default=False, +        help='number downloaded files starting from 00000') +    filesystem.add_option( +        '-o', '--output', +        dest='outtmpl', metavar='TEMPLATE', +        help=('output filename template. Use %(title)s to get the title, ' +              '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' +              '%(autonumber)s to get an automatically incremented number, ' +              '%(ext)s for the filename extension, ' +              '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' +              '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' +              '%(upload_date)s for the upload date (YYYYMMDD), ' +              '%(extractor)s for the provider (youtube, metacafe, etc), ' +              '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' +              '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' +              '%(height)s and %(width)s for the width and height of the video format. ' +              '%(resolution)s for a textual description of the resolution of the video format. ' +              'Use - to output to stdout. Can also be used to download to a different directory, ' +              'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) +    filesystem.add_option( +        '--autonumber-size', +        dest='autonumber_size', metavar='NUMBER', +        help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') +    filesystem.add_option( +        '--restrict-filenames', +        action='store_true', dest='restrictfilenames', default=False, +        help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames') +    filesystem.add_option( +        '-t', '--title', +        action='store_true', dest='usetitle', default=False, +        help='[deprecated] use title in file name (default)') +    filesystem.add_option( +        '-l', '--literal', default=False, +        action='store_true', dest='usetitle', +        help='[deprecated] alias of --title') +    filesystem.add_option( +        '-w', '--no-overwrites', +        action='store_true', dest='nooverwrites', default=False, +        help='do not overwrite files') +    filesystem.add_option( +        '-c', '--continue', +        action='store_true', dest='continue_dl', default=True, +        help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') +    filesystem.add_option( +        '--no-continue', +        action='store_false', dest='continue_dl', +        help='do not resume partially downloaded files (restart from beginning)') +    filesystem.add_option( +        '--no-part', +        action='store_true', dest='nopart', default=False, +        help='do not use .part files - write directly into output file') +    filesystem.add_option( +        '--no-mtime', +        action='store_false', dest='updatetime', default=True, +        help='do not use the Last-modified header to set the file modification time') +    filesystem.add_option( +        '--write-description', +        action='store_true', dest='writedescription', default=False, +        help='write video description to a .description file') +    filesystem.add_option( +        '--write-info-json', +        action='store_true', dest='writeinfojson', default=False, +        help='write video metadata to a .info.json file') +    filesystem.add_option( +        '--write-annotations', +        action='store_true', dest='writeannotations', default=False, +        help='write video annotations to a .annotation file') +    filesystem.add_option( +        '--write-thumbnail', +        action='store_true', dest='writethumbnail', default=False, +        help='write thumbnail image to disk') +    filesystem.add_option( +        '--load-info', +        dest='load_info_filename', metavar='FILE', +        help='json file containing the video information (created with the "--write-json" option)') +    filesystem.add_option( +        '--cookies', +        dest='cookiefile', metavar='FILE', +        help='file to read cookies from and dump cookie jar in')      filesystem.add_option(          '--cache-dir', dest='cachedir', default=None, metavar='DIR',          help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') @@ -412,36 +552,61 @@ def parseOpts(overrideArguments=None):          '--no-cache-dir', action='store_const', const=False, dest='cachedir',          help='Disable filesystem caching')      filesystem.add_option( -        '--rm-cache-dir', action='store_true', dest='rm_cachedir', +        '--rm-cache-dir', +        action='store_true', dest='rm_cachedir',          help='Delete all filesystem cache files') - -    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, -            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') -    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', -            help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default') -    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', -            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') -    postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, -            help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') -    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, -            help='keeps the video file on disk after the post-processing; the video is erased by default') -    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, -            help='do not overwrite post-processed files; the post-processed files are overwritten by default') -    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, -            help='embed subtitles in the video (only for mp4 videos)') -    postproc.add_option('--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, -            help='embed thumbnail in the audio as cover art') -    postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, -            help='write metadata to the video file') -    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False, -            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') -    postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg', +    postproc = optparse.OptionGroup(parser, 'Post-processing Options') +    postproc.add_option( +        '-x', '--extract-audio', +        action='store_true', dest='extractaudio', default=False, +        help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') +    postproc.add_option( +        '--audio-format', metavar='FORMAT', dest='audioformat', default='best', +        help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') +    postproc.add_option( +        '--audio-quality', metavar='QUALITY', +        dest='audioquality', default='5', +        help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') +    postproc.add_option( +        '--recode-video', +        metavar='FORMAT', dest='recodevideo', default=None, +        help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') +    postproc.add_option( +        '-k', '--keep-video', +        action='store_true', dest='keepvideo', default=False, +        help='keeps the video file on disk after the post-processing; the video is erased by default') +    postproc.add_option( +        '--no-post-overwrites', +        action='store_true', dest='nopostoverwrites', default=False, +        help='do not overwrite post-processed files; the post-processed files are overwritten by default') +    postproc.add_option( +        '--embed-subs', +        action='store_true', dest='embedsubtitles', default=False, +        help='embed subtitles in the video (only for mp4 videos)') +    postproc.add_option( +        '--embed-thumbnail', +        action='store_true', dest='embedthumbnail', default=False, +        help='embed thumbnail in the audio as cover art') +    postproc.add_option( +        '--add-metadata', +        action='store_true', dest='addmetadata', default=False, +        help='write metadata to the video file') +    postproc.add_option( +        '--xattrs', +        action='store_true', dest='xattrs', default=False, +        help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') +    postproc.add_option( +        '--prefer-avconv', +        action='store_false', dest='prefer_ffmpeg',          help='Prefer avconv over ffmpeg for running the postprocessors (default)') -    postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg', +    postproc.add_option( +        '--prefer-ffmpeg', +        action='store_true', dest='prefer_ffmpeg',          help='Prefer ffmpeg over avconv for running the postprocessors')      postproc.add_option( -        '--exec', metavar='CMD', dest='exec_cmd', +        '--exec', +        metavar='CMD', dest='exec_cmd',          help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )      parser.add_option_group(general) @@ -458,7 +623,7 @@ def parseOpts(overrideArguments=None):      if overrideArguments is not None:          opts, args = parser.parse_args(overrideArguments)          if opts.verbose: -            write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') +            write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')      else:          commandLineConf = sys.argv[1:]          if '--ignore-config' in commandLineConf: @@ -474,8 +639,8 @@ def parseOpts(overrideArguments=None):          opts, args = parser.parse_args(argv)          if opts.verbose: -            write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') -            write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') -            write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') +            write_string('[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') +            write_string('[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') +            write_string('[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')      return parser, opts, args diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index 15aa0daa9..6ac67cbae 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -1,24 +1,26 @@  from .atomicparsley import AtomicParsleyPP  from .ffmpeg import ( +    FFmpegPostProcessor,      FFmpegAudioFixPP, +    FFmpegEmbedSubtitlePP, +    FFmpegExtractAudioPP,      FFmpegMergerPP,      FFmpegMetadataPP,      FFmpegVideoConvertor, -    FFmpegExtractAudioPP, -    FFmpegEmbedSubtitlePP,  )  from .xattrpp import XAttrMetadataPP  from .execafterdownload import ExecAfterDownloadPP  __all__ = [      'AtomicParsleyPP', +    'ExecAfterDownloadPP',      'FFmpegAudioFixPP', +    'FFmpegEmbedSubtitlePP', +    'FFmpegExtractAudioPP',      'FFmpegMergerPP',      'FFmpegMetadataPP', +    'FFmpegPostProcessor',      'FFmpegVideoConvertor', -    'FFmpegExtractAudioPP', -    'FFmpegEmbedSubtitlePP',      'XAttrMetadataPP', -    'ExecAfterDownloadPP',  ] diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 8c5f7c43b..083c79592 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -1,4 +1,5 @@  import os +import re  import subprocess  import sys  import time @@ -7,10 +8,10 @@ import time  from .common import AudioConversionError, PostProcessor  from ..utils import ( -    check_executable,      compat_subprocess_get_DEVNULL,      encodeArgument,      encodeFilename, +    is_outdated_version,      PostProcessingError,      prepend_extension,      shell_quote, @@ -18,6 +19,23 @@ from ..utils import (  ) +def get_version(executable): +    """ Returns the version of the specified executable, +    or False if the executable is not present """ +    try: +        out, err = subprocess.Popen( +            [executable, '-version'], +            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() +    except OSError: +        return False +    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore') +    m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline) +    if not m: +        return u'present' +    else: +        return m.group(1) + +  class FFmpegPostProcessorError(PostProcessingError):      pass @@ -25,31 +43,58 @@ class FFmpegPostProcessorError(PostProcessingError):  class FFmpegPostProcessor(PostProcessor):      def __init__(self, downloader=None, deletetempfiles=False):          PostProcessor.__init__(self, downloader) -        self._exes = self.detect_executables() +        self._versions = self.get_versions()          self._deletetempfiles = deletetempfiles +    def check_version(self): +        if not self._executable: +            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') + +        REQUIRED_VERSION = '1.0' +        if is_outdated_version( +                self._versions[self._executable], REQUIRED_VERSION): +            warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( +                self._executable, self._executable, REQUIRED_VERSION) +            if self._downloader: +                self._downloader.report_warning(warning) +      @staticmethod -    def detect_executables(): +    def get_versions():          programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] -        return dict((program, check_executable(program, ['-version'])) for program in programs) +        return dict((program, get_version(program)) for program in programs) + +    @property +    def _executable(self): +        if self._downloader.params.get('prefer_ffmpeg', False): +            prefs = ('ffmpeg', 'avconv') +        else: +            prefs = ('avconv', 'ffmpeg') +        for p in prefs: +            if self._versions[p]: +                return p +        return None -    def _get_executable(self): +    @property +    def _probe_executable(self):          if self._downloader.params.get('prefer_ffmpeg', False): -            return self._exes['ffmpeg'] or self._exes['avconv'] +            prefs = ('ffprobe', 'avprobe')          else: -            return self._exes['avconv'] or self._exes['ffmpeg'] +            prefs = ('avprobe', 'ffprobe') +        for p in prefs: +            if self._versions[p]: +                return p +        return None      def _uses_avconv(self): -        return self._get_executable() == self._exes['avconv'] +        return self._executable == 'avconv'      def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): -        if not self._get_executable(): -            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') +        self.check_version()          files_cmd = []          for path in input_paths:              files_cmd.extend(['-i', encodeFilename(path, True)]) -        cmd = ([self._get_executable(), '-y'] + files_cmd +        cmd = ([self._executable, '-y'] + files_cmd                 + [encodeArgument(o) for o in opts] +                 [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) @@ -85,11 +130,12 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):          self._nopostoverwrites = nopostoverwrites      def get_audio_codec(self, path): -        if not self._exes['ffprobe'] and not self._exes['avprobe']: + +        if not self._probe_executable:              raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')          try:              cmd = [ -                self._exes['avprobe'] or self._exes['ffprobe'], +                self._probe_executable,                  '-show_streams',                  encodeFilename(self._ffmpeg_filename_argument(path), True)]              handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) @@ -182,14 +228,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):              if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):                  self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)              else: -                self._downloader.to_screen(u'[' + self._get_executable() + '] Destination: ' + new_path) +                self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path)                  self.run_ffmpeg(path, new_path, acodec, more_opts)          except:              etype,e,tb = sys.exc_info()              if isinstance(e, AudioConversionError):                  msg = u'audio conversion failed: ' + e.msg              else: -                msg = u'error running ' + self._get_executable() +                msg = u'error running ' + self._executable              raise PostProcessingError(msg)          # Try to update the date time for extracted audio file. @@ -487,7 +533,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):  class FFmpegMergerPP(FFmpegPostProcessor):      def run(self, info):          filename = info['filepath'] -        args = ['-c', 'copy'] +        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']          self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)          self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)          return True, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d7ae5a90a..6c0c39ca5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -203,6 +203,82 @@ def compat_ord(c):      if type(c) is int: return c      else: return ord(c) + +if sys.version_info >= (3, 0): +    compat_getenv = os.getenv +    compat_expanduser = os.path.expanduser +else: +    # Environment variables should be decoded with filesystem encoding. +    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) + +    def compat_getenv(key, default=None): +        env = os.getenv(key, default) +        if env: +            env = env.decode(get_filesystem_encoding()) +        return env + +    # HACK: The default implementations of os.path.expanduser from cpython do not decode +    # environment variables with filesystem encoding. We will work around this by +    # providing adjusted implementations. +    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib +    # for different platforms with correct environment variables decoding. + +    if os.name == 'posix': +        def compat_expanduser(path): +            """Expand ~ and ~user constructions.  If user or $HOME is unknown, +            do nothing.""" +            if not path.startswith('~'): +                return path +            i = path.find('/', 1) +            if i < 0: +                i = len(path) +            if i == 1: +                if 'HOME' not in os.environ: +                    import pwd +                    userhome = pwd.getpwuid(os.getuid()).pw_dir +                else: +                    userhome = compat_getenv('HOME') +            else: +                import pwd +                try: +                    pwent = pwd.getpwnam(path[1:i]) +                except KeyError: +                    return path +                userhome = pwent.pw_dir +            userhome = userhome.rstrip('/') +            return (userhome + path[i:]) or '/' +    elif os.name == 'nt' or os.name == 'ce': +        def compat_expanduser(path): +            """Expand ~ and ~user constructs. + +            If user or $HOME is unknown, do nothing.""" +            if path[:1] != '~': +                return path +            i, n = 1, len(path) +            while i < n and path[i] not in '/\\': +                i = i + 1 + +            if 'HOME' in os.environ: +                userhome = compat_getenv('HOME') +            elif 'USERPROFILE' in os.environ: +                userhome = compat_getenv('USERPROFILE') +            elif not 'HOMEPATH' in os.environ: +                return path +            else: +                try: +                    drive = compat_getenv('HOMEDRIVE') +                except KeyError: +                    drive = '' +                userhome = os.path.join(drive, compat_getenv('HOMEPATH')) + +            if i != 1: #~user +                userhome = os.path.join(os.path.dirname(userhome), path[1:i]) + +            return userhome + path[i:] +    else: +        compat_expanduser = os.path.expanduser + +  # This is not clearly defined otherwise  compiled_regex_type = type(re.compile('')) @@ -1207,11 +1283,14 @@ class locked_file(object):          return self.f.read(*args) +def get_filesystem_encoding(): +    encoding = sys.getfilesystemencoding() +    return encoding if encoding is not None else 'utf-8' + +  def shell_quote(args):      quoted_args = [] -    encoding = sys.getfilesystemencoding() -    if encoding is None: -        encoding = 'utf-8' +    encoding = get_filesystem_encoding()      for a in args:          if isinstance(a, bytes):              # We may get a filename encoded with 'encodeFilename' @@ -1261,7 +1340,7 @@ def format_bytes(bytes):  def get_term_width(): -    columns = os.environ.get('COLUMNS', None) +    columns = compat_getenv('COLUMNS', None)      if columns:          return int(columns) @@ -1644,3 +1723,16 @@ def limit_length(s, length):      if len(s) > length:          return s[:length - len(ELLIPSES)] + ELLIPSES      return s + + +def version_tuple(v): +    return [int(e) for e in v.split('.')] + + +def is_outdated_version(version, limit, assume_new=True): +    if not version: +        return not assume_new +    try: +        return version_tuple(version) < version_tuple(limit) +    except ValueError: +        return not assume_new diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4f0d486b9..8e5a642a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.10.05.2' +__version__ = '2014.10.26.2'  | 
