| -rw-r--r-- | test/test_download.py          |  16 |
| -rw-r--r-- | test/test_write_info_json.py   |  12 |
| -rw-r--r-- | test/test_youtube_lists.py     |  22 |
| -rw-r--r-- | test/test_youtube_subtitles.py |  22 |
| -rw-r--r-- | youtube_dl/FileDownloader.py   | 542 |
| -rw-r--r-- | youtube_dl/YoutubeDL.py        | 595 |
| -rw-r--r-- | youtube_dl/__init__.py         |  25 |
7 files changed, 658 insertions, 576 deletions
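This commit splits the monolithic FileDownloader in two: the new YoutubeDL class takes over extractor registration, info-dict processing, output templates and post-processing, while FileDownloader is reduced to the actual byte transfer and progress reporting and delegates its messaging (to_screen, report_warning, report_error) back to the YoutubeDL instance that owns it. The following is a minimal sketch of the resulting API, reconstructed from the test updates in this diff; the option values and the sample URL are illustrative placeholders, not taken from the commit.

    # Sketch of post-refactor usage, based on the updated tests in this commit.
    # The params values and the video URL below are placeholders.
    import youtube_dl.extractor
    from youtube_dl import YoutubeDL

    params = {
        'outtmpl': u'%(id)s.%(ext)s',  # YoutubeDL.__init__ reads params['outtmpl']
        'quiet': True,
    }

    # YoutubeDL drives extraction and result processing; it creates the
    # FileDownloader it will use internally and exposes it as ydl.fd.
    ydl = YoutubeDL(params)
    for ie in youtube_dl.extractor.gen_extractors():
        ydl.add_info_extractor(ie)

    # Progress hooks now belong to the embedded FileDownloader, as the change
    # from fd.add_progress_hook(...) to ydl.fd.add_progress_hook(...) in
    # test_download.py shows.
    def hook(status):
        if status['status'] == 'finished':
            print('finished: %s' % status['filename'])
    ydl.fd.add_progress_hook(hook)

    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])

The docstring added in YoutubeDL.py below spells out the same division of labour: options such as nopart, ratelimit, retries, buffersize and the progress/console settings are consumed by FileDownloader, while everything else (authentication, format selection, output templates, subtitles, metadata files, date ranges) is handled by YoutubeDL.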
| diff --git a/test/test_download.py b/test/test_download.py index 8d8698478..9af626dca 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -13,7 +13,7 @@ import binascii  # Allow direct execution  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import youtube_dl.FileDownloader +import youtube_dl.YoutubeDL  import youtube_dl.extractor  from youtube_dl.utils import * @@ -40,17 +40,17 @@ def _try_rm(filename):  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() -class FileDownloader(youtube_dl.FileDownloader): +class YoutubeDL(youtube_dl.YoutubeDL):      def __init__(self, *args, **kwargs):          self.to_stderr = self.to_screen          self.processed_info_dicts = [] -        return youtube_dl.FileDownloader.__init__(self, *args, **kwargs) +        super(YoutubeDL, self).__init__(*args, **kwargs)      def report_warning(self, message):          # Don't accept warnings during tests          raise ExtractorError(message)      def process_info(self, info_dict):          self.processed_info_dicts.append(info_dict) -        return youtube_dl.FileDownloader.process_info(self, info_dict) +        return super(YoutubeDL, self).process_info(info_dict)  def _file_md5(fn):      with open(fn, 'rb') as f: @@ -86,14 +86,14 @@ def generator(test_case):          params = self.parameters.copy()          params.update(test_case.get('params', {})) -        fd = FileDownloader(params) +        ydl = YoutubeDL(params)          for ie in youtube_dl.extractor.gen_extractors(): -            fd.add_info_extractor(ie) +            ydl.add_info_extractor(ie)          finished_hook_called = set()          def _hook(status):              if status['status'] == 'finished':                  finished_hook_called.add(status['filename']) -        fd.add_progress_hook(_hook) +        ydl.fd.add_progress_hook(_hook)          test_cases = test_case.get('playlist', [test_case])          for tc in test_cases: @@ -103,7 +103,7 @@ def generator(test_case):          try:              for retry in range(1, RETRIES + 1):                  try: -                    fd.download([test_case['url']]) +                    ydl.download([test_case['url']])                  except (DownloadError, ExtractorError) as err:                      if retry == RETRIES: raise diff --git a/test/test_write_info_json.py b/test/test_write_info_json.py index b5eca7649..de6d5180f 100644 --- a/test/test_write_info_json.py +++ b/test/test_write_info_json.py @@ -9,7 +9,7 @@ import unittest  # Allow direct execution  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import youtube_dl.FileDownloader +import youtube_dl.YoutubeDL  import youtube_dl.extractor  from youtube_dl.utils import * @@ -22,9 +22,9 @@ proxy_handler = compat_urllib_request.ProxyHandler()  opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())  compat_urllib_request.install_opener(opener) -class FileDownloader(youtube_dl.FileDownloader): +class YoutubeDL(youtube_dl.YoutubeDL):      def __init__(self, *args, **kwargs): -        youtube_dl.FileDownloader.__init__(self, *args, **kwargs) +        super(YoutubeDL, self).__init__(*args, **kwargs)          self.to_stderr = self.to_screen  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -49,9 +49,9 @@ class TestInfoJSON(unittest.TestCase):      def test_info_json(self):          ie = youtube_dl.extractor.YoutubeIE() -        fd = FileDownloader(params) -        fd.add_info_extractor(ie) -        fd.download([TEST_ID]) +  
      ydl = YoutubeDL(params) +        ydl.add_info_extractor(ie) +        ydl.download([TEST_ID])          self.assertTrue(os.path.exists(INFO_JSON_FILE))          with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:              jd = json.load(jsonf) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 2c46edfa6..320b44082 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,7 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE  from youtube_dl.utils import * -from youtube_dl.FileDownloader import FileDownloader +from youtube_dl import YoutubeDL  PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -23,7 +23,7 @@ proxy_handler = compat_urllib_request.ProxyHandler()  opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())  compat_urllib_request.install_opener(opener) -class FakeDownloader(FileDownloader): +class FakeYDL(YoutubeDL):      def __init__(self):          self.result = []          self.params = parameters @@ -41,7 +41,7 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(info['_type'], 'playlist')      def test_youtube_playlist(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]          self.assertIsPlaylist(result) @@ -50,13 +50,13 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])      def test_issue_673(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('PLBB231211A4F62143')[0]          self.assertTrue(len(result['entries']) > 25)      def test_youtube_playlist_long(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]          self.assertIsPlaylist(result) @@ -64,7 +64,7 @@ class TestYoutubeLists(unittest.TestCase):      def test_youtube_playlist_with_deleted(self):          #651 -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]          ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] @@ -72,14 +72,14 @@ class TestYoutubeLists(unittest.TestCase):          self.assertFalse('KdPEApIVdWM' in ytie_results)      def test_youtube_playlist_empty(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]          self.assertIsPlaylist(result)          self.assertEqual(len(result['entries']), 0)      def test_youtube_course(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          # TODO find a > 100 (paginating?) 
videos course          result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] @@ -89,7 +89,7 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')      def test_youtube_channel(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubeChannelIE(dl)          #test paginated channel          result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] @@ -99,13 +99,13 @@ class TestYoutubeLists(unittest.TestCase):          self.assertTrue(len(result['entries']) >= 18)      def test_youtube_user(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubeUserIE(dl)          result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]          self.assertTrue(len(result['entries']) >= 320)      def test_youtube_safe_search(self): -        dl = FakeDownloader() +        dl = FakeYDL()          ie = YoutubePlaylistIE(dl)          result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]          self.assertEqual(len(result['entries']), 2) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 129e23963..e8f5e4ae7 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -12,7 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from youtube_dl.extractor import YoutubeIE  from youtube_dl.utils import * -from youtube_dl import FileDownloader +from youtube_dl import YoutubeDL  PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -25,7 +25,7 @@ proxy_handler = compat_urllib_request.ProxyHandler()  opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())  compat_urllib_request.install_opener(opener) -class FakeDownloader(FileDownloader): +class FakeYDL(YoutubeDL):      def __init__(self):          self.result = []          # Different instances of the downloader can't share the same dictionary @@ -42,27 +42,27 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()  class TestYoutubeSubtitles(unittest.TestCase):      def setUp(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['allsubtitles'] = False          DL.params['writesubtitles'] = False          DL.params['subtitlesformat'] = 'srt'          DL.params['listsubtitles'] = False      def test_youtube_no_subtitles(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = False          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM')          subtitles = info_dict[0]['subtitles']          self.assertEqual(subtitles, None)      def test_youtube_subtitles(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = True          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM')          sub = info_dict[0]['subtitles'][0]          self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')      def test_youtube_subtitles_it(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = True          DL.params['subtitleslang'] = 'it'          IE = YoutubeIE(DL) @@ -70,7 +70,7 @@ class TestYoutubeSubtitles(unittest.TestCase):          sub = info_dict[0]['subtitles'][0]          self.assertEqual(md5(sub[2]), 
'164a51f16f260476a05b50fe4c2f161d')      def test_youtube_onlysubtitles(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = True          DL.params['onlysubtitles'] = True          IE = YoutubeIE(DL) @@ -78,14 +78,14 @@ class TestYoutubeSubtitles(unittest.TestCase):          sub = info_dict[0]['subtitles'][0]          self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')      def test_youtube_allsubtitles(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['allsubtitles'] = True          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM')          subtitles = info_dict[0]['subtitles']          self.assertEqual(len(subtitles), 13)      def test_youtube_subtitles_format(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = True          DL.params['subtitlesformat'] = 'sbv'          IE = YoutubeIE(DL) @@ -93,13 +93,13 @@ class TestYoutubeSubtitles(unittest.TestCase):          sub = info_dict[0]['subtitles'][0]          self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')      def test_youtube_list_subtitles(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['listsubtitles'] = True          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM')          self.assertEqual(info_dict, None)      def test_youtube_automatic_captions(self): -        DL = FakeDownloader() +        DL = FakeYDL()          DL.params['writesubtitles'] = True          DL.params['subtitleslang'] = 'it'          IE = YoutubeIE(DL) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index f21c66c29..445f3e85e 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -1,9 +1,6 @@  import math -import io  import os  import re -import shutil -import socket  import subprocess  import sys  import time @@ -13,103 +10,45 @@ if os.name == 'nt':      import ctypes  from .utils import * -from .extractor import get_info_extractor  class FileDownloader(object):      """File Downloader class.      File downloader objects are the ones responsible of downloading the -    actual video file and writing it to disk if the user has requested -    it, among some other tasks. In most cases there should be one per -    program. As, given a video URL, the downloader doesn't know how to -    extract all the needed information, task that InfoExtractors do, it -    has to pass the URL to one of them. - -    For this, file downloader objects have a method that allows -    InfoExtractors to be registered in a given order. When it is passed -    a URL, the file downloader handles it to the first InfoExtractor it -    finds that reports being able to handle it. The InfoExtractor extracts -    all the information about the video or videos the URL refers to, and -    asks the FileDownloader to process the video information, possibly -    downloading the video. +    actual video file and writing it to disk.      File downloaders accept a lot of parameters. In order not to saturate      the object constructor with arguments, it receives a dictionary of -    options instead. These options are available through the params -    attribute for the InfoExtractors to use. The FileDownloader also -    registers itself as the downloader in charge for the InfoExtractors -    that are added to it, so this is a "mutual registration". +    options instead.      
Available options: -    username:          Username for authentication purposes. -    password:          Password for authentication purposes. -    usenetrc:          Use netrc for authentication instead. +    verbose:           Print additional info to stdout.      quiet:             Do not print messages to stdout. -    forceurl:          Force printing final URL. -    forcetitle:        Force printing title. -    forceid:           Force printing ID. -    forcethumbnail:    Force printing thumbnail URL. -    forcedescription:  Force printing description. -    forcefilename:     Force printing final filename. -    simulate:          Do not download the video files. -    format:            Video format code. -    format_limit:      Highest quality format to try. -    outtmpl:           Template for output names. -    restrictfilenames: Do not allow "&" and spaces in file names -    ignoreerrors:      Do not stop on download errors.      ratelimit:         Download speed limit, in bytes/sec. -    nooverwrites:      Prevent overwriting files.      retries:           Number of times to retry for HTTP error 5xx      buffersize:        Size of download buffer in bytes.      noresizebuffer:    Do not automatically resize the download buffer.      continuedl:        Try to continue downloads if possible.      noprogress:        Do not print the progress bar. -    playliststart:     Playlist item to start at. -    playlistend:       Playlist item to end at. -    matchtitle:        Download only matching titles. -    rejecttitle:       Reject downloads for matching titles.      logtostderr:       Log messages to stderr instead of stdout.      consoletitle:      Display progress in console window's titlebar.      nopart:            Do not use temporary .part files.      updatetime:        Use the Last-modified header to set output file timestamps. -    writedescription:  Write the video description to a .description file -    writeinfojson:     Write the video description to a .info.json file -    writethumbnail:    Write the thumbnail image to a file -    writesubtitles:    Write the video subtitles to a file -    allsubtitles:      Downloads all the subtitles of the video -    listsubtitles:     Lists all available subtitles for the video -    subtitlesformat:   Subtitle format [sbv/srt] (default=srt) -    subtitleslang:     Language of the subtitles to download      test:              Download only first bytes to test the downloader. -    keepvideo:         Keep the video file after post-processing      min_filesize:      Skip files smaller than this size      max_filesize:      Skip files larger than this size -    daterange:         A DateRange object, download only if the upload_date is in the range. -    skip_download:     Skip the actual download of the video file      """      params = None -    _ies = [] -    _pps = [] -    _download_retcode = None -    _num_downloads = None -    _screen_file = None -    def __init__(self, params): +    def __init__(self, ydl, params):          """Create a FileDownloader object with the given options.""" -        self._ies = [] -        self._pps = [] +        self.ydl = ydl          self._progress_hooks = [] -        self._download_retcode = 0 -        self._num_downloads = 0 -        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self.params = params -        if '%(stitle)s' in self.params['outtmpl']: -            self.report_warning(u'%(stitle)s is deprecated. 
Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') -      @staticmethod      def format_bytes(bytes):          if bytes is None: @@ -174,34 +113,11 @@ class FileDownloader(object):          multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())          return int(round(number * multiplier)) -    def add_info_extractor(self, ie): -        """Add an InfoExtractor object to the end of the list.""" -        self._ies.append(ie) -        ie.set_downloader(self) - -    def add_post_processor(self, pp): -        """Add a PostProcessor object to the end of the chain.""" -        self._pps.append(pp) -        pp.set_downloader(self) - -    def to_screen(self, message, skip_eol=False): -        """Print message to stdout if not in quiet mode.""" -        assert type(message) == type(u'') -        if not self.params.get('quiet', False): -            terminator = [u'\n', u''][skip_eol] -            output = message + terminator -            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr -                output = output.encode(preferredencoding(), 'ignore') -            self._screen_file.write(output) -            self._screen_file.flush() +    def to_screen(self, *args, **kargs): +        self.ydl.to_screen(*args, **kargs)      def to_stderr(self, message): -        """Print message to stderr.""" -        assert type(message) == type(u'') -        output = message + u'\n' -        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr -            output = output.encode(preferredencoding()) -        sys.stderr.write(output) +        self.ydl.to_screen(message)      def to_cons_title(self, message):          """Set console/terminal window title to message.""" @@ -214,63 +130,14 @@ class FileDownloader(object):          elif 'TERM' in os.environ:              self.to_screen('\033]0;%s\007' % message, skip_eol=True) -    def fixed_template(self): -        """Checks if the output template is fixed.""" -        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) - -    def trouble(self, message=None, tb=None): -        """Determine action to take when a download problem appears. +    def trouble(self, *args, **kargs): +        self.ydl.trouble(*args, **kargs) -        Depending on if the downloader has been configured to ignore -        download errors or not, this method may throw an exception or -        not when errors are found, after printing the message. +    def report_warning(self, *args, **kargs): +        self.ydl.report_warning(*args, **kargs) -        tb, if given, is additional traceback information. 
-        """ -        if message is not None: -            self.to_stderr(message) -        if self.params.get('verbose'): -            if tb is None: -                if sys.exc_info()[0]:  # if .trouble has been called from an except block -                    tb = u'' -                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: -                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) -                    tb += compat_str(traceback.format_exc()) -                else: -                    tb_data = traceback.format_list(traceback.extract_stack()) -                    tb = u''.join(tb_data) -            self.to_stderr(tb) -        if not self.params.get('ignoreerrors', False): -            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: -                exc_info = sys.exc_info()[1].exc_info -            else: -                exc_info = sys.exc_info() -            raise DownloadError(message, exc_info) -        self._download_retcode = 1 - -    def report_warning(self, message): -        ''' -        Print the message to stderr, it will be prefixed with 'WARNING:' -        If stderr is a tty file the 'WARNING:' will be colored -        ''' -        if sys.stderr.isatty() and os.name != 'nt': -            _msg_header=u'\033[0;33mWARNING:\033[0m' -        else: -            _msg_header=u'WARNING:' -        warning_message=u'%s %s' % (_msg_header,message) -        self.to_stderr(warning_message) - -    def report_error(self, message, tb=None): -        ''' -        Do the same as trouble, but prefixes the message with 'ERROR:', colored -        in red if stderr is a tty file. -        ''' -        if sys.stderr.isatty() and os.name != 'nt': -            _msg_header = u'\033[0;31mERROR:\033[0m' -        else: -            _msg_header = u'ERROR:' -        error_message = u'%s %s' % (_msg_header, message) -        self.trouble(error_message, tb) +    def report_error(self, *args, **kargs): +        self.ydl.error(*args, **kargs)      def slow_down(self, start_time, byte_counter):          """Sleep if the download speed is over the rate limit.""" @@ -326,18 +193,6 @@ class FileDownloader(object):              pass          return filetime -    def report_writedescription(self, descfn): -        """ Report that the description file is being written """ -        self.to_screen(u'[info] Writing video description to: ' + descfn) - -    def report_writesubtitles(self, sub_filename): -        """ Report that the subtitles file is being written """ -        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) - -    def report_writeinfojson(self, infofn): -        """ Report that the metadata file has been written """ -        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) -      def report_destination(self, filename):          """Report destination filename."""          self.to_screen(u'[download] Destination: ' + filename) @@ -382,375 +237,6 @@ class FileDownloader(object):          else:              self.to_screen(u'') -    def increment_downloads(self): -        """Increment the ordinal that assigns a number to each file.""" -        self._num_downloads += 1 - -    def prepare_filename(self, info_dict): -        """Generate the output filename.""" -        try: -            template_dict = dict(info_dict) - -            template_dict['epoch'] = int(time.time()) -            autonumber_size = self.params.get('autonumber_size') - 
           if autonumber_size is None: -                autonumber_size = 5 -            autonumber_templ = u'%0' + str(autonumber_size) + u'd' -            template_dict['autonumber'] = autonumber_templ % self._num_downloads -            if template_dict['playlist_index'] is not None: -                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] - -            sanitize = lambda k,v: sanitize_filename( -                u'NA' if v is None else compat_str(v), -                restricted=self.params.get('restrictfilenames'), -                is_id=(k==u'id')) -            template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) - -            filename = self.params['outtmpl'] % template_dict -            return filename -        except KeyError as err: -            self.report_error(u'Erroneous output template') -            return None -        except ValueError as err: -            self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) -            return None - -    def _match_entry(self, info_dict): -        """ Returns None iff the file should be downloaded """ - -        title = info_dict['title'] -        matchtitle = self.params.get('matchtitle', False) -        if matchtitle: -            if not re.search(matchtitle, title, re.IGNORECASE): -                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' -        rejecttitle = self.params.get('rejecttitle', False) -        if rejecttitle: -            if re.search(rejecttitle, title, re.IGNORECASE): -                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' -        date = info_dict.get('upload_date', None) -        if date is not None: -            dateRange = self.params.get('daterange', DateRange()) -            if date not in dateRange: -                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) -        return None -         -    def extract_info(self, url, download=True, ie_key=None, extra_info={}): -        ''' -        Returns a list with a dictionary for each video we find. -        If 'download', also downloads the videos. 
-        extra_info is a dict containing the extra values to add to each result -         ''' -         -        if ie_key: -            ie = get_info_extractor(ie_key)() -            ie.set_downloader(self) -            ies = [ie] -        else: -            ies = self._ies - -        for ie in ies: -            if not ie.suitable(url): -                continue - -            if not ie.working(): -                self.report_warning(u'The program functionality for this site has been marked as broken, ' -                                    u'and will probably not work.') - -            try: -                ie_result = ie.extract(url) -                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) -                    break -                if isinstance(ie_result, list): -                    # Backwards compatibility: old IE result format -                    for result in ie_result: -                        result.update(extra_info) -                    ie_result = { -                        '_type': 'compat_list', -                        'entries': ie_result, -                    } -                else: -                    ie_result.update(extra_info) -                if 'extractor' not in ie_result: -                    ie_result['extractor'] = ie.IE_NAME -                return self.process_ie_result(ie_result, download=download) -            except ExtractorError as de: # An error we somewhat expected -                self.report_error(compat_str(de), de.format_traceback()) -                break -            except Exception as e: -                if self.params.get('ignoreerrors', False): -                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) -                    break -                else: -                    raise -        else: -            self.report_error(u'no suitable InfoExtractor: %s' % url) -         -    def process_ie_result(self, ie_result, download=True, extra_info={}): -        """ -        Take the result of the ie(may be modified) and resolve all unresolved -        references (URLs, playlist items). - -        It will also download the videos if 'download'. -        Returns the resolved ie_result. 
-        """ - -        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system -        if result_type == 'video': -            if 'playlist' not in ie_result: -                # It isn't part of a playlist -                ie_result['playlist'] = None -                ie_result['playlist_index'] = None -            if download: -                self.process_info(ie_result) -            return ie_result -        elif result_type == 'url': -            # We have to add extra_info to the results because it may be -            # contained in a playlist -            return self.extract_info(ie_result['url'], -                                     download, -                                     ie_key=ie_result.get('ie_key'), -                                     extra_info=extra_info) -        elif result_type == 'playlist': -            # We process each entry in the playlist -            playlist = ie_result.get('title', None) or ie_result.get('id', None) -            self.to_screen(u'[download] Downloading playlist: %s'  % playlist) - -            playlist_results = [] - -            n_all_entries = len(ie_result['entries']) -            playliststart = self.params.get('playliststart', 1) - 1 -            playlistend = self.params.get('playlistend', -1) - -            if playlistend == -1: -                entries = ie_result['entries'][playliststart:] -            else: -                entries = ie_result['entries'][playliststart:playlistend] - -            n_entries = len(entries) - -            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % -                (ie_result['extractor'], playlist, n_all_entries, n_entries)) - -            for i,entry in enumerate(entries,1): -                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) -                extra = { -                         'playlist': playlist,  -                         'playlist_index': i + playliststart, -                         } -                if not 'extractor' in entry: -                    # We set the extractor, if it's an url it will be set then to -                    # the new extractor, but if it's already a video we must make -                    # sure it's present: see issue #877 -                    entry['extractor'] = ie_result['extractor'] -                entry_result = self.process_ie_result(entry, -                                                      download=download, -                                                      extra_info=extra) -                playlist_results.append(entry_result) -            ie_result['entries'] = playlist_results -            return ie_result -        elif result_type == 'compat_list': -            def _fixup(r): -                r.setdefault('extractor', ie_result['extractor']) -                return r -            ie_result['entries'] = [ -                self.process_ie_result(_fixup(r), download=download) -                for r in ie_result['entries'] -            ] -            return ie_result -        else: -            raise Exception('Invalid result type: %s' % result_type) - -    def process_info(self, info_dict): -        """Process a single resolved IE result.""" - -        assert info_dict.get('_type', 'video') == 'video' -        #We increment the download the download count here to match the previous behaviour. 
-        self.increment_downloads() - -        info_dict['fulltitle'] = info_dict['title'] -        if len(info_dict['title']) > 200: -            info_dict['title'] = info_dict['title'][:197] + u'...' - -        # Keep for backwards compatibility -        info_dict['stitle'] = info_dict['title'] - -        if not 'format' in info_dict: -            info_dict['format'] = info_dict['ext'] - -        reason = self._match_entry(info_dict) -        if reason is not None: -            self.to_screen(u'[download] ' + reason) -            return - -        max_downloads = self.params.get('max_downloads') -        if max_downloads is not None: -            if self._num_downloads > int(max_downloads): -                raise MaxDownloadsReached() - -        filename = self.prepare_filename(info_dict) - -        # Forced printings -        if self.params.get('forcetitle', False): -            compat_print(info_dict['title']) -        if self.params.get('forceid', False): -            compat_print(info_dict['id']) -        if self.params.get('forceurl', False): -            compat_print(info_dict['url']) -        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: -            compat_print(info_dict['thumbnail']) -        if self.params.get('forcedescription', False) and 'description' in info_dict: -            compat_print(info_dict['description']) -        if self.params.get('forcefilename', False) and filename is not None: -            compat_print(filename) -        if self.params.get('forceformat', False): -            compat_print(info_dict['format']) - -        # Do nothing else if in simulate mode -        if self.params.get('simulate', False): -            return - -        if filename is None: -            return - -        try: -            dn = os.path.dirname(encodeFilename(filename)) -            if dn != '' and not os.path.exists(dn): -                os.makedirs(dn) -        except (OSError, IOError) as err: -            self.report_error(u'unable to create directory ' + compat_str(err)) -            return - -        if self.params.get('writedescription', False): -            try: -                descfn = filename + u'.description' -                self.report_writedescription(descfn) -                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: -                    descfile.write(info_dict['description']) -            except (OSError, IOError): -                self.report_error(u'Cannot write description file ' + descfn) -                return - -        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: -            # subtitles download errors are already managed as troubles in relevant IE -            # that way it will silently go on when used with unsupporting IE -            subtitle = info_dict['subtitles'][0] -            (sub_error, sub_lang, sub) = subtitle -            sub_format = self.params.get('subtitlesformat') -            if sub_error: -                self.report_warning("Some error while getting the subtitles") -            else: -                try: -                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format -                    self.report_writesubtitles(sub_filename) -                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: -                        subfile.write(sub) -                except (OSError, IOError): -                    self.report_error(u'Cannot write subtitles file ' + descfn) -                    return - -        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: -            subtitles = info_dict['subtitles'] -            sub_format = self.params.get('subtitlesformat') -            for subtitle in subtitles: -                (sub_error, sub_lang, sub) = subtitle -                if sub_error: -                    self.report_warning("Some error while getting the subtitles") -                else: -                    try: -                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format -                        self.report_writesubtitles(sub_filename) -                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: -                                subfile.write(sub) -                    except (OSError, IOError): -                        self.report_error(u'Cannot write subtitles file ' + descfn) -                        return - -        if self.params.get('writeinfojson', False): -            infofn = filename + u'.info.json' -            self.report_writeinfojson(infofn) -            try: -                json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) -                write_json_file(json_info_dict, encodeFilename(infofn)) -            except (OSError, IOError): -                self.report_error(u'Cannot write metadata to JSON file ' + infofn) -                return - -        if self.params.get('writethumbnail', False): -            if 'thumbnail' in info_dict: -                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2] -                if not thumb_format: -                    thumb_format = 'jpg' -                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format -                self.to_screen(u'[%s] %s: Downloading thumbnail ...' 
% -                               (info_dict['extractor'], info_dict['id'])) -                uf = compat_urllib_request.urlopen(info_dict['thumbnail']) -                with open(thumb_filename, 'wb') as thumbf: -                    shutil.copyfileobj(uf, thumbf) -                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % -                               (info_dict['extractor'], info_dict['id'], thumb_filename)) - -        if not self.params.get('skip_download', False): -            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): -                success = True -            else: -                try: -                    success = self._do_download(filename, info_dict) -                except (OSError, IOError) as err: -                    raise UnavailableVideoError() -                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                    self.report_error(u'unable to download video data: %s' % str(err)) -                    return -                except (ContentTooShortError, ) as err: -                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) -                    return - -            if success: -                try: -                    self.post_process(filename, info_dict) -                except (PostProcessingError) as err: -                    self.report_error(u'postprocessing: %s' % str(err)) -                    return - -    def download(self, url_list): -        """Download a given list of URLs.""" -        if len(url_list) > 1 and self.fixed_template(): -            raise SameFileError(self.params['outtmpl']) - -        for url in url_list: -            try: -                #It also downloads the videos -                videos = self.extract_info(url) -            except UnavailableVideoError: -                self.report_error(u'unable to download video') -            except MaxDownloadsReached: -                self.to_screen(u'[info] Maximum number of downloaded files reached.') -                raise - -        return self._download_retcode - -    def post_process(self, filename, ie_info): -        """Run all the postprocessors on the given file.""" -        info = dict(ie_info) -        info['filepath'] = filename -        keep_video = None -        for pp in self._pps: -            try: -                keep_video_wish,new_info = pp.run(info) -                if keep_video_wish is not None: -                    if keep_video_wish: -                        keep_video = keep_video_wish -                    elif keep_video is None: -                        # No clear decision yet, let IE decide -                        keep_video = keep_video_wish -            except PostProcessingError as e: -                self.to_stderr(u'ERROR: ' + e.msg) -        if keep_video is False and not self.params.get('keepvideo', False): -            try: -                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) -                os.remove(encodeFilename(filename)) -            except (IOError, OSError): -                self.report_warning(u'Unable to remove downloaded video file') -      def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):          self.report_destination(filename)          tmpfilename = self.temp_name(filename) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py new file mode 100644 index 000000000..52ee8cedb 
--- /dev/null +++ b/youtube_dl/YoutubeDL.py @@ -0,0 +1,595 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import io +import os +import re +import shutil +import socket +import sys +import time +import traceback + +from .utils import * +from .extractor import get_info_extractor +from .FileDownloader import FileDownloader + + +class YoutubeDL(object): +    """YoutubeDL class. + +    YoutubeDL objects are the ones responsible of downloading the +    actual video file and writing it to disk if the user has requested +    it, among some other tasks. In most cases there should be one per +    program. As, given a video URL, the downloader doesn't know how to +    extract all the needed information, task that InfoExtractors do, it +    has to pass the URL to one of them. + +    For this, YoutubeDL objects have a method that allows +    InfoExtractors to be registered in a given order. When it is passed +    a URL, the YoutubeDL object handles it to the first InfoExtractor it +    finds that reports being able to handle it. The InfoExtractor extracts +    all the information about the video or videos the URL refers to, and +    YoutubeDL process the extracted information, possibly using a File +    Downloader to download the video. + +    YoutubeDL objects accept a lot of parameters. In order not to saturate +    the object constructor with arguments, it receives a dictionary of +    options instead. These options are available through the params +    attribute for the InfoExtractors to use. The YoutubeDL also +    registers itself as the downloader in charge for the InfoExtractors +    that are added to it, so this is a "mutual registration". + +    Available options: + +    username:          Username for authentication purposes. +    password:          Password for authentication purposes. +    usenetrc:          Use netrc for authentication instead. +    verbose:           Print additional info to stdout. +    quiet:             Do not print messages to stdout. +    forceurl:          Force printing final URL. +    forcetitle:        Force printing title. +    forceid:           Force printing ID. +    forcethumbnail:    Force printing thumbnail URL. +    forcedescription:  Force printing description. +    forcefilename:     Force printing final filename. +    simulate:          Do not download the video files. +    format:            Video format code. +    format_limit:      Highest quality format to try. +    outtmpl:           Template for output names. +    restrictfilenames: Do not allow "&" and spaces in file names +    ignoreerrors:      Do not stop on download errors. +    nooverwrites:      Prevent overwriting files. +    playliststart:     Playlist item to start at. +    playlistend:       Playlist item to end at. +    matchtitle:        Download only matching titles. +    rejecttitle:       Reject downloads for matching titles. +    logtostderr:       Log messages to stderr instead of stdout. 
+    writedescription:  Write the video description to a .description file +    writeinfojson:     Write the video description to a .info.json file +    writethumbnail:    Write the thumbnail image to a file +    writesubtitles:    Write the video subtitles to a file +    allsubtitles:      Downloads all the subtitles of the video +    listsubtitles:     Lists all available subtitles for the video +    subtitlesformat:   Subtitle format [sbv/srt] (default=srt) +    subtitleslang:     Language of the subtitles to download +    keepvideo:         Keep the video file after post-processing +    daterange:         A DateRange object, download only if the upload_date is in the range. +    skip_download:     Skip the actual download of the video file +     +    The following parameters are not used by YoutubeDL itself, they are used by +    the FileDownloader: +    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, +    noresizebuffer, retries, continuedl, noprogress, consoletitle +    """ + +    params = None +    _ies = [] +    _pps = [] +    _download_retcode = None +    _num_downloads = None +    _screen_file = None + +    def __init__(self, params): +        """Create a FileDownloader object with the given options.""" +        self._ies = [] +        self._pps = [] +        self._progress_hooks = [] +        self._download_retcode = 0 +        self._num_downloads = 0 +        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] +        self.params = params +        self.fd = FileDownloader(self, self.params) + +        if '%(stitle)s' in self.params['outtmpl']: +            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + +    def add_info_extractor(self, ie): +        """Add an InfoExtractor object to the end of the list.""" +        self._ies.append(ie) +        ie.set_downloader(self) + +    def add_post_processor(self, pp): +        """Add a PostProcessor object to the end of the chain.""" +        self._pps.append(pp) +        pp.set_downloader(self) + +    def to_screen(self, message, skip_eol=False): +        """Print message to stdout if not in quiet mode.""" +        assert type(message) == type(u'') +        if not self.params.get('quiet', False): +            terminator = [u'\n', u''][skip_eol] +            output = message + terminator +            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr +                output = output.encode(preferredencoding(), 'ignore') +            self._screen_file.write(output) +            self._screen_file.flush() + +    def to_stderr(self, message): +        """Print message to stderr.""" +        assert type(message) == type(u'') +        output = message + u'\n' +        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr +            output = output.encode(preferredencoding()) +        sys.stderr.write(output) + +    def fixed_template(self): +        """Checks if the output template is fixed.""" +        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) + +    def trouble(self, message=None, tb=None): +        """Determine action to take when a download problem appears. 
+ +        Depending on if the downloader has been configured to ignore +        download errors or not, this method may throw an exception or +        not when errors are found, after printing the message. + +        tb, if given, is additional traceback information. +        """ +        if message is not None: +            self.to_stderr(message) +        if self.params.get('verbose'): +            if tb is None: +                if sys.exc_info()[0]:  # if .trouble has been called from an except block +                    tb = u'' +                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: +                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) +                    tb += compat_str(traceback.format_exc()) +                else: +                    tb_data = traceback.format_list(traceback.extract_stack()) +                    tb = u''.join(tb_data) +            self.to_stderr(tb) +        if not self.params.get('ignoreerrors', False): +            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: +                exc_info = sys.exc_info()[1].exc_info +            else: +                exc_info = sys.exc_info() +            raise DownloadError(message, exc_info) +        self._download_retcode = 1 + +    def report_warning(self, message): +        ''' +        Print the message to stderr, it will be prefixed with 'WARNING:' +        If stderr is a tty file the 'WARNING:' will be colored +        ''' +        if sys.stderr.isatty() and os.name != 'nt': +            _msg_header=u'\033[0;33mWARNING:\033[0m' +        else: +            _msg_header=u'WARNING:' +        warning_message=u'%s %s' % (_msg_header,message) +        self.to_stderr(warning_message) + +    def report_error(self, message, tb=None): +        ''' +        Do the same as trouble, but prefixes the message with 'ERROR:', colored +        in red if stderr is a tty file. 
+        ''' +        if sys.stderr.isatty() and os.name != 'nt': +            _msg_header = u'\033[0;31mERROR:\033[0m' +        else: +            _msg_header = u'ERROR:' +        error_message = u'%s %s' % (_msg_header, message) +        self.trouble(error_message, tb) + +    def slow_down(self, start_time, byte_counter): +        """Sleep if the download speed is over the rate limit.""" +        rate_limit = self.params.get('ratelimit', None) +        if rate_limit is None or byte_counter == 0: +            return +        now = time.time() +        elapsed = now - start_time +        if elapsed <= 0.0: +            return +        speed = float(byte_counter) / elapsed +        if speed > rate_limit: +            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit) + +    def report_writedescription(self, descfn): +        """ Report that the description file is being written """ +        self.to_screen(u'[info] Writing video description to: ' + descfn) + +    def report_writesubtitles(self, sub_filename): +        """ Report that the subtitles file is being written """ +        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) + +    def report_writeinfojson(self, infofn): +        """ Report that the metadata file has been written """ +        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) + +    def report_file_already_downloaded(self, file_name): +        """Report file has already been fully downloaded.""" +        try: +            self.to_screen(u'[download] %s has already been downloaded' % file_name) +        except (UnicodeEncodeError) as err: +            self.to_screen(u'[download] The file has already been downloaded') + +    def increment_downloads(self): +        """Increment the ordinal that assigns a number to each file.""" +        self._num_downloads += 1 + +    def prepare_filename(self, info_dict): +        """Generate the output filename.""" +        try: +            template_dict = dict(info_dict) + +            template_dict['epoch'] = int(time.time()) +            autonumber_size = self.params.get('autonumber_size') +            if autonumber_size is None: +                autonumber_size = 5 +            autonumber_templ = u'%0' + str(autonumber_size) + u'd' +            template_dict['autonumber'] = autonumber_templ % self._num_downloads +            if template_dict['playlist_index'] is not None: +                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] + +            sanitize = lambda k,v: sanitize_filename( +                u'NA' if v is None else compat_str(v), +                restricted=self.params.get('restrictfilenames'), +                is_id=(k==u'id')) +            template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) + +            filename = self.params['outtmpl'] % template_dict +            return filename +        except KeyError as err: +            self.report_error(u'Erroneous output template') +            return None +        except ValueError as err: +            self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) +            return None + +    def _match_entry(self, info_dict): +        """ Returns None iff the file should be downloaded """ + +        title = info_dict['title'] +        matchtitle = self.params.get('matchtitle', False) +        if matchtitle: +            if not re.search(matchtitle, title, re.IGNORECASE): +                return u'[download] "' + title + '" title did 
not match pattern "' + matchtitle + '"' +        rejecttitle = self.params.get('rejecttitle', False) +        if rejecttitle: +            if re.search(rejecttitle, title, re.IGNORECASE): +                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' +        date = info_dict.get('upload_date', None) +        if date is not None: +            dateRange = self.params.get('daterange', DateRange()) +            if date not in dateRange: +                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) +        return None +         +    def extract_info(self, url, download=True, ie_key=None, extra_info={}): +        ''' +        Returns a list with a dictionary for each video we find. +        If 'download', also downloads the videos. +        extra_info is a dict containing the extra values to add to each result +         ''' +         +        if ie_key: +            ie = get_info_extractor(ie_key)() +            ie.set_downloader(self) +            ies = [ie] +        else: +            ies = self._ies + +        for ie in ies: +            if not ie.suitable(url): +                continue + +            if not ie.working(): +                self.report_warning(u'The program functionality for this site has been marked as broken, ' +                                    u'and will probably not work.') + +            try: +                ie_result = ie.extract(url) +                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) +                    break +                if isinstance(ie_result, list): +                    # Backwards compatibility: old IE result format +                    for result in ie_result: +                        result.update(extra_info) +                    ie_result = { +                        '_type': 'compat_list', +                        'entries': ie_result, +                    } +                else: +                    ie_result.update(extra_info) +                if 'extractor' not in ie_result: +                    ie_result['extractor'] = ie.IE_NAME +                return self.process_ie_result(ie_result, download=download) +            except ExtractorError as de: # An error we somewhat expected +                self.report_error(compat_str(de), de.format_traceback()) +                break +            except Exception as e: +                if self.params.get('ignoreerrors', False): +                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) +                    break +                else: +                    raise +        else: +            self.report_error(u'no suitable InfoExtractor: %s' % url) +         +    def process_ie_result(self, ie_result, download=True, extra_info={}): +        """ +        Take the result of the ie(may be modified) and resolve all unresolved +        references (URLs, playlist items). + +        It will also download the videos if 'download'. +        Returns the resolved ie_result. 
+        """ + +        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system +        if result_type == 'video': +            if 'playlist' not in ie_result: +                # It isn't part of a playlist +                ie_result['playlist'] = None +                ie_result['playlist_index'] = None +            if download: +                self.process_info(ie_result) +            return ie_result +        elif result_type == 'url': +            # We have to add extra_info to the results because it may be +            # contained in a playlist +            return self.extract_info(ie_result['url'], +                                     download, +                                     ie_key=ie_result.get('ie_key'), +                                     extra_info=extra_info) +        elif result_type == 'playlist': +            # We process each entry in the playlist +            playlist = ie_result.get('title', None) or ie_result.get('id', None) +            self.to_screen(u'[download] Downloading playlist: %s'  % playlist) + +            playlist_results = [] + +            n_all_entries = len(ie_result['entries']) +            playliststart = self.params.get('playliststart', 1) - 1 +            playlistend = self.params.get('playlistend', -1) + +            if playlistend == -1: +                entries = ie_result['entries'][playliststart:] +            else: +                entries = ie_result['entries'][playliststart:playlistend] + +            n_entries = len(entries) + +            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % +                (ie_result['extractor'], playlist, n_all_entries, n_entries)) + +            for i,entry in enumerate(entries,1): +                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) +                extra = { +                         'playlist': playlist,  +                         'playlist_index': i + playliststart, +                         } +                if not 'extractor' in entry: +                    # We set the extractor, if it's an url it will be set then to +                    # the new extractor, but if it's already a video we must make +                    # sure it's present: see issue #877 +                    entry['extractor'] = ie_result['extractor'] +                entry_result = self.process_ie_result(entry, +                                                      download=download, +                                                      extra_info=extra) +                playlist_results.append(entry_result) +            ie_result['entries'] = playlist_results +            return ie_result +        elif result_type == 'compat_list': +            def _fixup(r): +                r.setdefault('extractor', ie_result['extractor']) +                return r +            ie_result['entries'] = [ +                self.process_ie_result(_fixup(r), download=download) +                for r in ie_result['entries'] +            ] +            return ie_result +        else: +            raise Exception('Invalid result type: %s' % result_type) + +    def process_info(self, info_dict): +        """Process a single resolved IE result.""" + +        assert info_dict.get('_type', 'video') == 'video' +        #We increment the download the download count here to match the previous behaviour. 
+        self.increment_downloads() + +        info_dict['fulltitle'] = info_dict['title'] +        if len(info_dict['title']) > 200: +            info_dict['title'] = info_dict['title'][:197] + u'...' + +        # Keep for backwards compatibility +        info_dict['stitle'] = info_dict['title'] + +        if not 'format' in info_dict: +            info_dict['format'] = info_dict['ext'] + +        reason = self._match_entry(info_dict) +        if reason is not None: +            self.to_screen(u'[download] ' + reason) +            return + +        max_downloads = self.params.get('max_downloads') +        if max_downloads is not None: +            if self._num_downloads > int(max_downloads): +                raise MaxDownloadsReached() + +        filename = self.prepare_filename(info_dict) + +        # Forced printings +        if self.params.get('forcetitle', False): +            compat_print(info_dict['title']) +        if self.params.get('forceid', False): +            compat_print(info_dict['id']) +        if self.params.get('forceurl', False): +            compat_print(info_dict['url']) +        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: +            compat_print(info_dict['thumbnail']) +        if self.params.get('forcedescription', False) and 'description' in info_dict: +            compat_print(info_dict['description']) +        if self.params.get('forcefilename', False) and filename is not None: +            compat_print(filename) +        if self.params.get('forceformat', False): +            compat_print(info_dict['format']) + +        # Do nothing else if in simulate mode +        if self.params.get('simulate', False): +            return + +        if filename is None: +            return + +        try: +            dn = os.path.dirname(encodeFilename(filename)) +            if dn != '' and not os.path.exists(dn): +                os.makedirs(dn) +        except (OSError, IOError) as err: +            self.report_error(u'unable to create directory ' + compat_str(err)) +            return + +        if self.params.get('writedescription', False): +            try: +                descfn = filename + u'.description' +                self.report_writedescription(descfn) +                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: +                    descfile.write(info_dict['description']) +            except (OSError, IOError): +                self.report_error(u'Cannot write description file ' + descfn) +                return + +        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: +            # subtitles download errors are already managed as troubles in relevant IE +            # that way it will silently go on when used with unsupporting IE +            subtitle = info_dict['subtitles'][0] +            (sub_error, sub_lang, sub) = subtitle +            sub_format = self.params.get('subtitlesformat') +            if sub_error: +                self.report_warning("Some error while getting the subtitles") +            else: +                try: +                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' 
+ sub_format +                    self.report_writesubtitles(sub_filename) +                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: +                        subfile.write(sub) +                except (OSError, IOError): +                    self.report_error(u'Cannot write subtitles file ' + descfn) +                    return + +        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: +            subtitles = info_dict['subtitles'] +            sub_format = self.params.get('subtitlesformat') +            for subtitle in subtitles: +                (sub_error, sub_lang, sub) = subtitle +                if sub_error: +                    self.report_warning("Some error while getting the subtitles") +                else: +                    try: +                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format +                        self.report_writesubtitles(sub_filename) +                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: +                                subfile.write(sub) +                    except (OSError, IOError): +                        self.report_error(u'Cannot write subtitles file ' + descfn) +                        return + +        if self.params.get('writeinfojson', False): +            infofn = filename + u'.info.json' +            self.report_writeinfojson(infofn) +            try: +                json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) +                write_json_file(json_info_dict, encodeFilename(infofn)) +            except (OSError, IOError): +                self.report_error(u'Cannot write metadata to JSON file ' + infofn) +                return + +        if self.params.get('writethumbnail', False): +            if 'thumbnail' in info_dict: +                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2] +                if not thumb_format: +                    thumb_format = 'jpg' +                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format +                self.to_screen(u'[%s] %s: Downloading thumbnail ...' 
% +                               (info_dict['extractor'], info_dict['id'])) +                uf = compat_urllib_request.urlopen(info_dict['thumbnail']) +                with open(thumb_filename, 'wb') as thumbf: +                    shutil.copyfileobj(uf, thumbf) +                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % +                               (info_dict['extractor'], info_dict['id'], thumb_filename)) + +        if not self.params.get('skip_download', False): +            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): +                success = True +            else: +                try: +                    success = self.fd._do_download(filename, info_dict) +                except (OSError, IOError) as err: +                    raise UnavailableVideoError() +                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +                    self.report_error(u'unable to download video data: %s' % str(err)) +                    return +                except (ContentTooShortError, ) as err: +                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) +                    return + +            if success: +                try: +                    self.post_process(filename, info_dict) +                except (PostProcessingError) as err: +                    self.report_error(u'postprocessing: %s' % str(err)) +                    return + +    def download(self, url_list): +        """Download a given list of URLs.""" +        if len(url_list) > 1 and self.fixed_template(): +            raise SameFileError(self.params['outtmpl']) + +        for url in url_list: +            try: +                #It also downloads the videos +                videos = self.extract_info(url) +            except UnavailableVideoError: +                self.report_error(u'unable to download video') +            except MaxDownloadsReached: +                self.to_screen(u'[info] Maximum number of downloaded files reached.') +                raise + +        return self._download_retcode + +    def post_process(self, filename, ie_info): +        """Run all the postprocessors on the given file.""" +        info = dict(ie_info) +        info['filepath'] = filename +        keep_video = None +        for pp in self._pps: +            try: +                keep_video_wish,new_info = pp.run(info) +                if keep_video_wish is not None: +                    if keep_video_wish: +                        keep_video = keep_video_wish +                    elif keep_video is None: +                        # No clear decision yet, let IE decide +                        keep_video = keep_video_wish +            except PostProcessingError as e: +                self.to_stderr(u'ERROR: ' + e.msg) +        if keep_video is False and not self.params.get('keepvideo', False): +            try: +                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) +                os.remove(encodeFilename(filename)) +            except (IOError, OSError): +                self.report_warning(u'Unable to remove downloaded video file') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a8b62a6cd..180217b94 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -46,6 +46,7 @@ from .update import update_self  from .version import __version__  from .FileDownloader import *  from .extractor import 
gen_extractors +from .YoutubeDL import YoutubeDL  from .PostProcessor import *  def parseOpts(overrideArguments=None): @@ -492,8 +493,8 @@ def _real_main(argv=None):              or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')              or u'%(title)s-%(id)s.%(ext)s') -    # File downloader -    fd = FileDownloader({ +    # YoutubeDL +    ydl = YoutubeDL({          'usenetrc': opts.usenetrc,          'username': opts.username,          'password': opts.password, @@ -550,31 +551,31 @@ def _real_main(argv=None):          })      if opts.verbose: -        fd.to_screen(u'[debug] youtube-dl version ' + __version__) +        ydl.to_screen(u'[debug] youtube-dl version ' + __version__)          try:              sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,                                    cwd=os.path.dirname(os.path.abspath(__file__)))              out, err = sp.communicate()              out = out.decode().strip()              if re.match('[0-9a-f]+', out): -                fd.to_screen(u'[debug] Git HEAD: ' + out) +                ydl.to_screen(u'[debug] Git HEAD: ' + out)          except:              pass -        fd.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) -        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies)) +        ydl.to_screen(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform())) +        ydl.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))      for extractor in extractors: -        fd.add_info_extractor(extractor) +        ydl.add_info_extractor(extractor)      # PostProcessors      if opts.extractaudio: -        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) +        ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))      if opts.recodevideo: -        fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) +        ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))      # Update version      if opts.update_self: -        update_self(fd.to_screen, opts.verbose, sys.argv[0]) +        update_self(ydl.to_screen, opts.verbose, sys.argv[0])      # Maybe do nothing      if len(all_urls) < 1: @@ -584,9 +585,9 @@ def _real_main(argv=None):              sys.exit()      try: -        retcode = fd.download(all_urls) +        retcode = ydl.download(all_urls)      except MaxDownloadsReached: -        fd.to_screen(u'--max-download limit reached, aborting.') +        ydl.to_screen(u'--max-download limit reached, aborting.')          retcode = 101      # Dump cookie jar if requested | 
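For embedders, the wiring that _real_main() sets up above reduces to the following minimal dry-run sketch. It assumes only names this diff itself imports or calls (YoutubeDL, gen_extractors, add_info_extractor, download) and the option keys read in process_info(); the sample URL is an arbitrary placeholder, not taken from the change.

    # Minimal dry-run embedding sketch, mirroring _real_main() above.
    from youtube_dl import YoutubeDL
    from youtube_dl.extractor import gen_extractors

    ydl = YoutubeDL({
        'outtmpl': u'%(title)s-%(id)s.%(ext)s',  # same default template _real_main() falls back to
        'simulate': True,                        # process_info() stops before writing any file
        'forcetitle': True,                      # but still prints the resolved title
    })

    # Extractors are still registered one by one, exactly as _real_main() does.
    for ie in gen_extractors():
        ydl.add_info_extractor(ie)

    retcode = ydl.download([u'https://www.youtube.com/watch?v=PLACEHOLDER'])

As the hunks above show, download() lets MaxDownloadsReached propagate once process_info() exceeds the 'max_downloads' limit, and _real_main() maps that exception to exit code 101.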

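The core of the refactor is that process_ie_result() resolves extractor output by its '_type' field. The literals below sketch the four shapes the method accepts; they are illustrative only, real extractor results carry many more keys, and the 'Youtube' ie_key value is an assumed extractor key, not something this hunk defines.

    video_result = {            # a missing '_type' defaults to 'video'
        'id': 'abc123',
        'title': 'A single video',
        'url': 'http://example.com/video.mp4',
        'ext': 'mp4',
    }

    url_result = {              # re-dispatched through extract_info()
        '_type': 'url',
        'url': 'http://example.com/watch?v=abc123',
        'ie_key': 'Youtube',    # optional; resolved via get_info_extractor(); value assumed here
    }

    playlist_result = {         # each entry is resolved recursively, with playlist/playlist_index added
        '_type': 'playlist',
        'title': 'Example playlist',
        'entries': [url_result, video_result],
    }

    compat_list_result = {      # wrapper extract_info() builds for old list-returning extractors
        '_type': 'compat_list',
        'entries': [video_result],
    }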
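In the 'playlist' branch, the 1-based 'playliststart'/'playlistend' options are turned into a slice before each entry is resolved, and 'playlist_index' is then restored to the 1-based position inside the full playlist. A standalone restatement of that arithmetic, with made-up entry names:

    # --playlist-start 2 --playlist-end 4 against a five-entry playlist.
    entries = ['vid1', 'vid2', 'vid3', 'vid4', 'vid5']

    playliststart = 2 - 1    # option value minus one, as in process_ie_result()
    playlistend = 4          # -1 would mean "until the end"

    if playlistend == -1:
        selected = entries[playliststart:]
    else:
        selected = entries[playliststart:playlistend]

    for i, entry in enumerate(selected, 1):
        print('%s -> playlist_index %d' % (entry, i + playliststart))
    # vid2 -> playlist_index 2
    # vid3 -> playlist_index 3
    # vid4 -> playlist_index 4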
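post_process() collects one keep/delete wish per post-processor before deciding whether to remove the original file. Restated as a pure function, the precedence in that loop is: any post-processor that asks to keep the file wins, an explicit "do not keep" only sticks while nothing has been decided yet, and deletion additionally requires that the user did not set 'keepvideo'.

    # Pure-function restatement of the keep_video aggregation in post_process().
    def should_delete_original(wishes, keepvideo_param):
        keep_video = None
        for wish in wishes:              # one entry per post-processor's run() result
            if wish is not None:
                if wish:
                    keep_video = True    # an explicit "keep" always wins
                elif keep_video is None:
                    keep_video = wish    # a "do not keep" only while still undecided
        return keep_video is False and not keepvideo_param

    assert should_delete_original([None, False], keepvideo_param=False) is True
    assert should_delete_original([False, True], keepvideo_param=False) is False  # "keep" wins
    assert should_delete_original([None, False], keepvideo_param=True) is False   # --keep-video overrides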
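The pre-download filter _match_entry() shown at the top of this hunk returns a human-readable skip reason, or None when the video passes, and process_info() consults it before anything is written. A hypothetical self-contained restatement, with the DateRange object from youtube_dl.utils reduced to a pair of YYYYMMDD strings:

    import re

    def skip_reason(params, info_dict):
        title = info_dict['title']

        matchtitle = params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return u'"%s" title did not match pattern "%s"' % (title, matchtitle)

        rejecttitle = params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return u'"%s" title matched reject pattern "%s"' % (title, rejecttitle)

        date = info_dict.get('upload_date')
        after, before = params.get('daterange', (None, None))
        if date is not None and after is not None and before is not None:
            if not (after <= date <= before):
                return u'%s upload date is not in range %s-%s' % (date, after, before)

        return None

    print(skip_reason({'rejecttitle': 'trailer'}, {'title': 'Official Trailer'}))
    # "Official Trailer" title matched reject pattern "trailer"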