diff options
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 176 | 
1 files changed, 128 insertions, 48 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dec0e20e7..578c8daf2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -7,6 +7,7 @@ import collections  import datetime  import errno  import io +import itertools  import json  import locale  import os @@ -22,12 +23,15 @@ import traceback  if os.name == 'nt':      import ctypes -from .utils import ( +from .compat import (      compat_cookiejar, +    compat_expanduser,      compat_http_client,      compat_str,      compat_urllib_error,      compat_urllib_request, +) +from .utils import (      escape_url,      ContentTooShortError,      date_from_str, @@ -57,11 +61,13 @@ from .utils import (      write_string,      YoutubeDLHandler,      prepend_extension, +    args_to_str,  )  from .cache import Cache  from .extractor import get_info_extractor, gen_extractors  from .downloader import get_suitable_downloader -from .postprocessor import FFmpegMergerPP +from .downloader.rtmp import rtmpdump_version +from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor  from .version import __version__ @@ -107,6 +113,8 @@ class YoutubeDL(object):      forcefilename:     Force printing final filename.      forceduration:     Force printing duration.      forcejson:         Force printing info_dict as JSON. +    dump_single_json:  Force printing the info_dict of the whole playlist +                       (or video) as a single JSON line.      simulate:          Do not download the video files.      format:            Video format code.      format_limit:      Highest quality format to try. @@ -116,6 +124,7 @@ class YoutubeDL(object):      nooverwrites:      Prevent overwriting files.      playliststart:     Playlist item to start at.      playlistend:       Playlist item to end at. +    playlistreverse:   Download playlist items in reverse order.      matchtitle:        Download only matching titles.      rejecttitle:       Reject downloads for matching titles.      logger:            Log messages to a logging.Logger instance. @@ -165,6 +174,8 @@ class YoutubeDL(object):                         'auto' for elaborate guessing      encoding:          Use this encoding instead of the system-specified.      extract_flat:      Do not resolve URLs, return the immediate result. +                       Pass in 'in_playlist' to only show this behavior for +                       playlist items.      The following parameters are not used by YoutubeDL itself, they are used by      the FileDownloader: @@ -184,7 +195,7 @@ class YoutubeDL(object):      _num_downloads = None      _screen_file = None -    def __init__(self, params=None): +    def __init__(self, params=None, auto_init=True):          """Create a FileDownloader object with the given options."""          if params is None:              params = {} @@ -241,6 +252,26 @@ class YoutubeDL(object):          self._setup_opener() +        if auto_init: +            self.print_debug_header() +            self.add_default_info_extractors() + +    def warn_if_short_id(self, argv): +        # short YouTube ID starting with dash? +        idxs = [ +            i for i, a in enumerate(argv) +            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] +        if idxs: +            correct_argv = ( +                ['youtube-dl'] + +                [a for i, a in enumerate(argv) if i not in idxs] + +                ['--'] + [argv[i] for i in idxs] +            ) +            self.report_warning( +                'Long argument string detected. ' +                'Use -- to separate parameters and URLs, like this:\n%s\n' % +                args_to_str(correct_argv)) +      def add_info_extractor(self, ie):          """Add an InfoExtractor object to the end of the list."""          self._ies.append(ie) @@ -285,7 +316,7 @@ class YoutubeDL(object):          self._output_process.stdin.write((message + '\n').encode('utf-8'))          self._output_process.stdin.flush()          res = ''.join(self._output_channel.readline().decode('utf-8') -                       for _ in range(line_count)) +                      for _ in range(line_count))          return res[:-len('\n')]      def to_screen(self, message, skip_eol=False): @@ -447,7 +478,7 @@ class YoutubeDL(object):              template_dict = collections.defaultdict(lambda: 'NA', template_dict)              outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) -            tmpl = os.path.expanduser(outtmpl) +            tmpl = compat_expanduser(outtmpl)              filename = tmpl % template_dict              return filename          except ValueError as err: @@ -522,7 +553,7 @@ class YoutubeDL(object):              try:                  ie_result = ie.extract(url) -                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) +                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)                      break                  if isinstance(ie_result, list):                      # Backwards compatibility: old IE result format @@ -535,7 +566,7 @@ class YoutubeDL(object):                      return self.process_ie_result(ie_result, download, extra_info)                  else:                      return ie_result -            except ExtractorError as de: # An error we somewhat expected +            except ExtractorError as de:  # An error we somewhat expected                  self.report_error(compat_str(de), de.format_traceback())                  break              except MaxDownloadsReached: @@ -568,8 +599,12 @@ class YoutubeDL(object):          result_type = ie_result.get('_type', 'video') -        if self.params.get('extract_flat', False): -            if result_type in ('url', 'url_transparent'): +        if result_type in ('url', 'url_transparent'): +            extract_flat = self.params.get('extract_flat', False) +            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or +                    extract_flat is True): +                if self.params.get('forcejson', False): +                    self.to_stdout(json.dumps(ie_result))                  return ie_result          if result_type == 'video': @@ -588,27 +623,19 @@ class YoutubeDL(object):                  ie_result['url'], ie_key=ie_result.get('ie_key'),                  extra_info=extra_info, download=False, process=False) -            def make_result(embedded_info): -                new_result = ie_result.copy() -                for f in ('_type', 'url', 'ext', 'player_url', 'formats', -                          'entries', 'ie_key', 'duration', -                          'subtitles', 'annotations', 'format', -                          'thumbnail', 'thumbnails'): -                    if f in new_result: -                        del new_result[f] -                    if f in embedded_info: -                        new_result[f] = embedded_info[f] -                return new_result -            new_result = make_result(info) +            force_properties = dict( +                (k, v) for k, v in ie_result.items() if v is not None) +            for f in ('_type', 'url'): +                if f in force_properties: +                    del force_properties[f] +            new_result = info.copy() +            new_result.update(force_properties)              assert new_result.get('_type') != 'url_transparent' -            if new_result.get('_type') == 'compat_list': -                new_result['entries'] = [ -                    make_result(e) for e in new_result['entries']]              return self.process_ie_result(                  new_result, download=download, extra_info=extra_info) -        elif result_type == 'playlist': +        elif result_type == 'playlist' or result_type == 'multi_video':              # We process each entry in the playlist              playlist = ie_result.get('title', None) or ie_result.get('id', None)              self.to_screen('[download] Downloading playlist: %s' % playlist) @@ -621,27 +648,39 @@ class YoutubeDL(object):              if playlistend == -1:                  playlistend = None -            if isinstance(ie_result['entries'], list): -                n_all_entries = len(ie_result['entries']) -                entries = ie_result['entries'][playliststart:playlistend] +            ie_entries = ie_result['entries'] +            if isinstance(ie_entries, list): +                n_all_entries = len(ie_entries) +                entries = ie_entries[playliststart:playlistend]                  n_entries = len(entries)                  self.to_screen(                      "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %                      (ie_result['extractor'], playlist, n_all_entries, n_entries)) -            else: -                assert isinstance(ie_result['entries'], PagedList) -                entries = ie_result['entries'].getslice( +            elif isinstance(ie_entries, PagedList): +                entries = ie_entries.getslice(                      playliststart, playlistend)                  n_entries = len(entries)                  self.to_screen(                      "[%s] playlist %s: Downloading %d videos" %                      (ie_result['extractor'], playlist, n_entries)) +            else:  # iterable +                entries = list(itertools.islice( +                    ie_entries, playliststart, playlistend)) +                n_entries = len(entries) +                self.to_screen( +                    "[%s] playlist %s: Downloading %d videos" % +                    (ie_result['extractor'], playlist, n_entries)) + +            if self.params.get('playlistreverse', False): +                entries = entries[::-1]              for i, entry in enumerate(entries, 1):                  self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))                  extra = {                      'n_entries': n_entries,                      'playlist': playlist, +                    'playlist_id': ie_result.get('id'), +                    'playlist_title': ie_result.get('title'),                      'playlist_index': i + playliststart,                      'extractor': ie_result['extractor'],                      'webpage_url': ie_result['webpage_url'], @@ -661,14 +700,20 @@ class YoutubeDL(object):              ie_result['entries'] = playlist_results              return ie_result          elif result_type == 'compat_list': +            self.report_warning( +                'Extractor %s returned a compat_list result. ' +                'It needs to be updated.' % ie_result.get('extractor')) +              def _fixup(r): -                self.add_extra_info(r, +                self.add_extra_info( +                    r,                      {                          'extractor': ie_result['extractor'],                          'webpage_url': ie_result['webpage_url'],                          'webpage_url_basename': url_basename(ie_result['webpage_url']),                          'extractor_key': ie_result['extractor_key'], -                    }) +                    } +                )                  return r              ie_result['entries'] = [                  self.process_ie_result(_fixup(r), download, extra_info) @@ -746,6 +791,10 @@ class YoutubeDL(object):              info_dict['display_id'] = info_dict['id']          if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: +            # Working around negative timestamps in Windows +            # (see http://bugs.python.org/issue1646728) +            if info_dict['timestamp'] < 0 and os.name == 'nt': +                info_dict['timestamp'] = 0              upload_date = datetime.datetime.utcfromtimestamp(                  info_dict['timestamp'])              info_dict['upload_date'] = upload_date.strftime('%Y%m%d') @@ -818,8 +867,15 @@ class YoutubeDL(object):                          # Two formats have been requested like '137+139'                          format_1, format_2 = rf.split('+')                          formats_info = (self.select_format(format_1, formats), -                            self.select_format(format_2, formats)) +                                        self.select_format(format_2, formats))                          if all(formats_info): +                            # The first format must contain the video and the +                            # second the audio +                            if formats_info[0].get('vcodec') == 'none': +                                self.report_error('The first format must ' +                                                  'contain the video, try using ' +                                                  '"-f %s+%s"' % (format_2, format_1)) +                                return                              selected_format = {                                  'requested_formats': formats_info,                                  'format': rf, @@ -882,8 +938,12 @@ class YoutubeDL(object):          if self.params.get('forceid', False):              self.to_stdout(info_dict['id'])          if self.params.get('forceurl', False): -            # For RTMP URLs, also include the playpath -            self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) +            if info_dict.get('requested_formats') is not None: +                for f in info_dict['requested_formats']: +                    self.to_stdout(f['url'] + f.get('play_path', '')) +            else: +                # For RTMP URLs, also include the playpath +                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))          if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:              self.to_stdout(info_dict['thumbnail'])          if self.params.get('forcedescription', False) and info_dict.get('description') is not None: @@ -897,6 +957,8 @@ class YoutubeDL(object):          if self.params.get('forcejson', False):              info_dict['_filename'] = filename              self.to_stdout(json.dumps(info_dict)) +        if self.params.get('dump_single_json', False): +            info_dict['_filename'] = filename          # Do nothing else if in simulate mode          if self.params.get('simulate', False): @@ -962,7 +1024,7 @@ class YoutubeDL(object):                      else:                          self.to_screen('[info] Writing video subtitles to: ' + sub_filename)                          with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: -                                subfile.write(sub) +                            subfile.write(sub)                  except (OSError, IOError):                      self.report_error('Cannot write subtitles file ' + sub_filename)                      return @@ -974,7 +1036,7 @@ class YoutubeDL(object):              else:                  self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)                  try: -                    write_json_file(info_dict, encodeFilename(infofn)) +                    write_json_file(info_dict, infofn)                  except (OSError, IOError):                      self.report_error('Cannot write metadata to JSON file ' + infofn)                      return @@ -994,10 +1056,10 @@ class YoutubeDL(object):                          with open(thumb_filename, 'wb') as thumbf:                              shutil.copyfileobj(uf, thumbf)                          self.to_screen('[%s] %s: Writing thumbnail to: %s' % -                            (info_dict['extractor'], info_dict['id'], thumb_filename)) +                                       (info_dict['extractor'], info_dict['id'], thumb_filename))                      except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:                          self.report_warning('Unable to download thumbnail "%s": %s' % -                            (info_dict['thumbnail'], compat_str(err))) +                                            (info_dict['thumbnail'], compat_str(err)))          if not self.params.get('skip_download', False):              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): @@ -1015,11 +1077,11 @@ class YoutubeDL(object):                          downloaded = []                          success = True                          merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) -                        if not merger._get_executable(): +                        if not merger._executable:                              postprocessors = []                              self.report_warning('You have requested multiple ' -                                'formats but ffmpeg or avconv are not installed.' -                                ' The formats won\'t be merged') +                                                'formats but ffmpeg or avconv are not installed.' +                                                ' The formats won\'t be merged')                          else:                              postprocessors = [merger]                          for f in info_dict['requested_formats']: @@ -1063,13 +1125,16 @@ class YoutubeDL(object):          for url in url_list:              try: -                #It also downloads the videos -                self.extract_info(url) +                # It also downloads the videos +                res = self.extract_info(url)              except UnavailableVideoError:                  self.report_error('unable to download video')              except MaxDownloadsReached:                  self.to_screen('[info] Maximum number of downloaded files reached.')                  raise +            else: +                if self.params.get('dump_single_json', False): +                    self.to_stdout(json.dumps(res))          return self._download_retcode @@ -1193,6 +1258,8 @@ class YoutubeDL(object):              res += 'video@'          if fdict.get('vbr') is not None:              res += '%4dk' % fdict['vbr'] +        if fdict.get('fps') is not None: +            res += ', %sfps' % fdict['fps']          if fdict.get('acodec') is not None:              if res:                  res += ', ' @@ -1274,11 +1341,13 @@ class YoutubeDL(object):              self.report_warning(                  'Your Python is broken! Update to a newer and supported version') +        stdout_encoding = getattr( +            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)          encoding_str = (              '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (                  locale.getpreferredencoding(),                  sys.getfilesystemencoding(), -                sys.stdout.encoding, +                stdout_encoding,                  self.get_encoding()))          write_string(encoding_str, encoding=None) @@ -1297,8 +1366,19 @@ class YoutubeDL(object):                  sys.exc_clear()              except:                  pass -        self._write_string('[debug] Python version %s - %s' % -                     (platform.python_version(), platform_name()) + '\n') +        self._write_string('[debug] Python version %s - %s\n' % ( +            platform.python_version(), platform_name())) + +        exe_versions = FFmpegPostProcessor.get_versions() +        exe_versions['rtmpdump'] = rtmpdump_version() +        exe_str = ', '.join( +            '%s %s' % (exe, v) +            for exe, v in sorted(exe_versions.items()) +            if v +        ) +        if not exe_str: +            exe_str = 'none' +        self._write_string('[debug] exe versions: %s\n' % exe_str)          proxy_map = {}          for handler in self._opener.handlers: | 
