aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml3
-rw-r--r--AUTHORS1
-rw-r--r--test/helper.py2
-rw-r--r--test/test_utils.py11
-rwxr-xr-xyoutube_dl/YoutubeDL.py187
-rw-r--r--youtube_dl/__init__.py23
-rw-r--r--youtube_dl/downloader/common.py32
-rw-r--r--youtube_dl/downloader/external.py43
-rw-r--r--youtube_dl/downloader/http.py12
-rw-r--r--youtube_dl/extractor/appletrailers.py4
-rw-r--r--youtube_dl/extractor/atresplayer.py2
-rw-r--r--youtube_dl/extractor/audiomack.py13
-rw-r--r--youtube_dl/extractor/bliptv.py2
-rw-r--r--youtube_dl/extractor/common.py3
-rw-r--r--youtube_dl/extractor/fc2.py58
-rw-r--r--youtube_dl/extractor/folketinget.py4
-rw-r--r--youtube_dl/extractor/krasview.py22
-rw-r--r--youtube_dl/extractor/mtv.py2
-rw-r--r--youtube_dl/extractor/smotri.py1
-rw-r--r--youtube_dl/extractor/testtube.py16
-rw-r--r--youtube_dl/extractor/ubu.py27
-rw-r--r--youtube_dl/extractor/videomega.py4
-rw-r--r--youtube_dl/extractor/wdr.py4
-rw-r--r--youtube_dl/options.py29
-rw-r--r--youtube_dl/utils.py13
25 files changed, 365 insertions, 153 deletions
diff --git a/.travis.yml b/.travis.yml
index f14014414..fb34299fc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,9 @@ python:
- "2.7"
- "3.3"
- "3.4"
+before_install:
+ - sudo apt-get update -qq
+ - sudo apt-get install -yqq rtmpdump
script: nosetests test --verbose
notifications:
email:
diff --git a/AUTHORS b/AUTHORS
index b8bf3cb6f..8362b6d8a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -104,3 +104,4 @@ Ondřej Caletka
Dinesh S
Johan K. Jensen
Yen Chi Hsuan
+Enam Mijbah Noor
diff --git a/test/helper.py b/test/helper.py
index c416f388c..27a68091f 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict):
# Are checkable fields missing from the test case definition?
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
for key, value in got_dict.items()
- if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
+ if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
def _repr(v):
diff --git a/test/test_utils.py b/test/test_utils.py
index bdd7f268a..ebec7986f 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -52,6 +52,7 @@ from youtube_dl.utils import (
urlencode_postdata,
version_tuple,
xpath_with_ns,
+ render_table,
)
@@ -434,5 +435,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertTrue(is_html( # UTF-32-LE
b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
+ def test_render_table(self):
+ self.assertEqual(
+ render_table(
+ ['a', 'bcd'],
+ [[123, 4], [9999, 51]]),
+ 'a bcd\n'
+ '123 4\n'
+ '9999 51')
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 54e732943..b772f87f1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -54,8 +54,10 @@ from .utils import (
PostProcessingError,
platform_name,
preferredencoding,
+ render_table,
SameFileError,
sanitize_filename,
+ std_headers,
subtitles_filename,
takewhile_inclusive,
UnavailableVideoError,
@@ -135,6 +137,7 @@ class YoutubeDL(object):
nooverwrites: Prevent overwriting files.
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
playlistreverse: Download playlist items in reverse order.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
@@ -144,6 +147,7 @@ class YoutubeDL(object):
writeinfojson: Write the video description to a .info.json file
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
@@ -194,11 +198,12 @@ class YoutubeDL(object):
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * filename: The final filename
- * status: One of "downloading" and "finished"
-
- The dict may also have some of the following entries:
+ * status: One of "downloading" and "finished".
+ Check this first and ignore unknown values.
+ If status is one of "downloading" or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
* tmpfilename: The filename we're currently writing to
@@ -220,12 +225,15 @@ class YoutubeDL(object):
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download.
external_downloader: Executable of the external downloader to call.
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@@ -698,24 +706,51 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
+ playlistitems_str = self.params.get('playlist_items', None)
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = iter_playlistitems(playlistitems_str)
+
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
- entries = ie_entries[playliststart:playlistend]
+ if playlistitems:
+ entries = [ie_entries[i - 1] for i in playlistitems]
+ else:
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
elif isinstance(ie_entries, PagedList):
- entries = ie_entries.getslice(
- playliststart, playlistend)
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
else: # iterable
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
+ if playlistitems:
+ entry_list = list(ie_entries)
+ entries = [entry_list[i - 1] for i in playlistitems]
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
@@ -865,6 +900,42 @@ class YoutubeDL(object):
return matches[-1]
return None
+ def _calc_headers(self, info_dict):
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ return res
+
+ def _calc_cookies(self, info_dict):
+ class _PseudoRequest(object):
+ def __init__(self, url):
+ self.url = url
+ self.headers = {}
+ self.unverifiable = False
+
+ def add_unredirected_header(self, k, v):
+ self.headers[k] = v
+
+ def get_full_url(self):
+ return self.url
+
+ def is_unverifiable(self):
+ return self.unverifiable
+
+ def has_header(self, h):
+ return h in self.headers
+
+ pr = _PseudoRequest(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.headers.get('Cookie')
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@@ -879,9 +950,14 @@ class YoutubeDL(object):
info_dict['playlist_index'] = None
thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ thumbnails = [{'url': thumbnail}]
if thumbnails:
thumbnails.sort(key=lambda t: (
- t.get('width'), t.get('height'), t.get('url')))
+ t.get('preference'), t.get('width'), t.get('height'),
+ t.get('id'), t.get('url')))
for t in thumbnails:
if 'width' in t and 'height' in t:
t['resolution'] = '%dx%d' % (t['width'], t['height'])
@@ -933,6 +1009,11 @@ class YoutubeDL(object):
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
format_limit = self.params.get('format_limit', None)
if format_limit:
@@ -948,9 +1029,12 @@ class YoutubeDL(object):
# element in the 'formats' field in info_dict is info_dict itself,
# wich can't be exported to json
info_dict['formats'] = formats
- if self.params.get('listformats', None):
+ if self.params.get('listformats'):
self.list_formats(info_dict)
return
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
req_format = self.params.get('format')
if req_format is None:
@@ -1157,25 +1241,7 @@ class YoutubeDL(object):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
- if self.params.get('writethumbnail', False):
- if info_dict.get('thumbnail') is not None:
- thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
- thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail is already present' %
- (info_dict['extractor'], info_dict['id']))
- else:
- self.to_screen('[%s] %s: Downloading thumbnail ...' %
- (info_dict['extractor'], info_dict['id']))
- try:
- uf = self.urlopen(info_dict['thumbnail'])
- with open(thumb_filename, 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (info_dict['thumbnail'], compat_str(err)))
+ self._write_thumbnails(info_dict, filename)
if not self.params.get('skip_download', False):
try:
@@ -1186,6 +1252,7 @@ class YoutubeDL(object):
if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info)
+
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
@@ -1458,8 +1525,26 @@ class YoutubeDL(object):
header_line = line({
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
- self.to_screen('[info] Available formats for %s:\n%s\n%s' %
- (info_dict['id'], header_line, '\n'.join(formats_s)))
+ self.to_screen(
+ '[info] Available formats for %s:\n%s\n%s' %
+ (info_dict['id'], header_line, '\n'.join(formats_s)))
+
+ def list_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if not thumbnails:
+ tn_url = info_dict.get('thumbnail')
+ if tn_url:
+ thumbnails = [{'id': '0', 'url': tn_url}]
+ else:
+ self.to_screen(
+ '[info] No thumbnails present for %s' % info_dict['id'])
+ return
+
+ self.to_screen(
+ '[info] Thumbnails for %s:' % info_dict['id'])
+ self.to_screen(render_table(
+ ['ID', 'width', 'height', 'URL'],
+ [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def urlopen(self, req):
""" Start an HTTP download """
@@ -1605,3 +1690,39 @@ class YoutubeDL(object):
if encoding is None:
encoding = preferredencoding()
return encoding
+
+ def _write_thumbnails(self, info_dict, filename):
+ if self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails:
+ thumbnails = [thumbnails[-1]]
+ elif self.params.get('write_all_thumbnails', False):
+ thumbnails = info_dict.get('thumbnails')
+ else:
+ return
+
+ if not thumbnails:
+ # No thumbnails present, so return immediately
+ return
+
+ for t in thumbnails:
+ thumb_ext = determine_ext(t['url'], 'jpg')
+ suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ self.to_screen('[%s] %s: Thumbnail %sis already present' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ else:
+ self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ uf = self.urlopen(t['url'])
+ with open(thumb_filename, 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download thumbnail "%s": %s' %
+ (t['url'], compat_str(err)))
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 3fc7dc5c2..71d2c6f35 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -143,10 +143,13 @@ def _real_main(argv=None):
parser.error('invalid max_filesize specified')
opts.max_filesize = numeric_limit
if opts.retries is not None:
- try:
- opts.retries = int(opts.retries)
- except (TypeError, ValueError):
- parser.error('invalid retry count specified')
+ if opts.retries in ('inf', 'infinite'):
+ opts_retries = float('inf')
+ else:
+ try:
+ opts_retries = int(opts.retries)
+ except (TypeError, ValueError):
+ parser.error('invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None:
@@ -238,6 +241,12 @@ def _real_main(argv=None):
'verboseOutput': opts.verbose,
'exec_cmd': opts.exec_cmd,
})
+ if opts.xattr_set_filesize:
+ try:
+ import xattr
+ xattr # Confuse flake8
+ except ImportError:
+ parser.error('setting filesize xattr requested but python-xattr is not available')
ydl_opts = {
'usenetrc': opts.usenetrc,
@@ -268,7 +277,7 @@ def _real_main(argv=None):
'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
- 'retries': opts.retries,
+ 'retries': opts_retries,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl,
@@ -286,6 +295,7 @@ def _real_main(argv=None):
'writeannotations': opts.writeannotations,
'writeinfojson': opts.writeinfojson,
'writethumbnail': opts.writethumbnail,
+ 'write_all_thumbnails': opts.write_all_thumbnails,
'writesubtitles': opts.writesubtitles,
'writeautomaticsub': opts.writeautomaticsub,
'allsubtitles': opts.allsubtitles,
@@ -331,6 +341,9 @@ def _real_main(argv=None):
'call_home': opts.call_home,
'sleep_interval': opts.sleep_interval,
'external_downloader': opts.external_downloader,
+ 'list_thumbnails': opts.list_thumbnails,
+ 'playlist_items': opts.playlist_items,
+ 'xattr_set_filesize': opts.xattr_set_filesize,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index c35c42c1d..7bb3a948d 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -25,21 +25,23 @@ class FileDownloader(object):
Available options:
- verbose: Print additional info to stdout.
- quiet: Do not print messages to stdout.
- ratelimit: Download speed limit, in bytes/sec.
- retries: Number of times to retry for HTTP error 5xx
- buffersize: Size of download buffer in bytes.
- noresizebuffer: Do not automatically resize the download buffer.
- continuedl: Try to continue downloads if possible.
- noprogress: Do not print the progress bar.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
- nopart: Do not use temporary .part files.
- updatetime: Use the Last-modified header to set output file timestamps.
- test: Download only first bytes to test the downloader.
- min_filesize: Skip files smaller than this size
- max_filesize: Skip files larger than this size
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ ratelimit: Download speed limit, in bytes/sec.
+ retries: Number of times to retry for HTTP error 5xx
+ buffersize: Size of download buffer in bytes.
+ noresizebuffer: Do not automatically resize the download buffer.
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ test: Download only first bytes to test the downloader.
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
+ xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ (experimental)
Subclasses of this one must re-define the real_download method.
"""
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 7ebe40096..af9fdba75 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -7,7 +7,6 @@ import sys
from .common import FileDownloader
from ..utils import (
encodeFilename,
- std_headers,
)
@@ -46,42 +45,6 @@ class ExternalFD(FileDownloader):
def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
- def _calc_headers(self, info_dict):
- res = std_headers.copy()
-
- ua = info_dict.get('user_agent')
- if ua is not None:
- res['User-Agent'] = ua
-
- cookies = self._calc_cookies(info_dict)
- if cookies:
- res['Cookie'] = cookies
-
- return res
-
- def _calc_cookies(self, info_dict):
- class _PseudoRequest(object):
- def __init__(self, url):
- self.url = url
- self.headers = {}
- self.unverifiable = False
-
- def add_unredirected_header(self, k, v):
- self.headers[k] = v
-
- def get_full_url(self):
- return self.url
-
- def is_unverifiable(self):
- return self.unverifiable
-
- def has_header(self, h):
- return h in self.headers
-
- pr = _PseudoRequest(info_dict['url'])
- self.ydl.cookiejar.add_cookie_header(pr)
- return pr.headers.get('Cookie')
-
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = self._make_cmd(tmpfilename, info_dict)
@@ -107,7 +70,7 @@ class ExternalFD(FileDownloader):
class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
- for key, val in self._calc_headers(info_dict).items():
+ for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--', info_dict['url']]
return cmd
@@ -116,7 +79,7 @@ class CurlFD(ExternalFD):
class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
- for key, val in self._calc_headers(info_dict).items():
+ for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--', info_dict['url']]
return cmd
@@ -131,7 +94,7 @@ class Aria2cFD(ExternalFD):
if dn:
cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
- for key, val in self._calc_headers(info_dict).items():
+ for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += ['--', info_dict['url']]
return cmd
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index e68f20c9f..8a1d578d5 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -24,10 +24,6 @@ class HttpFD(FileDownloader):
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
- if 'user_agent' in info_dict:
- headers['Youtubedl-user-agent'] = info_dict['user_agent']
- if 'http_referer' in info_dict:
- headers['Referer'] = info_dict['http_referer']
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
@@ -161,6 +157,14 @@ class HttpFD(FileDownloader):
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ import xattr
+ xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+ except(OSError, IOError, ImportError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
+
try:
stream.write(data_block)
except (IOError, OSError) as err:
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 7cd0482c7..70621946d 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor):
'thumbnail': thumbnail,
'upload_date': upload_date,
'uploader_id': uploader_id,
- 'user_agent': 'QuickTime compatible (youtube-dl)',
+ 'http_headers': {
+ 'User-Agent': 'QuickTime compatible (youtube-dl)',
+ },
})
return {
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index f42862be3..f016368fa 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
- request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+ request.add_header('User-Agent', self._USER_AGENT)
fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt)
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 8bfe50214..693ba22c6 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
# Album playlist ripped from fakeshoredrive with no metadata
{
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project)',
+ 'id': '837572',
+ },
'playlist': [{
'info_dict': {
- 'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
- 'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+ 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
+ 'id': '837577',
'ext': 'mp3',
+ 'uploader': 'Lil Herb a.k.a. G Herbo',
}
}],
'params': {
- 'playliststart': 8,
- 'playlistend': 8,
+ 'playliststart': 9,
+ 'playlistend': 9,
}
}
]
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 14b814120..436cc5155 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
- req.add_header('Youtubedl-user-agent', 'youtube-dl')
+ req.add_header('User-Agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 03f3f18c8..7b7a832dc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -108,7 +108,6 @@ class InfoExtractor(object):
(quality takes higher priority)
-1 for default (order by other properties),
-2 or smaller for less than default.
- * http_referer HTTP Referer header value to set.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
to add to the request.
@@ -130,7 +129,9 @@ class InfoExtractor(object):
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"
thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
* "url"
+ * "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)
* "resolution" (optional, string "{width}x{height"},
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index 81ceace53..1ccc1a964 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -5,6 +5,7 @@ import hashlib
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
@@ -16,7 +17,8 @@ from ..utils import (
class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
IE_NAME = 'fc2'
- _TEST = {
+ _NETRC_MACHINE = 'fc2'
+ _TESTS = [{
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
'info_dict': {
@@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
'ext': 'flv',
'title': 'Boxing again with Puff',
},
- }
+ }, {
+ 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+ 'info_dict': {
+ 'id': '20150125cEva0hDn',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'username': 'ytdl@yt-dl.org',
+ 'password': '(snip)',
+ 'skip': 'requires actual password'
+ }
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None or password is None:
+ return False
+
+ # Log in
+ login_form_strs = {
+ 'email': username,
+ 'password': password,
+ 'done': 'video',
+ 'Submit': ' Login ',
+ }
+
+ # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+ # chokes on unicode
+ login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
+ login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+ request = compat_urllib_request.Request(
+ 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+ login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+ if 'mode=redirect&login=done' not in login_results:
+ self.report_warning('unable to log in: bad username or password')
+ return False
+
+ # this is also needed
+ login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
+ self._download_webpage(
+ login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+ return True
def _real_extract(self, url):
video_id = self._match_id(url)
+ self._login()
webpage = self._download_webpage(url, video_id)
self._downloader.cookiejar.clear_session_cookies() # must clear
+ self._login()
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
@@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
info = compat_urlparse.parse_qs(info_webpage)
if 'err_code' in info:
- raise ExtractorError('Error code: %s' % info['err_code'][0])
+ # most of the time we can still download video even if err_code is 403 or 602
+ self.report_warning(
+ 'Error code was: %s... but still trying' % info['err_code'][0])
+
+ if 'filepath' not in info:
+ raise ExtractorError('Cannot download file. Are you logged in?')
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
title_info = info.get('title')
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
index 68e2db943..0fb29de75 100644
--- a/youtube_dl/extractor/folketinget.py
+++ b/youtube_dl/extractor/folketinget.py
@@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
_TEST = {
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'md5': '6269e8626fa1a891bf5369b386ae996a',
'info_dict': {
'id': '1165642',
'ext': 'mp4',
@@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
'upload_date': '20141120',
'duration': 3960,
},
- 'params': {
- 'skip_download': 'rtmpdump required',
- }
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index 6f3d2345b..e46954b47 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -2,18 +2,17 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
- unescapeHTML,
+ js_to_json,
)
class KrasViewIE(InfoExtractor):
IE_DESC = 'Красвью'
- _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
_TEST = {
'url': 'http://krasview.ru/video/512228',
@@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- flashvars = json.loads(self._search_regex(
- r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+ flashvars = json.loads(js_to_json(self._search_regex(
+ r'video_Init\(({.+?})', webpage, 'flashvars')))
video_url = flashvars['url']
- title = unescapeHTML(flashvars['title'])
- description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
- thumbnail = flashvars['image']
- duration = int(flashvars['duration'])
- filesize = int(flashvars['size'])
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(flashvars.get('duration'))
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
@@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'duration': duration,
- 'filesize': filesize,
'width': width,
'height': height,
}
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 5ebc78033..22a726327 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
- req.add_header('Youtubedl-user-agent', 'curl/7')
+ req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 26f361c93..e94f41362 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor):
'uploader_id': 'mopeder',
'duration': 71,
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
+ 'upload_date': '20150114',
},
},
# swf player
diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py
index fd47e71a2..6a7b5e49d 100644
--- a/youtube_dl/extractor/testtube.py
+++ b/youtube_dl/extractor/testtube.py
@@ -1,7 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ qualities,
+)
class TestTubeIE(InfoExtractor):
@@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor):
self._sort_formats(formats)
duration = int_or_none(info.get('duration'))
+ images = info.get('images')
+ thumbnails = None
+ preference = qualities(['mini', 'small', 'medium', 'large'])
+ if images:
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': img_url,
+ 'preference': preference(thumbnail_id)
+ } for thumbnail_id, img_url in images.items()]
return {
'id': video_id,
'display_id': display_id,
'title': info['title'],
'description': info.get('summary'),
- 'thumbnail': info.get('images', {}).get('large'),
+ 'thumbnails': thumbnails,
'uploader': info.get('show', {}).get('name'),
'uploader_id': info.get('show', {}).get('slug'),
'duration': duration,
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
index 0182d67ec..d50237758 100644
--- a/youtube_dl/extractor/ubu.py
+++ b/youtube_dl/extractor/ubu.py
@@ -3,50 +3,51 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ qualities,
+)
class UbuIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
_TEST = {
'url': 'http://ubu.com/film/her_noise.html',
- 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+ 'md5': '138d5652618bf0f03878978db9bef1ee',
'info_dict': {
'id': 'her_noise',
- 'ext': 'mp4',
+ 'ext': 'm4v',
'title': 'Her Noise - The Making Of (2007)',
'duration': 3600,
},
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
duration = int_or_none(self._html_search_regex(
- r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
- if duration:
- duration *= 60
+ r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
+ invscale=60)
formats = []
-
FORMAT_REGEXES = [
- ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
- ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+ ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
+ ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
]
-
+ preference = qualities([fid for fid, _ in FORMAT_REGEXES])
for format_id, format_regex in FORMAT_REGEXES:
m = re.search(format_regex, webpage)
if m:
formats.append({
'url': m.group(1),
'format_id': format_id,
+ 'preference': preference(format_id),
})
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index fc6e05fe0..273030316 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
'title': title,
'formats': formats,
'thumbnail': thumbnail,
- 'http_referer': iframe_url,
+ 'http_headers': {
+ 'Referer': iframe_url,
+ },
}
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 45466e31b..313b9c15d 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor):
'title': mobj.group('title'),
'age_limit': int(mobj.group('age_limit')),
'url': url,
- 'user_agent': 'mobile',
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
}
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index b38b8349f..dbc6f5528 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -201,6 +201,10 @@ def parseOpts(overrideArguments=None):
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='playlist video to end at (default is last)')
selection.add_option(
+ '--playlist-items',
+ dest='playlist_items', metavar='ITEM_SPEC', default=None,
+ help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
+ selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
help='download only matching titles (regex or caseless sub-string)')
@@ -373,7 +377,7 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'-R', '--retries',
dest='retries', metavar='RETRIES', default=10,
- help='number of retries (default is %default)')
+ help='number of retries (default is %default), or "infinite".')
downloader.add_option(
'--buffer-size',
dest='buffersize', metavar='SIZE', default='1024',
@@ -391,6 +395,10 @@ def parseOpts(overrideArguments=None):
action='store_true',
help='Download playlist videos in reverse order')
downloader.add_option(
+ '--xattr-set-filesize',
+ dest='xattr_set_filesize', action='store_true',
+ help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+ downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
help='(experimental) Use the specified external downloader. '
@@ -615,10 +623,6 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='writeannotations', default=False,
help='write video annotations to a .annotation file')
filesystem.add_option(
- '--write-thumbnail',
- action='store_true', dest='writethumbnail', default=False,
- help='write thumbnail image to disk')
- filesystem.add_option(
'--load-info',
dest='load_info_filename', metavar='FILE',
help='json file containing the video information (created with the "--write-json" option)')
@@ -637,6 +641,20 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files')
+ thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
+ thumbnail.add_option(
+ '--write-thumbnail',
+ action='store_true', dest='writethumbnail', default=False,
+ help='write thumbnail image to disk')
+ thumbnail.add_option(
+ '--write-all-thumbnails',
+ action='store_true', dest='write_all_thumbnails', default=False,
+ help='write all thumbnail image formats to disk')
+ thumbnail.add_option(
+ '--list-thumbnails',
+ action='store_true', dest='list_thumbnails', default=False,
+ help='Simulate and list all available thumbnail formats')
+
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
postproc.add_option(
'-x', '--extract-audio',
@@ -702,6 +720,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(selection)
parser.add_option_group(downloader)
parser.add_option_group(filesystem)
+ parser.add_option_group(thumbnail)
parser.add_option_group(verbosity)
parser.add_option_group(workarounds)
parser.add_option_group(video_format)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2970d02a1..b8c52af74 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -606,11 +606,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
if 'Accept-encoding' in req.headers:
del req.headers['Accept-encoding']
del req.headers['Youtubedl-no-compression']
- if 'Youtubedl-user-agent' in req.headers:
- if 'User-agent' in req.headers:
- del req.headers['User-agent']
- req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
- del req.headers['Youtubedl-user-agent']
if sys.version_info < (2, 7) and '#' in req.get_full_url():
# Python 2.6 is brain-dead when it comes to fragments
@@ -1664,3 +1659,11 @@ def determine_protocol(info_dict):
return 'f4m'
return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+    """ Lay out header_row followed by the rows in data as newline-joined lines of left-aligned columns """
+    rows = [header_row] + data
+    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]
+    row_fmt = ' '.join('%%-%ds' % (width + 1) for width in widths[:-1]) + '%s'
+    return '\n'.join(row_fmt % tuple(row) for row in rows)