aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_YoutubeDL.py34
-rwxr-xr-xyoutube_dl/YoutubeDL.py63
2 files changed, 81 insertions, 16 deletions
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index ca25025e2..0dfe25c00 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1['format_id'])
+ def test_audio_only_extractor_format_selection(self):
+ # For extractors with incomplete formats (all formats are audio-only or
+ # video-only) best and worst should fallback to corresponding best/worst
+ # video-only or audio-only formats (as per
+ # https://github.com/rg3/youtube-dl/pull/5556)
+ formats = [
+ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'high')
+
+ ydl = YDL({'format': 'worst'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'low')
+
+ def test_format_not_available(self):
+ formats = [
+ {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # This must fail since complete video-audio format does not match filter
+ # and extractor does not provide incomplete only formats (i.e. only
+ # video-only or audio-only).
+ ydl = YDL({'format': 'best[height>360]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
def test_invalid_format_specs(self):
def assert_syntax_error(format_spec):
ydl = YDL({'format': format_spec})
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ba72ec6f3..cf9cd8297 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
import collections
import contextlib
+import copy
import datetime
import errno
import fileinput
@@ -1051,9 +1052,9 @@ class YoutubeDL(object):
if isinstance(selector, list):
fs = [_build_selector_function(s) for s in selector]
- def selector_function(formats):
+ def selector_function(ctx):
for f in fs:
- for format in f(formats):
+ for format in f(ctx):
yield format
return selector_function
elif selector.type == GROUP:
@@ -1061,17 +1062,17 @@ class YoutubeDL(object):
elif selector.type == PICKFIRST:
fs = [_build_selector_function(s) for s in selector.selector]
- def selector_function(formats):
+ def selector_function(ctx):
for f in fs:
- picked_formats = list(f(formats))
+ picked_formats = list(f(ctx))
if picked_formats:
return picked_formats
return []
elif selector.type == SINGLE:
format_spec = selector.selector
- def selector_function(formats):
- formats = list(formats)
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
if not formats:
return
if format_spec == 'all':
@@ -1084,9 +1085,10 @@ class YoutubeDL(object):
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
if audiovideo_formats:
yield audiovideo_formats[format_idx]
- # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
- elif (all(f.get('acodec') != 'none' for f in formats) or
- all(f.get('vcodec') != 'none' for f in formats)):
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) we will fallback to best/worst
+ # {video,audio}-only format
+ elif ctx['incomplete_formats']:
yield formats[format_idx]
elif format_spec == 'bestaudio':
audio_formats = [
@@ -1160,17 +1162,18 @@ class YoutubeDL(object):
}
video_selector, audio_selector = map(_build_selector_function, selector.selector)
- def selector_function(formats):
- formats = list(formats)
- for pair in itertools.product(video_selector(formats), audio_selector(formats)):
+ def selector_function(ctx):
+ for pair in itertools.product(
+ video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
yield _merge(pair)
filters = [self._build_format_filter(f) for f in selector.filters]
- def final_selector(formats):
+ def final_selector(ctx):
+ ctx_copy = copy.deepcopy(ctx)
for _filter in filters:
- formats = list(filter(_filter, formats))
- return selector_function(formats)
+ ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+ return selector_function(ctx_copy)
return final_selector
stream = io.BytesIO(format_spec.encode('utf-8'))
@@ -1377,7 +1380,35 @@ class YoutubeDL(object):
req_format_list.append('best')
req_format = '/'.join(req_format_list)
format_selector = self.build_format_selector(req_format)
- formats_to_download = list(format_selector(formats))
+
+ # While in format selection we may need to have an access to the original
+ # format set in order to calculate some metrics or do some processing.
+ # For now we need to be able to guess whether original formats provided
+ # by extractor are incomplete or not (i.e. whether extractor provides only
+ # video-only or audio-only formats) for proper formats selection for
+ # extractors with such incomplete formats (see
+ # https://github.com/rg3/youtube-dl/pull/5556).
+ # Since formats may be filtered during format selection and may not match
+ # the original formats the results may be incorrect. Thus original formats
+ # or pre-calculated metrics should be passed to format selection routines
+ # as well.
+ # We will pass a context object containing all necessary additional data
+ # instead of just formats.
+ # This fixes incorrect format selection issue (see
+ # https://github.com/rg3/youtube-dl/issues/10083).
+ incomplete_formats = all(
+ # All formats are video-only or
+ f.get('vcodec') != 'none' and f.get('acodec') == 'none' or
+ # all formats are audio-only
+ f.get('vcodec') == 'none' and f.get('acodec') != 'none'
+ for f in formats)
+
+ ctx = {
+ 'formats': formats,
+ 'incomplete_formats': incomplete_formats,
+ }
+
+ formats_to_download = list(format_selector(ctx))
if not formats_to_download:
raise ExtractorError('requested format not available',
expected=True)