diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/cspan.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/spankbang.py | 4 |
4 files changed, 29 insertions, 4 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 11191c173..33290fd74 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -825,6 +825,12 @@ class InfoExtractor(object):
         if not formats:
             raise ExtractorError('No video formats found')
 
+        for f in formats:
+            # Automatically determine tbr when missing based on abr and vbr (improves
+            # formats sorting in some cases)
+            if 'tbr' not in f and 'abr' in f and 'vbr' in f:
+                f['tbr'] = f['abr'] + f['vbr']
+
         def _formats_key(f):
             # TODO remove the following workaround
             from ..utils import determine_ext
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index f78cbbd7e..b78edf729 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -112,6 +112,13 @@ class CSpanIE(InfoExtractor):
                     'height': int_or_none(get_text_attr(quality, 'height')),
                     'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
                 })
+            if not formats:
+                path = get_text_attr(f, 'path')
+                if not path:
+                    continue
+                formats = self._extract_m3u8_formats(
+                    path, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
             self._sort_formats(formats)
             entries.append({
                 'id': '%s_%d' % (video_id, partnum + 1),
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index e5e16ca3b..ed3e07118 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -22,6 +22,18 @@ class InstagramIE(InfoExtractor):
             'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
         }
     }, {
+        # missing description
+        'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
+        'info_dict': {
+            'id': 'BA-pQFBG8HZ',
+            'ext': 'mp4',
+            'uploader_id': 'britneyspears',
+            'title': 'Video by britneyspears',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
     }]
@@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
                                          webpage, 'uploader id', fatal=False)
-        desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
-                                  fatal=False)
+        desc = self._search_regex(
+            r'"caption":"(.+?)"', webpage, 'description', default=None)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 7f060b15b..3cfa671ed 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -34,11 +34,11 @@ class SpankBangIE(InfoExtractor):
             'ext': 'mp4',
             'format_id': '%sp' % height,
             'height': int(height),
-        } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+        } for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
         self._sort_formats(formats)
 
         title = self._html_search_regex(
-            r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
         description = self._search_regex(
             r'class="desc"[^>]*>([^<]+)',
             webpage, 'description', default=None)