about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py 6
-rw-r--r-- youtube_dl/extractor/cspan.py 7
-rw-r--r-- youtube_dl/extractor/instagram.py 16
-rw-r--r-- youtube_dl/extractor/spankbang.py 4
4 files changed, 29 insertions, 4 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 11191c173..33290fd74 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -825,6 +825,12 @@ class InfoExtractor(object):
if not formats:
raise ExtractorError('No video formats found')
+ for f in formats:
+ # Automatically determine tbr when missing based on abr and vbr (improves
+ # formats sorting in some cases)
+ if 'tbr' not in f and 'abr' in f and 'vbr' in f:
+ f['tbr'] = f['abr'] + f['vbr']
+
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index f78cbbd7e..b78edf729 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -112,6 +112,13 @@ class CSpanIE(InfoExtractor):
'height': int_or_none(get_text_attr(quality, 'height')),
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
})
+ if not formats:
+ path = get_text_attr(f, 'path')
+ if not path:
+ continue
+ formats = self._extract_m3u8_formats(
+ path, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index e5e16ca3b..ed3e07118 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -22,6 +22,18 @@ class InstagramIE(InfoExtractor):
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}, {
+ # missing description
+ 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
+ 'info_dict': {
+ 'id': 'BA-pQFBG8HZ',
+ 'ext': 'mp4',
+ 'uploader_id': 'britneyspears',
+ 'title': 'Video by britneyspears',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://instagram.com/p/-Cmh1cukG2/',
'only_matching': True,
}]
@@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False)
- desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
- fatal=False)
+ desc = self._search_regex(
+ r'"caption":"(.+?)"', webpage, 'description', default=None)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 7f060b15b..3cfa671ed 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -34,11 +34,11 @@ class SpankBangIE(InfoExtractor):
'ext': 'mp4',
'format_id': '%sp' % height,
'height': int(height),
- } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
+ } for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
self._sort_formats(formats)
title = self._html_search_regex(
- r'(?s)<h1>(.+?)</h1>', webpage, 'title')
+ r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
description = self._search_regex(
r'class="desc"[^>]*>([^<]+)',
webpage, 'description', default=None)