about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r-- youtube_dl/extractor/common.py 22
1 files changed, 16 insertions, 6 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 49e4dc710..82f5de2d8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -22,6 +22,7 @@ from ..compat import (
compat_str,
)
from ..utils import (
+ NO_DEFAULT,
age_restricted,
bug_reports_message,
clean_html,
@@ -33,7 +34,6 @@ from ..utils import (
sanitize_filename,
unescapeHTML,
)
-_NO_DEFAULT = object()
class InfoExtractor(object):
@@ -523,7 +523,7 @@ class InfoExtractor(object):
video_info['description'] = playlist_description
return video_info
- def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group.
@@ -549,7 +549,7 @@ class InfoExtractor(object):
return next(g for g in mobj.groups() if g is not None)
else:
return mobj.group(group)
- elif default is not _NO_DEFAULT:
+ elif default is not NO_DEFAULT:
return default
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
@@ -557,7 +557,7 @@ class InfoExtractor(object):
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
- def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
"""
@@ -705,6 +705,12 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html,
'twitter card player')
+ @staticmethod
+ def _form_hidden_inputs(html):
+ """
+ Return a dict mapping name to value for the hidden <input> tags found in html.
+ Only tags written as <input type="hidden" name="..." [id="..."] value="...">
+ are matched: the attributes must be double-quoted and appear in exactly this
+ order. The name must be non-empty; the value may be empty. If the same name
+ is matched more than once, the last matched value is the one kept.
+ """
+ return dict(re.findall(
+ r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
+ html))
+
def _sort_formats(self, formats, field_preference=None):
if not formats:
raise ExtractorError('No video formats found')
@@ -846,7 +852,8 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
- m3u8_id=None, note=None, errnote=None):
+ m3u8_id=None, note=None, errnote=None,
+ fatal=True):
formats = [{
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -866,7 +873,10 @@ class InfoExtractor(object):
m3u8_doc = self._download_webpage(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
- errnote=errnote or 'Failed to download m3u8 information')
+ errnote=errnote or 'Failed to download m3u8 information',
+ fatal=fatal)
+ if m3u8_doc is False:
+ return m3u8_doc
last_info = None
last_media = None
kv_rex = re.compile(