aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r--youtube_dl/extractor/common.py44
1 files changed, 38 insertions, 6 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 661889593..df546da27 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -44,6 +44,7 @@ from ..utils import (
sanitized_Request,
unescapeHTML,
unified_strdate,
+ unified_timestamp,
url_basename,
xpath_element,
xpath_text,
@@ -163,6 +164,7 @@ class InfoExtractor(object):
* "height" (optional, int)
* "resolution" (optional, string "{width}x{height"},
deprecated)
+ * "filesize" (optional, int)
thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
@@ -751,10 +753,12 @@ class InfoExtractor(object):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ if not isinstance(name, (list, tuple)):
+ name = [name]
if display_name is None:
- display_name = name
+ display_name = name[0]
return self._html_search_regex(
- self._meta_regex(name),
+ [self._meta_regex(n) for n in name],
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
@@ -803,15 +807,17 @@ class InfoExtractor(object):
return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _search_json_ld(self, html, video_id, **kwargs):
+ def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
html, 'JSON-LD', group='json_ld', **kwargs)
if not json_ld:
return {}
- return self._json_ld(json_ld, video_id, fatal=kwargs.get('fatal', True))
+ return self._json_ld(
+ json_ld, video_id, fatal=kwargs.get('fatal', True),
+ expected_type=expected_type)
- def _json_ld(self, json_ld, video_id, fatal=True):
+ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
@@ -819,6 +825,8 @@ class InfoExtractor(object):
info = {}
if json_ld.get('@context') == 'http://schema.org':
item_type = json_ld.get('@type')
+ if expected_type is not None and expected_type != item_type:
+ return info
if item_type == 'TVEpisode':
info.update({
'episode': unescapeHTML(json_ld.get('name')),
@@ -837,6 +845,19 @@ class InfoExtractor(object):
'title': unescapeHTML(json_ld.get('headline')),
'description': unescapeHTML(json_ld.get('articleBody')),
})
+ elif item_type == 'VideoObject':
+ info.update({
+ 'url': json_ld.get('contentUrl'),
+ 'title': unescapeHTML(json_ld.get('name')),
+ 'description': unescapeHTML(json_ld.get('description')),
+ 'thumbnail': json_ld.get('thumbnailUrl'),
+ 'duration': parse_duration(json_ld.get('duration')),
+ 'timestamp': unified_timestamp(json_ld.get('uploadDate')),
+ 'filesize': float_or_none(json_ld.get('contentSize')),
+ 'tbr': int_or_none(json_ld.get('bitrate')),
+ 'width': int_or_none(json_ld.get('width')),
+ 'height': int_or_none(json_ld.get('height')),
+ })
return dict((k, v) for k, v in info.items() if v is not None)
@staticmethod
@@ -878,7 +899,11 @@ class InfoExtractor(object):
f['ext'] = determine_ext(f['url'])
if isinstance(field_preference, (list, tuple)):
- return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
+ return tuple(
+ f.get(field)
+ if f.get(field) is not None
+ else ('' if field == 'format_id' else -1)
+ for field in field_preference)
preference = f.get('preference')
if preference is None:
@@ -1781,6 +1806,13 @@ class InfoExtractor(object):
def _mark_watched(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
+ def geo_verification_headers(self):
+ headers = {}
+ geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ if geo_verification_proxy:
+ headers['Ytdl-request-proxy'] = geo_verification_proxy
+ return headers
+
class SearchInfoExtractor(InfoExtractor):
"""