diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-07-29 03:43:03 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-07-29 03:43:03 +0600 |
commit | 864f24bd2c0cf9bde034812a2049c3750c1bb05c (patch) | |
tree | eb326d4df7e8676ad3b99be6a1014668a0ad92aa /youtube_dl/extractor | |
parent | 5316bf7487b608b7c085950ff2fb0444f2c36dc0 (diff) |
[extractor/common] Add _meta_regex and clarify tags field
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 12 |
1 file changed, 8 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a227aeb9c..d54866d1f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -181,13 +181,13 @@ class InfoExtractor(object):
                     by YoutubeDL if it's missing)
     categories:     A list of categories that the video falls in, for example
                     ["Sports", "Berlin"]
+    tags:           A list of tags assigned to the video, e.g. ["sweden", "pop music"]
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
     start_time:     Time in seconds where the reproduction should start, as
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
-    tags:           A list of keywords attached to the video.
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 
@@ -631,6 +631,12 @@ class InfoExtractor(object):
             template % (content_re, property_re),
         ]
 
+    @staticmethod
+    def _meta_regex(prop):
+        return r'''(?isx)<meta
+                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
+                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
     def _og_search_property(self, prop, html, name=None, **kargs):
         if name is None:
             name = 'OpenGraph %s' % prop
@@ -661,9 +667,7 @@ class InfoExtractor(object):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
-            r'''(?isx)<meta
-                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
-                    [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
+            self._meta_regex(name),
             html, display_name, fatal=fatal, group='content', **kwargs)
 
     def _dc_search_uploader(self, html):
```