diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-09-06 00:48:37 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-09-06 00:48:37 +0700 |
commit | 66c9fa36c10860b380806b9de48f38d628289e03 (patch) | |
tree | 7d9b1991b5ae9082c3d0ea8e683c7cc7af627442 /youtube_dl/extractor/youtube.py | |
parent | c5c9bf0c120d2c481124a0c3913b981cf061fb95 (diff) |
[youtube] Separate methods for embeds extraction
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 953e38227..ad2e933ee 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor): playback_url, video_id, 'Marking watched', 'Unable to mark watched', fatal=False) + @staticmethod + def _extract_urls(webpage): + # Embedded YouTube player + entries = [ + unescapeHTML(mobj.group('url')) + for mobj in re.finditer(r'''(?x) + (?: + <iframe[^>]+?src=| + data-video-url=| + <embed[^>]+?src=| + embedSWF\(?:\s*| + <object[^>]+data=| + new\s+SWFObject\( + ) + (["\']) + (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ + (?:embed|v|p)/.+?) + \1''', webpage)] + + # lazyYT YouTube embed + entries.extend(list(map( + unescapeHTML, + re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))) + + # Wordpress "YouTube Video Importer" plugin + matches = re.findall(r'''(?x)<div[^>]+ + class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ + data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) + entries.extend(m[-1] for m in matches) + + return entries + + @staticmethod + def _extract_url(webpage): + urls = YoutubeIE._extract_urls(webpage) + return urls[0] if urls else None + @classmethod def extract_id(cls, url): mobj = re.match(cls._VALID_URL, url, re.VERBOSE) |