diff options
| -rw-r--r-- | youtube_dl/extractor/generic.py | 33 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 37 | 
2 files changed, 41 insertions, 29 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c81efdc00..b83c18380 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
         if vid_me_embed_url is not None:
             return self.url_result(vid_me_embed_url, 'Vidme')
 
-        # Look for embedded YouTube player
-        matches = re.findall(r'''(?x)
-            (?:
-                <iframe[^>]+?src=|
-                data-video-url=|
-                <embed[^>]+?src=|
-                embedSWF\(?:\s*|
-                <object[^>]+data=|
-                new\s+SWFObject\(
-            )
-            (["\'])
-                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v|p)/.+?)
-            \1''', webpage)
-        if matches:
+        # Look for YouTube embeds
+        youtube_urls = YoutubeIE._extract_urls(webpage)
+        if youtube_urls:
             return self.playlist_from_matches(
-                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
-        # Look for lazyYT YouTube embed
-        matches = re.findall(
-            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
-        # Look for Wordpress "YouTube Video Importer" plugin
-        matches = re.findall(r'''(?x)<div[^>]+
-            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
-            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
 
         matches = DailymotionIE._extract_urls(webpage)
         if matches:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 953e38227..ad2e933ee 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             playback_url, video_id, 'Marking watched',
             'Unable to mark watched', fatal=False)
 
+    @staticmethod
+    def _extract_urls(webpage):
+        # Embedded YouTube player
+        entries = [
+            unescapeHTML(mobj.group('url'))
+            for mobj in re.finditer(r'''(?x)
+            (?:
+                <iframe[^>]+?src=|
+                data-video-url=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*|
+                <object[^>]+data=|
+                new\s+SWFObject\(
+            )
+            (["\'])
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+                (?:embed|v|p)/.+?)
+            \1''', webpage)]
+
+        # lazyYT YouTube embed
+        entries.extend(list(map(
+            unescapeHTML,
+            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+        # Wordpress "YouTube Video Importer" plugin
+        matches = re.findall(r'''(?x)<div[^>]+
+            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+        entries.extend(m[-1] for m in matches)
+
+        return entries
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = YoutubeIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
     @classmethod
     def extract_id(cls, url):
         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
