diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-07-16 04:32:37 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-07-16 04:32:37 +0700 | 
| commit | 3f59b0154a8b6dc85425edfbb3dfdc64f41a6ecb (patch) | |
| tree | ebd774761b7230481c22d00b427b958c678a3e0f | |
| parent | 089b97cfee8553886d33cd52b7ede178cebd7034 (diff) | |
[nexx:embed] Add extractor for iframe embeds
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 31 |
| -rw-r--r-- | youtube_dl/extractor/nexx.py | 46 |
3 files changed, 80 insertions, 2 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 28f0d3f0d..e8a066b83 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -653,7 +653,10 @@ from .nextmedia import (      AppleDailyIE,      NextTVIE,  ) -from .nexx import NexxIE +from .nexx import ( +    NexxIE, +    NexxEmbedIE, +)  from .nfb import NFBIE  from .nfl import NFLIE  from .nhk import NhkVodIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 123a21296..0ab2ef2d6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -36,7 +36,10 @@ from .brightcove import (      BrightcoveLegacyIE,      BrightcoveNewIE,  ) -from .nexx import NexxIE +from .nexx import ( +    NexxIE, +    NexxEmbedIE, +)  from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE @@ -1566,6 +1569,27 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        # Nexx iFrame embed +        { +            'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', +            'info_dict': { +                'id': '161464', +                'ext': 'mp4', +                'title': 'Nervenkitzel Achterbahn', +                'alt_title': 'Karussellbauer in Deutschland', +                'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', +                'release_year': 2005, +                'creator': 'SPIEGEL TV', +                'thumbnail': r're:^https?://.*\.jpg$', +                'duration': 2761, +                'timestamp': 1394021479, +                'upload_date': '20140305', +            }, +            'params': { +                'format': 'bestvideo', +                'skip_download': True, +            }, +        },          # Facebook <iframe> embed          {              'url': 
'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -2155,6 +2179,11 @@ class GenericIE(InfoExtractor):          if nexx_urls:              return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) +        # Look for Nexx iFrame embeds +        nexx_embed_urls = NexxEmbedIE._extract_urls(webpage) +        if nexx_embed_urls: +            return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key()) +          # Look for ThePlatform embeds          tp_urls = ThePlatformIE._extract_urls(webpage)          if tp_urls: diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py index 12450d4c5..e2960273e 100644 --- a/youtube_dl/extractor/nexx.py +++ b/youtube_dl/extractor/nexx.py @@ -88,6 +88,10 @@ class NexxIE(InfoExtractor):          return entries +    @staticmethod +    def _extract_url(webpage): +        return NexxIE._extract_urls(webpage)[0] +      def _handle_error(self, response):          status = int_or_none(try_get(              response, lambda x: x['metadata']['status']) or 200) @@ -223,3 +227,45 @@ class NexxIE(InfoExtractor):                  video, lambda x: x['episodedata']['season'])),              'formats': formats,          } + + +class NexxEmbedIE(InfoExtractor): +    _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)' +    _TEST = { +        'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1', +        'md5': '16746bfc28c42049492385c989b26c4a', +        'info_dict': { +            'id': '161464', +            'ext': 'mp4', +            'title': 'Nervenkitzel Achterbahn', +            'alt_title': 'Karussellbauer in Deutschland', +            'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', +            'release_year': 2005, +            'creator': 'SPIEGEL TV', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 2761, +            'timestamp': 1394021479, +            
'upload_date': '20140305', +        }, +        'params': { +            'format': 'bestvideo', +            'skip_download': True, +        }, +    } + +    @staticmethod +    def _extract_urls(webpage): +        # Reference: +        # 1. https://nx-s.akamaized.net/files/201510/44.pdf + +        # iFrame Embed Integration +        return [mobj.group('url') for mobj in re.finditer( +                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1', +            webpage)] + +    def _real_extract(self, url): +        embed_id = self._match_id(url) + +        webpage = self._download_webpage(url, embed_id) + +        return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key()) | 
