diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 101 | 
1 files changed, 73 insertions, 28 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0ab2ef2d6..49b00b87e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -97,6 +97,8 @@ from .washingtonpost import WashingtonPostIE  from .wistia import WistiaIE  from .mediaset import MediasetIE  from .joj import JojIE +from .megaphone import MegaphoneIE +from .vzaar import VzaarIE  class GenericIE(InfoExtractor): @@ -574,6 +576,19 @@ class GenericIE(InfoExtractor):              },              'skip': 'movie expired',          }, +        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js +        { +            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', +            'info_dict': { +                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', +                'ext': 'mp4', +                'title': 'Steampunk Fest Comes to Honesdale', +                'duration': 43.276, +            }, +            'params': { +                'skip_download': True, +            } +        },          # embed.ly video          {              'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -1504,14 +1519,27 @@ class GenericIE(InfoExtractor):          # LiveLeak embed          {              'url': 'http://www.wykop.pl/link/3088787/', -            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', +            'md5': '7619da8c820e835bef21a1efa2a0fc71',              'info_dict': {                  'id': '874_1459135191',                  'ext': 'mp4',                  'title': 'Man shows poor quality of new apartment building',                  'description': 'The wall is like a sand pile.',                  'uploader': 'Lake8737', -            } +            }, +            'add_ie': [LiveLeakIE.ie_key()], +        }, +        # Another LiveLeak embed pattern (#13336) +        { +            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', +            'info_dict': { +                'id': '2eb_1496309988', +                'ext': 'mp4', +                'title': 'Thief robs place where everyone was armed', +                'description': 'md5:694d73ee79e535953cf2488562288eee', +                'uploader': 'brazilwtf', +            }, +            'add_ie': [LiveLeakIE.ie_key()],          },          # Duplicated embedded video URLs          { @@ -1569,27 +1597,6 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, -        # Nexx iFrame embed -        { -            'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', -            'info_dict': { -                'id': '161464', -                'ext': 'mp4', -                'title': 'Nervenkitzel Achterbahn', -                'alt_title': 'Karussellbauer in Deutschland', -                'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', -                'release_year': 2005, -                'creator': 'SPIEGEL TV', -                'thumbnail': r're:^https?://.*\.jpg$', -                'duration': 2761, -                'timestamp': 1394021479, -                'upload_date': '20140305', -            }, -            'params': { -                'format': 'bestvideo', -                'skip_download': True, -            }, -        },          # Facebook <iframe> embed          {              'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -1792,6 +1799,21 @@ class GenericIE(InfoExtractor):              'playlist_mincount': 5,          },          { +            # Limelight embed (LimelightPlayerUtil.embed) +            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', +            'info_dict': { +                'id': '95d035dc5c8a401588e9c0e6bd1e9c92', +                'ext': 'mp4', +                'title': '07448641', +                'timestamp': 1499890639, +                'upload_date': '20170712', +            }, +            'params': { +                'skip_download': True, +            }, +            'add_ie': ['LimelightMedia'], +        }, +        {              'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',              'info_dict': {                  'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest', @@ -1847,6 +1869,16 @@ class GenericIE(InfoExtractor):                  'title': 'Стас Намин: «Мы нарушили девственность Кремля»',              },          }, +        { +            # vzaar embed +            'url': 'http://help.vzaar.com/article/165-embedding-video', +            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4', +            'info_dict': { +                'id': '8707641', +                'ext': 'mp4', +                'title': 'Building A Business Online: Principal Chairs Q & A', +            }, +        },          # {          #     # TODO: find another test          #     # http://schema.org/VideoObject @@ -1996,7 +2028,7 @@ class GenericIE(InfoExtractor):          if head_response is not False:              # Check for redirect -            new_url = head_response.geturl() +            new_url = compat_str(head_response.geturl())              if url != new_url:                  self.report_following_redirect(new_url)                  if force_videoid: @@ -2097,7 +2129,7 @@ class GenericIE(InfoExtractor):              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):                  info_dict['formats'] = self._parse_mpd_formats(                      doc, video_id, -                    mpd_base_url=full_response.geturl().rpartition('/')[0], +                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],                      mpd_url=url)                  self._sort_formats(info_dict['formats'])                  return info_dict @@ -2313,6 +2345,7 @@ class GenericIE(InfoExtractor):          # Look for Ooyala videos          mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or                  re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or +                re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))          if mobj is not None: @@ -2737,9 +2770,9 @@ class GenericIE(InfoExtractor):                  self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())          # Look for LiveLeak embeds -        liveleak_url = LiveLeakIE._extract_url(webpage) -        if liveleak_url: -            return self.url_result(liveleak_url, 'LiveLeak') +        liveleak_urls = LiveLeakIE._extract_urls(webpage) +        if liveleak_urls: +            return self.playlist_from_matches(liveleak_urls, video_id, video_title)          # Look for 3Q SDN embeds          threeqsdn_url = ThreeQSDNIE._extract_url(webpage) @@ -2811,6 +2844,18 @@ class GenericIE(InfoExtractor):              return self.playlist_from_matches(                  joj_urls, video_id, video_title, ie=JojIE.ie_key()) +        # Look for megaphone.fm embeds +        mpfn_urls = MegaphoneIE._extract_urls(webpage) +        if mpfn_urls: +            return self.playlist_from_matches( +                mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + +        # Look for vzaar embeds +        vzaar_urls = VzaarIE._extract_urls(webpage) +        if vzaar_urls: +            return self.playlist_from_matches( +                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) +          def merge_dicts(dict1, dict2):              merged = {}              for k, v in dict1.items(): | 
