diff options
| field | value | date |
|---|---|---|
| author | Sergey M․ <dstftw@gmail.com> | 2017-07-09 20:06:24 +0700 |
| committer | Sergey M․ <dstftw@gmail.com> | 2017-07-09 20:06:24 +0700 |
| commit | 71a1db89198100a0e9bc5099aeed622264690203 (patch) | |
| tree | 5f73a393808e19513a5c45a21bf4cea85e530aa3 | |
| parent | 6e925598d68f5d5216aa3e9abed5c7706a68c891 (diff) | |
[dailymail] Add support for embeds
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/dailymail.py | 17 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 21 |
2 files changed, 35 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py
index 538565c66..af3978035 100644
--- a/youtube_dl/extractor/dailymail.py
+++ b/youtube_dl/extractor/dailymail.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -12,8 +14,8 @@ from ..utils import (
 
 
 class DailyMailIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
         'md5': 'f6129624562251f628296c3a9ffde124',
         'info_dict': {
@@ -22,7 +24,16 @@ class DailyMailIE(InfoExtractor):
             'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
             'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
         }
-    }
+    }, {
+        'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
+            webpage)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f2c577f98..5e8890d41 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -57,6 +57,7 @@ from .dailymotion import (
     DailymotionIE,
     DailymotionCloudIE,
 )
+from .dailymail import DailyMailIE
 from .onionstudios import OnionStudiosIE
 from .viewlift import ViewLiftEmbedIE
 from .mtv import MTVServicesEmbeddedIE
@@ -760,6 +761,20 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Dailymotion'],
         },
+        # DailyMail embed
+        {
+            'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+            'info_dict': {
+                'id': '1495629',
+                'ext': 'mp4',
+                'title': 'Care worker punches elderly dementia patient in head 11 times',
+                'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+            },
+            'add_ie': ['DailyMail'],
+            'params': {
+                'skip_download': True,
+            },
+        },
         # YouTube embed
         {
             'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
@@ -2190,6 +2205,12 @@ class GenericIE(InfoExtractor):
                 return self.playlist_from_matches(
                     playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
 
+        # Look for DailyMail embeds
+        dailymail_urls = DailyMailIE._extract_urls(webpage)
+        if dailymail_urls:
+            return self.playlist_from_matches(
+                dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
         # Look for embedded Wistia player
         wistia_url = WistiaIE._extract_url(webpage)
         if wistia_url:
```
