diff options
| -rw-r--r-- | youtube_dl/extractor/foxnews.py | 42 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 6 | 
2 files changed, 41 insertions, 7 deletions
| diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index dc0662f74..4c402806a 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):          },      ] +    @staticmethod +    def _extract_urls(webpage): +        return [ +            mobj.group('url') +            for mobj in re.finditer( +                r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1', +                webpage)] +      def _real_extract(self, url):          host, video_id = re.match(self._VALID_URL, url).groups() @@ -71,18 +79,35 @@ class FoxNewsArticleIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'      IE_NAME = 'foxnews:article' -    _TEST = { +    _TESTS = [{ +        # data-video-id          'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', -        'md5': '62aa5a781b308fdee212ebb6f33ae7ef', +        'md5': '83d44e1aff1433e7a29a7b537d1700b5',          'info_dict': {              'id': '5116295019001',              'ext': 'mp4',              'title': 'Trump and Clinton asked to defend positions on Iraq War',              'description': 'Veterans react on \'The Kelly File\'', -            'timestamp': 1473299755, +            'timestamp': 1473301045,              'upload_date': '20160908',          }, -    } +    }, { +        # iframe embed +        'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', +        'info_dict': { +            'id': '5748266721001', +            'ext': 'flv', +            'title': 'Kyle Kashuv has a positive message for the Trump White House', +            'description': 'Marjory Stoneman Douglas student disagrees with classmates.', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 229, +            'timestamp': 1520594670, +            'upload_date': '20180309', +        }, +        'params': { +            'skip_download': True, +        }, +    }]      def _real_extract(self, url):          display_id = self._match_id(url) @@ -90,10 +115,13 @@ class FoxNewsArticleIE(InfoExtractor):          video_id = self._html_search_regex(              r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', -            webpage, 'video ID', group='id') +            webpage, 'video ID', group='id', default=None) +        if video_id: +            return self.url_result( +                'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key()) +          return self.url_result( -            'http://video.foxnews.com/v/' + video_id, -            FoxNewsIE.ie_key()) +            FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())  class FoxNewsInsiderIE(InfoExtractor): diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6c0f772ac..d71cb9050 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -111,6 +111,7 @@ from .cloudflarestream import CloudflareStreamIE  from .peertube import PeerTubeIE  from .indavideo import IndavideoEmbedIE  from .apa import APAIE +from .foxnews import FoxNewsIE  class GenericIE(InfoExtractor): @@ -3091,6 +3092,11 @@ class GenericIE(InfoExtractor):              return self.playlist_from_matches(                  apa_urls, video_id, video_title, ie=APAIE.ie_key()) +        foxnews_urls = FoxNewsIE._extract_urls(webpage) +        if foxnews_urls: +            return self.playlist_from_matches( +                foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) +          sharevideos_urls = [mobj.group('url') for mobj in re.finditer(              r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',              webpage)] | 
