diff options
| field | value | date |
|---|---|---|
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-03 18:16:19 +0800 |
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-03 18:16:19 +0800 |
| commit | cdc783510bb575b2318b1d7d42fb98f0c0f0df18 (patch) | |
| tree | 6e76324c500807964f4963eee3341e49528c484e | |
| parent | cf0efe96366259a5f0f07ae79280bfa17dc6f6e7 (diff) | |

[foxnews:insider] Add new extractor
Closes #10445
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | ChangeLog | 1 |
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 |
| -rw-r--r-- | youtube_dl/extractor/foxnews.py | 48 |

3 files changed, 52 insertions, 2 deletions

```diff
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Extractors
++ [foxnews] Add support for FoxNews Insider (#10445)
 + [fc2] Recognize Flash player URLs (#10512)
 
 
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d851e5f36..8c6ee0503 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -287,7 +287,10 @@ from .formula1 import Formula1IE
 from .fourtube import FourTubeIE
 from .fox import FOXIE
 from .foxgay import FoxgayIE
-from .foxnews import FoxNewsIE
+from .foxnews import (
+    FoxNewsIE,
+    FoxNewsInsiderIE,
+)
 from .foxsports import FoxSportsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index b04da2415..5c7acd795 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .amp import AMPIE
+from .common import InfoExtractor
 
 
 class FoxNewsIE(AMPIE):
     IE_DESC = 'Fox News and Fox Business Video'
-    _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
@@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE):
             'url': 'http://video.foxbusiness.com/v/4442309889001',
             'only_matching': True,
         },
+        {
+            # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
+            'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE):
             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
         info['id'] = video_id
         return info
+
+
+class FoxNewsInsiderIE(InfoExtractor):
+    _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
+    IE_NAME = 'foxnews:insider'
+
+    _TEST = {
+        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
+        'md5': 'a10c755e582d28120c62749b4feb4c0c',
+        'info_dict': {
+            'id': '5099377331001',
+            'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
+            'ext': 'mp4',
+            'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
+            'description': 'Is campus censorship getting out of control?',
+            'timestamp': 1472168725,
+            'upload_date': '20160825',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        },
+        'add_ie': [FoxNewsIE.ie_key()],
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': FoxNewsIE.ie_key(),
+            'url': embed_url,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+        }
```
