diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/foxnews.py | 94 | ||||
| -rw-r--r-- | youtube_dl/extractor/vine.py | 27 | 
3 files changed, 112 insertions, 10 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a56ec4fb5..e0c4a3486 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -122,6 +122,7 @@ from .flickr import FlickrIE
 from .folketinget import FolketingetIE
 from .fourtube import FourTubeIE
 from .foxgay import FoxgayIE
+from .foxnews import FoxNewsIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
new file mode 100644
index 000000000..917f76b1e
--- /dev/null
+++ b/youtube_dl/extractor/foxnews.py
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    int_or_none,
+)
+
+
+class FoxNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
+    _TESTS = [
+        {
+            'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
+            'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
+            'info_dict': {
+                'id': '3937480',
+                'ext': 'flv',
+                'title': 'Frozen in Time',
+                'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+                'duration': 265,
+                'timestamp': 1304411491,
+                'upload_date': '20110503',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
+            'md5': '5846c64a1ea05ec78175421b8323e2df',
+            'info_dict': {
+                'id': '3922535568001',
+                'ext': 'mp4',
+                'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
+                'description': "Congressman discusses the president's executive action",
+                'duration': 292,
+                'timestamp': 1417662047,
+                'upload_date': '20141204',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
+
+        item = video['channel']['item']
+        title = item['title']
+        description = item['description']
+        timestamp = parse_iso8601(item['dc-date'])
+
+        media_group = item['media-group']
+        duration = None
+        formats = []
+        for media in media_group['media-content']:
+            attributes = media['@attributes']
+            video_url = attributes['url']
+            if video_url.endswith('.f4m'):
+                formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
+            elif video_url.endswith('.m3u8'):
+                formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
+            elif not video_url.endswith('.smil'):
+                duration = int_or_none(attributes.get('duration'))
+                formats.append({
+                    'url': video_url,
+                    'format_id': media['media-category']['@attributes']['label'],
+                    'preference': 1,
+                    'vbr': int_or_none(attributes.get('bitrate')),
+                    'filesize': int_or_none(attributes.get('fileSize'))
+                })
+        self._sort_formats(formats)
+
+        media_thumbnail = media_group['media-thumbnail']['@attributes']
+        thumbnails = [{
+            'url': media_thumbnail['url'],
+            'width': int_or_none(media_thumbnail.get('width')),
+            'height': int_or_none(media_thumbnail.get('height')),
+        }] if media_thumbnail else []
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 42995226e..4970b2f23 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -63,29 +63,36 @@ class VineIE(InfoExtractor):
 
 class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
-    _TEST = {
-        'url': 'https://vine.co/Visa',
-        'info_dict': {
-            'id': 'Visa',
+    _TESTS = [
+        {
+            'url': 'https://vine.co/Visa',
+            'info_dict': {
+                'id': 'Visa',
+            },
+            'playlist_mincount': 46,
         },
-        'playlist_mincount': 46,
-    }
+        {
+            'url': 'https://vine.co/u/941705360593584128',
+            'only_matching': True,
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         user = mobj.group('user')
+        u = mobj.group('u')
 
-        profile_url = "%sapi/users/profiles/vanity/%s" % (
-            self._VINE_BASE_URL, user)
+        profile_url = "%sapi/users/profiles/%s%s" % (
+            self._VINE_BASE_URL, 'vanity/' if not u else '', user)
         profile_data = self._download_json(
             profile_url, user, note='Downloading user profile data')
 
         user_id = profile_data['data']['userId']
         timeline_data = []
         for pagenum in itertools.count(1):
-            timeline_url = "%sapi/timelines/users/%s?page=%s" % (
+            timeline_url = "%sapi/timelines/users/%s?page=%s&size=100" % (
                 self._VINE_BASE_URL, user_id, pagenum)
             timeline_page = self._download_json(
                 timeline_url, user, note='Downloading page %d' % pagenum)
