diff options
author | Sergey M․ <dstftw@gmail.com> | 2014-05-03 02:28:38 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2014-05-03 02:28:38 +0700 |
commit | d664de44b70e45a6a1764c372c49839ea4769ba2 (patch) | |
tree | 7a6e962df60db8610440def9c31345b0a5806942 /youtube_dl/extractor/nytimes.py | |
parent | bbe99d26ecf148390a4c5740ccd9ab86bd108cd5 (diff) |
[nytimes] Add support for nytimes.com (Closes #2846)
Diffstat (limited to 'youtube_dl/extractor/nytimes.py')
-rw-r--r-- | youtube_dl/extractor/nytimes.py | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py new file mode 100644 index 000000000..07d4deee2 --- /dev/null +++ b/youtube_dl/extractor/nytimes.py @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_iso8601 + + +class NYTimesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', + 'md5': '18a525a510f942ada2720db5f31644c0', + 'info_dict': { + 'id': '100000002847155', + 'ext': 'mov', + 'title': 'Verbatim: What Is a Photocopier?', + 'description': 'md5:93603dada88ddbda9395632fdc5da260', + 'timestamp': 1398631707, + 'upload_date': '20140427', + 'uploader': 'Brett Weiner', + 'duration': 419, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + video_data = self._download_json( + 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON') + + title = video_data['headline'] + description = video_data['summary'] + duration = video_data['duration'] / 1000.0 + + uploader = video_data['byline'] + timestamp = parse_iso8601(video_data['publication_date'][:-8]) + + formats = [ + { + 'url': video['url'], + 'format_id': video['type'], + 'vcodec': video['video_codec'], + 'width': video['width'], + 'height': video['height'], + 'filesize': video['fileSize'], + } for video in video_data['renditions'] + ] + self._sort_formats(formats) + + thumbnails = [ + { + 'url': 'http://www.nytimes.com/%s' % image['url'], + 'resolution': '%dx%d' % (image['width'], image['height']), + } for image in video_data['images'] + ] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + 'formats': formats, + 'thumbnails': thumbnails, + }
\ No newline at end of file |