about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2014-05-03 02:28:38 +0700
committer Sergey M․ <dstftw@gmail.com> 2014-05-03 02:28:38 +0700
commit d664de44b70e45a6a1764c372c49839ea4769ba2 (patch)
tree 7a6e962df60db8610440def9c31345b0a5806942
parent bbe99d26ecf148390a4c5740ccd9ab86bd108cd5 (diff)
download youtube-dl-d664de44b70e45a6a1764c372c49839ea4769ba2.tar.xz
[nytimes] Add support for nytimes.com (Closes #2846)
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/nytimes.py 69
2 files changed, 70 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 09209a739..cf01d813f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -192,6 +192,7 @@ from .nowness import NownessIE
from .nowvideo import NowVideoIE
from .nrk import NRKIE
from .ntv import NTVIE
+from .nytimes import NYTimesIE
from .oe1 import OE1IE
from .ooyala import OoyalaIE
from .orf import ORFIE
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
new file mode 100644
index 000000000..07d4deee2
--- /dev/null
+++ b/youtube_dl/extractor/nytimes.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class NYTimesIE(InfoExtractor):
+    """Extract videos from nytimes.com/video pages via the site's video JSON API."""
+    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+        'md5': '18a525a510f942ada2720db5f31644c0',
+        'info_dict': {
+            'id': '100000002847155',
+            'ext': 'mov',
+            'title': 'Verbatim: What Is a Photocopier?',
+            'description': 'md5:93603dada88ddbda9395632fdc5da260',
+            'timestamp': 1398631707,
+            'upload_date': '20140427',
+            'uploader': 'Brett Weiner',
+            'duration': 419,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for url; only 'headline', 'renditions' and each rendition 'url' are required."""
+        video_id = re.match(self._VALID_URL, url).group('id')
+        video_data = self._download_json(
+            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+        # Optional metadata is read with .get() so a sparse API response cannot abort extraction.
+        duration_ms = video_data.get('duration')
+        published = video_data.get('publication_date')
+
+        formats = [
+            {
+                'url': video['url'],
+                'format_id': video.get('type'),
+                'vcodec': video.get('video_codec'),
+                'width': video.get('width'),
+                'height': video.get('height'),
+                'filesize': video.get('fileSize'),
+            } for video in video_data['renditions'] if video.get('url')
+        ]
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for image in video_data.get('images') or []:
+            if not image.get('url'):
+                continue
+            thumbnail = {'url': 'http://www.nytimes.com/%s' % image['url']}
+            if image.get('width') and image.get('height'):
+                thumbnail['resolution'] = '%dx%d' % (image['width'], image['height'])
+            thumbnails.append(thumbnail)
+
+        return {
+            'id': video_id,
+            'title': video_data['headline'],
+            'description': video_data.get('summary'),
+            # The slice drops the trailing 8 characters before parsing, exactly as before.
+            'timestamp': parse_iso8601(published[:-8]) if published else None,
+            'uploader': video_data.get('byline'),
+            # Division by 1000.0 kept as-is; presumably milliseconds -> seconds (confirm against the API).
+            'duration': duration_ms / 1000.0 if duration_ms is not None else None,
+            'formats': formats,
+            'thumbnails': thumbnails,
+        }
\ No newline at end of file