aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/npr.py
diff options
context:
space:
mode:
authorkaspi <je326@hotmail.com>2015-10-17 23:27:03 -0400
committerSergey M․ <dstftw@gmail.com>2016-01-07 01:55:55 +0600
commit76048b23e8a4aac93a33a96356fe64a9bcf78421 (patch)
tree468399c2da285cd3c343a1bea04df995322e67bb /youtube_dl/extractor/npr.py
parentf20756fb10ec560177282f032684327f600acc34 (diff)
[npr] Add extractor
removed md5 from _TEST; moved from xml data to json; test; changed _TEST url to one that will not expire, so tests would not be failing
Diffstat (limited to 'youtube_dl/extractor/npr.py')
-rw-r--r--youtube_dl/extractor/npr.py71
1 files changed, 71 insertions, 0 deletions
diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py
new file mode 100644
index 000000000..a823bc096
--- /dev/null
+++ b/youtube_dl/extractor/npr.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
+from .common import InfoExtractor
+
+class NprIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer.html?.*id=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
+ 'info_dict': {
+ 'id': '449974205',
+ 'ext': 'mp4',
+ 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
+ }
+}
+
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage_url = 'http://www.npr.org/player/v2/mediaPlayer.html?id=' + video_id
+ webpage = self._download_webpage(webpage_url, video_id)
+ key = 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010'
+ xml_url = 'http://api.npr.org/query?id=%s&apiKey=%s' % (video_id, key)
+ json_url = 'http://api.npr.org/query?id=%s&apiKey=%s&format=json' % (video_id, key)
+
+ formats = []
+ entries = []
+
+ config = self._download_json(json_url, video_id)
+
+ content = config["list"]["story"]
+
+ album_title = config["list"]["story"][0]['song'][0]['album']['albumTitle']
+ print album_title['$text']
+
+ for key in content:
+ if "audio" in key:
+ for x in key['audio']:
+ if x['type'] == 'standard':
+ playlist = True
+ song_duration = x["duration"]['$text']
+ song_title = x["title"]["$text"]
+ song_id = x["id"]
+
+ for k in x["format"]:
+ if type(x["format"][k]) is list:
+ for z in x["format"][k]:
+ formats.append({ 'format': z['type'],
+ 'url' : z['$text']
+ })
+ else:
+ formats.append({ 'format': k,
+ 'url' : x["format"][k]['$text']
+ })
+
+ entries.append({ "title":song_title,
+ "id":song_id,
+ "duration": song_duration ,
+ "formats":formats})
+ formats = []
+
+ return { '_type': 'playlist',
+ 'id' : video_id,
+ 'title' : album_title,
+ 'entries': entries }