diff options
| -rw-r--r-- | youtube_dl/extractor/npr.py | 129 | 
1 files changed, 70 insertions, 59 deletions
diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index a823bc096..125c7010b 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -1,71 +1,82 @@ -# coding: utf-8  from __future__ import unicode_literals -import os.path -import re - -from ..compat import compat_urllib_parse_unquote -from ..utils import url_basename  from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ( +    int_or_none, +    qualities, +) -class NprIE(InfoExtractor): -    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer.html?.*id=(?P<id>[0-9]+)' -    _TEST = { -    'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', -    'info_dict': { -        'id': '449974205', -        'ext': 'mp4', -        'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' -    } -} +class NprIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', +        'info_dict': { +            'id': '449974205', +            'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' +        }, +        'playlist_count': 7, +    }, { +        'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1', +        'info_dict': { +            'id': '446928052', +            'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'" +        }, +        'playlist': [{ +            'md5': '12fa60cb2d3ed932f53609d4aeceabf1', +            'info_dict': { +                'id': '446929930', +                'ext': 'mp3', +                'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)', +                'duration': 402, +            }, +        }], +    }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        webpage_url = 'http://www.npr.org/player/v2/mediaPlayer.html?id=' + video_id -        webpage = self._download_webpage(webpage_url, video_id) -        key = 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010' -        xml_url = 'http://api.npr.org/query?id=%s&apiKey=%s' % (video_id, key) -        json_url = 'http://api.npr.org/query?id=%s&apiKey=%s&format=json' % (video_id, key) - -        formats = [] -        entries = [] - -        config = self._download_json(json_url, video_id) +        playlist_id = self._match_id(url) -        content = config["list"]["story"] +        config = self._download_json( +            'http://api.npr.org/query?%s' % compat_urllib_parse.urlencode({ +                'id': playlist_id, +                'fields': 'titles,audio,show', +                'format': 'json', +                'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010', +            }), playlist_id) -        album_title = config["list"]["story"][0]['song'][0]['album']['albumTitle'] -        print album_title['$text'] +        story = config['list']['story'][0] -        for key in content: -            if "audio" in key: -                for x in key['audio']: -                    if x['type'] == 'standard': -                        playlist = True -                        song_duration = x["duration"]['$text'] -                        song_title = x["title"]["$text"] -                        song_id = x["id"] +        KNOWN_FORMATS = ('threegp', 'mp4', 'mp3') +        quality = qualities(KNOWN_FORMATS) -                        for k in x["format"]: -                            if type(x["format"][k]) is list: -                                for z in x["format"][k]: -                                    formats.append({ 'format': z['type'],  -                                                     'url'   : z['$text'] -                                              }) -                            else: -                                formats.append({ 'format': k,  -                                                 'url'   : x["format"][k]['$text'] -                                      }) - -                        entries.append({ "title":song_title, -                                         "id":song_id, -                                         "duration": song_duration , -                                         "formats":formats}) -                        formats = [] +        entries = [] +        for audio in story.get('audio', []): +            title = audio.get('title', {}).get('$text') +            duration = int_or_none(audio.get('duration', {}).get('$text')) +            formats = [] +            for format_id, formats_entry in audio.get('format', {}).items(): +                if not formats_entry: +                    continue +                if isinstance(formats_entry, list): +                    formats_entry = formats_entry[0] +                format_url = formats_entry.get('$text') +                if not format_url: +                    continue +                if format_id in KNOWN_FORMATS: +                    formats.append({ +                        'url': format_url, +                        'format_id': format_id, +                        'ext': formats_entry.get('type'), +                        'quality': quality(format_id), +                    }) +            self._sort_formats(formats) +            entries.append({ +                'id': audio['id'], +                'title': title, +                'duration': duration, +                'formats': formats, +            }) -        return {    '_type': 'playlist', -                    'id' : video_id, -                    'title' : album_title, -                    'entries': entries  } +        playlist_title = story.get('title', {}).get('$text') +        return self.playlist_result(entries, playlist_id, playlist_title)  | 
