diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 73 | 
1 file changed, 54 insertions, 19 deletions
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 25f019231..7444b7b5b 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import json
 
@@ -5,30 +7,63 @@ from .common import InfoExtractor
 
 
 class PBSIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
+    _VALID_URL = r'''(?x)https?://
+        (?:
+            # Direct video URL
+            video\.pbs\.org/video/(?P<id>[0-9]+)/? |
+            # Article with embedded player
+           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
+           # Player
+           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
+        )
+    '''
 
     _TEST = {
-        u'url': u'http://video.pbs.org/video/2365006249/',
-        u'file': u'2365006249.mp4',
-        u'md5': 'ce1888486f0908d555a8093cac9a7362',
-        u'info_dict': {
-            u'title': u'A More Perfect Union',
-            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
-            u'duration': 3190,
+        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+        'md5': 'ce1888486f0908d555a8093cac9a7362',
+        'info_dict': {
+            'id': '2365006249',
+            'ext': 'mp4',
+            'title': 'A More Perfect Union',
+            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+            'duration': 3190,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+
+        presumptive_id = mobj.group('presumptive_id')
+        display_id = presumptive_id
+        if presumptive_id:
+            webpage = self._download_webpage(url, display_id)
+            url = self._search_regex(
+                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
+                webpage, 'player URL')
+            mobj = re.match(self._VALID_URL, url)
+
+        player_id = mobj.group('player_id')
+        if not display_id:
+            display_id = player_id
+        if player_id:
+            player_page = self._download_webpage(
+                url, display_id, note='Downloading player page',
+                errnote='Could not download player page')
+            video_id = self._search_regex(
+                r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
+        else:
+            video_id = mobj.group('id')
+            display_id = video_id
+
         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
-        info_page = self._download_webpage(info_url, video_id)
-        info =json.loads(info_page)
-        return {'id': video_id,
-                'title': info['title'],
-                'url': info['alternate_encoding']['url'],
-                'ext': 'mp4',
-                'description': info['program'].get('description'),
-                'thumbnail': info.get('image_url'),
-                'duration': info.get('duration'),
-                }
+        info = self._download_json(info_url, display_id)
+
+        return {
+            'id': video_id,
+            'title': info['title'],
+            'url': info['alternate_encoding']['url'],
+            'ext': 'mp4',
+            'description': info['program'].get('description'),
+            'thumbnail': info.get('image_url'),
+            'duration': info.get('duration'),
+        }
