diff options
Diffstat (limited to 'youtube_dl/extractor/pbs.py')
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 87 | 
1 files changed, 74 insertions, 13 deletions
| diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 683c81de3..8fb9b1849 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -8,6 +8,7 @@ from ..utils import (      ExtractorError,      determine_ext,      int_or_none, +    strip_jsonp,      unified_strdate,      US_RATINGS,  ) @@ -108,12 +109,12 @@ class PBSIE(InfoExtractor):          {              'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',              'info_dict': { -                'id': '2280706814', +                'id': '2276541483',                  'display_id': 'player',                  'ext': 'mp4', -                'title': 'American Experience - Death and the Civil War', +                'title': 'American Experience - Death and the Civil War, Chapter 1',                  'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.', -                'duration': 6705, +                'duration': 682,                  'thumbnail': 're:^https?://.*\.jpg$',              },              'params': { @@ -134,8 +135,49 @@ class PBSIE(InfoExtractor):              'params': {                  'skip_download': True,  # requires ffmpeg              }, +            'skip': 'Expired', +        }, +        { +            # Video embedded in iframe containing angle brackets as attribute's value (e.g. +            # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see +            # https://github.com/rg3/youtube-dl/issues/7059) +            'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/', +            'info_dict': { +                'id': '2365546844', +                'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', +                'ext': 'mp4', +                'title': "A Chef's Life - Season 3, Ep. 
5: Prickly Business", +                'description': 'md5:61db2ddf27c9912f09c241014b118ed1', +                'duration': 1480, +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +            'params': { +                'skip_download': True,  # requires ffmpeg +            }, +        }, +        { +            # Frontline video embedded via flp2012.js +            'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists', +            'info_dict': { +                'id': '2070868960', +                'display_id': 'the-atomic-artists', +                'ext': 'mp4', +                'title': 'FRONTLINE - The Atomic Artists', +                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', +                'duration': 723, +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +            'params': { +                'skip_download': True,  # requires ffmpeg +            },          }      ] +    _ERRORS = { +        101: 'We\'re sorry, but this video is not yet available.', +        403: 'We\'re sorry, but this video is not available in your region due to right restrictions.', +        404: 'We are experiencing technical difficulties that are preventing us from playing the video at this time. 
Please check back again soon.', +        410: 'This video has expired and is no longer available for online streaming.', +    }      def _extract_webpage(self, url):          mobj = re.match(self._VALID_URL, url) @@ -166,9 +208,30 @@ class PBSIE(InfoExtractor):              if media_id:                  return media_id, presumptive_id, upload_date -            url = self._search_regex( -                r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']', -                webpage, 'player URL') +            # Frontline video embedded via flp +            video_id = self._search_regex( +                r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) +            if video_id: +                # prg_id calculation is reverse engineered from +                # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js +                prg_id = self._search_regex( +                    r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:] +                if 'q' in prg_id: +                    prg_id = prg_id.split('q')[1] +                prg_id = int(prg_id, 16) +                getdir = self._download_json( +                    'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id, +                    presumptive_id, 'Downloading getdir JSON', +                    transform_source=strip_jsonp) +                return getdir['mid'], presumptive_id, upload_date + +            for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage): +                url = self._search_regex( +                    r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe, +                    'player URL', default=None, group='url') +                if url: +                    break +              mobj = re.match(self._VALID_URL, url)          player_id = mobj.group('player_id') @@ -213,13 +276,11 @@ class PBSIE(InfoExtractor):                  'Downloading %s video url info' % encoding_name)              if 
redirect_info['status'] == 'error': -                if redirect_info['http_code'] == 403: -                    message = ( -                        'The video is not available in your region due to ' -                        'right restrictions') -                else: -                    message = redirect_info['message'] -                raise ExtractorError(message, expected=True) +                raise ExtractorError( +                    '%s said: %s' % ( +                        self.IE_NAME, +                        self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])), +                    expected=True)              format_url = redirect_info.get('url')              if not format_url: | 
