aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/pbs.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/pbs.py')
-rw-r--r--youtube_dl/extractor/pbs.py95
1 files changed, 80 insertions, 15 deletions
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 683c81de3..b787e2a73 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -8,6 +8,7 @@ from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
+ strip_jsonp,
unified_strdate,
US_RATINGS,
)
@@ -21,7 +22,7 @@ class PBSIE(InfoExtractor):
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
- video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
+ (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'''
@@ -108,12 +109,12 @@ class PBSIE(InfoExtractor):
{
'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
'info_dict': {
- 'id': '2280706814',
+ 'id': '2276541483',
'display_id': 'player',
'ext': 'mp4',
- 'title': 'American Experience - Death and the Civil War',
+ 'title': 'American Experience - Death and the Civil War, Chapter 1',
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
- 'duration': 6705,
+ 'duration': 682,
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
@@ -134,8 +135,53 @@ class PBSIE(InfoExtractor):
'params': {
'skip_download': True, # requires ffmpeg
},
+ 'skip': 'Expired',
+ },
+ {
+ # Video embedded in iframe containing angle brackets as attribute's value (e.g.
+ # "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
+ # https://github.com/rg3/youtube-dl/issues/7059)
+ 'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
+ 'info_dict': {
+ 'id': '2365546844',
+ 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
+ 'ext': 'mp4',
+ 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
+ 'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
+ 'duration': 1480,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+ # Frontline video embedded via flp2012.js
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists',
+ 'info_dict': {
+ 'id': '2070868960',
+ 'display_id': 'the-atomic-artists',
+ 'ext': 'mp4',
+ 'title': 'FRONTLINE - The Atomic Artists',
+ 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+ 'duration': 723,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ },
+ {
+ 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
+ 'only_matching': True,
}
]
+ _ERRORS = {
+ 101: 'We\'re sorry, but this video is not yet available.',
+ 403: 'We\'re sorry, but this video is not available in your region due to right restrictions.',
+ 404: 'We are experiencing technical difficulties that are preventing us from playing the video at this time. Please check back again soon.',
+ 410: 'This video has expired and is no longer available for online streaming.',
+ }
def _extract_webpage(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -166,9 +212,30 @@ class PBSIE(InfoExtractor):
if media_id:
return media_id, presumptive_id, upload_date
- url = self._search_regex(
- r'<iframe\s+[^>]*\s+src=["\']([^\'"]+partnerplayer[^\'"]+)["\']',
- webpage, 'player URL')
+ # Frontline video embedded via flp
+ video_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
+ if video_id:
+ # prg_id calculation is reverse engineered from
+ # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js
+ prg_id = self._search_regex(
+ r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:]
+ if 'q' in prg_id:
+ prg_id = prg_id.split('q')[1]
+ prg_id = int(prg_id, 16)
+ getdir = self._download_json(
+ 'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id,
+ presumptive_id, 'Downloading getdir JSON',
+ transform_source=strip_jsonp)
+ return getdir['mid'], presumptive_id, upload_date
+
+ for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
+ url = self._search_regex(
+ r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
+ 'player URL', default=None, group='url')
+ if url:
+ break
+
mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id')
@@ -196,7 +263,7 @@ class PBSIE(InfoExtractor):
return self.playlist_result(entries, display_id)
info = self._download_json(
- 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
+ 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id)
formats = []
@@ -213,13 +280,11 @@ class PBSIE(InfoExtractor):
'Downloading %s video url info' % encoding_name)
if redirect_info['status'] == 'error':
- if redirect_info['http_code'] == 403:
- message = (
- 'The video is not available in your region due to '
- 'right restrictions')
- else:
- message = redirect_info['message']
- raise ExtractorError(message, expected=True)
+ raise ExtractorError(
+ '%s said: %s' % (
+ self.IE_NAME,
+ self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])),
+ expected=True)
format_url = redirect_info.get('url')
if not format_url: