diff options
Diffstat (limited to 'youtube_dl/extractor/pbs.py')
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 32 | 
1 file changed, 29 insertions, 3 deletions
| diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 0727e381b..8889e4a1a 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -10,6 +10,7 @@ from ..utils import (      int_or_none,      float_or_none,      js_to_json, +    orderedSet,      strip_jsonp,      strip_or_none,      unified_strdate, @@ -188,7 +189,7 @@ class PBSIE(InfoExtractor):             # Direct video URL             (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |             # Article with embedded player (or direct video) -           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | +           (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |             # Player             (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/          ) @@ -265,6 +266,13 @@ class PBSIE(InfoExtractor):              'playlist_count': 2,          },          { +            'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/', +            'info_dict': { +                'id': 'great-war', +            }, +            'playlist_count': 3, +        }, +        {              'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',              'info_dict': {                  'id': '2276541483', @@ -338,6 +346,21 @@ class PBSIE(InfoExtractor):              },          },          { +            # https://github.com/rg3/youtube-dl/issues/13801 +            'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/', +            'info_dict': { +                'id': '3003333873', +                'ext': 'mp4', +                'title': 'PBS NewsHour - full episode July 31, 2017', +                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', +                'duration': 3265, +                'thumbnail': r're:^https?://.*\.jpg$', +            }, +            'params': { +                'skip_download': 
True, +            }, +        }, +        {              'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',              'only_matching': True,          }, @@ -382,10 +405,10 @@ class PBSIE(InfoExtractor):              # tabbed frontline videos              MULTI_PART_REGEXES = (                  r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', -                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', +                r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',              )              for p in MULTI_PART_REGEXES: -                tabbed_videos = re.findall(p, webpage) +                tabbed_videos = orderedSet(re.findall(p, webpage))                  if tabbed_videos:                      return tabbed_videos, presumptive_id, upload_date, description @@ -425,6 +448,9 @@ class PBSIE(InfoExtractor):                  if url:                      break +            if not url: +                url = self._og_search_url(webpage) +              mobj = re.match(self._VALID_URL, url)          player_id = mobj.group('player_id') | 
