diff options
Diffstat (limited to 'youtube_dl/extractor/pbs.py')
| -rw-r--r-- | youtube_dl/extractor/pbs.py | 31 | 
1 files changed, 28 insertions, 3 deletions
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 3e51b4dd7..16cc667d0 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -8,7 +8,9 @@ from ..utils import (      ExtractorError,      determine_ext,      int_or_none, +    float_or_none,      js_to_json, +    orderedSet,      strip_jsonp,      strip_or_none,      unified_strdate, @@ -264,6 +266,13 @@ class PBSIE(InfoExtractor):              'playlist_count': 2,          },          { +            'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/', +            'info_dict': { +                'id': 'great-war', +            }, +            'playlist_count': 3, +        }, +        {              'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',              'info_dict': {                  'id': '2276541483', @@ -381,10 +390,10 @@ class PBSIE(InfoExtractor):              # tabbed frontline videos              MULTI_PART_REGEXES = (                  r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', -                r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', +                r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',              )              for p in MULTI_PART_REGEXES: -                tabbed_videos = re.findall(p, webpage) +                tabbed_videos = orderedSet(re.findall(p, webpage))                  if tabbed_videos:                      return tabbed_videos, presumptive_id, upload_date, description @@ -464,6 +473,7 @@ class PBSIE(InfoExtractor):                      redirects.append(redirect)                      redirect_urls.add(redirect_url) +        chapters = []          # Player pages may also serve different qualities          for page in ('widget/partnerplayer', 'portalplayer'):              player = self._download_webpage( @@ -479,6 +489,20 @@ class PBSIE(InfoExtractor):                      extract_redirect_urls(video_info)                      if not info:                          info = video_info +                if not chapters: +                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player): +                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False) +                        if not chapter: +                            continue +                        start_time = float_or_none(chapter.get('start_time'), 1000) +                        duration = float_or_none(chapter.get('duration'), 1000) +                        if start_time is None or duration is None: +                            continue +                        chapters.append({ +                            'start_time': start_time, +                            'end_time': start_time + duration, +                            'title': chapter.get('title'), +                        })          formats = []          http_url = None @@ -515,7 +539,7 @@ class PBSIE(InfoExtractor):                      http_url = format_url          self._remove_duplicate_formats(formats)          m3u8_formats = list(filter( -            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', +            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',              formats))          if http_url:              for m3u8_format in m3u8_formats: @@ -588,4 +612,5 @@ class PBSIE(InfoExtractor):              'upload_date': upload_date,              'formats': formats,              'subtitles': subtitles, +            'chapters': chapters,          }  | 
