diff options
author | remitamine <remitamine@gmail.com> | 2016-05-05 21:42:37 +0100 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2017-05-02 20:41:48 +0100 |
commit | 7a6d33a9a5390d0dab7e9162d6b2552cb0fe23a5 (patch) | |
tree | d361383c1092e7eea9af71eb29657e0014745394 | |
parent | fa2a36d9bcaddf793e8bac281f5fa7859af16a46 (diff) |
[pbs] extract chapters information
-rw-r--r-- | youtube_dl/extractor/pbs.py | 17 |
1 file changed, 17 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 6166dc2ad..0727e381b 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -8,6 +8,7 @@ from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
+    float_or_none,
     js_to_json,
     strip_jsonp,
     strip_or_none,
@@ -464,6 +465,7 @@ class PBSIE(InfoExtractor):
                     redirects.append(redirect)
                     redirect_urls.add(redirect_url)
 
+        chapters = []
         # Player pages may also serve different qualities
         for page in ('widget/partnerplayer', 'portalplayer'):
             player = self._download_webpage(
@@ -479,6 +481,20 @@ class PBSIE(InfoExtractor):
                     extract_redirect_urls(video_info)
                     if not info:
                         info = video_info
+                if not chapters:
+                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+                        if not chapter:
+                            continue
+                        start_time = float_or_none(chapter.get('start_time'), 1000)
+                        duration = float_or_none(chapter.get('duration'), 1000)
+                        if start_time is None or duration is None:
+                            continue
+                        chapters.append({
+                            'start_time': start_time,
+                            'end_time': start_time + duration,
+                            'title': chapter.get('title'),
+                        })
 
         formats = []
         http_url = None
@@ -588,4 +604,5 @@ class PBSIE(InfoExtractor):
             'upload_date': upload_date,
             'formats': formats,
             'subtitles': subtitles,
+            'chapters': chapters,
         }
```