diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-06-02 00:35:07 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-06-02 00:35:07 +0700 |
commit | f20f636596aa4ec949360e7b05f6b9499e28c2a2 (patch) | |
tree | 5226e7c81b2e4b795a0158d422bd97c5daadbf01 | |
parent | b995043ab8b987cb5d4d83a3b56bb28d009ac0cb (diff) |
[cbc] Improve extraction (closes #16583, closes #16593)
-rw-r--r-- | youtube_dl/extractor/cbc.py | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index ce8e3d346..43f95c739 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -17,6 +17,7 @@ from ..utils import ( xpath_element, xpath_with_ns, find_xpath_attr, + orderedSet, parse_duration, parse_iso8601, parse_age_limit, @@ -136,9 +137,15 @@ class CBCIE(InfoExtractor): entries = [ self._extract_player_init(player_init, display_id) for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + media_ids = [] + for media_id_re in ( + r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', + r'<div[^>]+\bid=["\']player-(\d+)', + r'guid["\']\s*:\s*["\'](\d+)'): + media_ids.extend(re.findall(media_id_re, webpage)) entries.extend([ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) - for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]) + for media_id in orderedSet(media_ids)]) return self.playlist_result( entries, display_id, strip_or_none(title), self._og_search_description(webpage)) |