about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com>, 2018-06-02 00:35:07 +0700
committer: Sergey M․ <dstftw@gmail.com>, 2018-06-02 00:35:07 +0700
commit: f20f636596aa4ec949360e7b05f6b9499e28c2a2 (patch)
tree: 5226e7c81b2e4b795a0158d422bd97c5daadbf01
parent: b995043ab8b987cb5d4d83a3b56bb28d009ac0cb (diff)
[cbc] Improve extraction (closes #16583, closes #16593)
-rw-r--r-- youtube_dl/extractor/cbc.py 9
1 file changed, 8 insertions, 1 deletion
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index ce8e3d346..43f95c739 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -17,6 +17,7 @@ from ..utils import (
xpath_element,
xpath_with_ns,
find_xpath_attr,
+ orderedSet,
parse_duration,
parse_iso8601,
parse_age_limit,
@@ -136,9 +137,15 @@ class CBCIE(InfoExtractor):
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
- for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
+ for media_id in orderedSet(media_ids)])
return self.playlist_result(
entries, display_id, strip_or_none(title),
self._og_search_description(webpage))