diff options
author | Sergey M․ <dstftw@gmail.com> | 2018-07-30 23:28:44 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2018-07-30 23:28:44 +0700 |
commit | 19b9de13c4e83416f09708dddecba6edc69b4525 (patch) | |
tree | e7c015bca569c53ca1929ff668cdc3a5d0d52b69 | |
parent | 6f2d82a5a0fd020ac36bdfe4faa0ee8a9f4945ac (diff) |
[watchbox] Fix extraction (closes #17107)
-rw-r--r-- | youtube_dl/extractor/watchbox.py | 19 |
1 file changed, 14 insertions, 5 deletions
```diff
diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py
index d99313080..5a4e46e73 100644
--- a/youtube_dl/extractor/watchbox.py
+++ b/youtube_dl/extractor/watchbox.py
@@ -10,6 +10,7 @@ from ..utils import (
     js_to_json,
     strip_or_none,
     try_get,
+    unescapeHTML,
     unified_timestamp,
 )
 
@@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        source = (self._parse_json(
+        player_config = self._parse_json(
             self._search_regex(
-                r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
-                default='{}'),
-            video_id, transform_source=js_to_json,
-            fatal=False) or {}).get('source') or {}
+                r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
+                'player config', default='{}', group='data'),
+            video_id, transform_source=unescapeHTML, fatal=False)
+
+        if not player_config:
+            player_config = self._parse_json(
+                self._search_regex(
+                    r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
+                    default='{}'),
+                video_id, transform_source=js_to_json, fatal=False) or {}
+
+        source = player_config.get('source') or {}
 
         video_id = compat_str(source.get('videoId') or video_id)
```