diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-02-17 21:49:15 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-02-17 21:49:15 +0600 |
commit | 9654fc875bdcd055031670b640531375bc513012 (patch) | |
tree | 70cf0bacfc2ad65efe81a77d4008bc2dae112426 | |
parent | 0f425e65ec6d9ac47fa7f348fae7c06be2557f8e (diff) |
[arte:+7] Fix extraction for react-based layout
-rw-r--r-- | youtube_dl/extractor/arte.py | 18 |
1 files changed, 14 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 67194bd31..497899f8d 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -102,10 +102,20 @@ class ArteTVPlus7IE(InfoExtractor):
         iframe_url = find_iframe_url(webpage, None)
         if not iframe_url:
             embed_url = self._html_search_regex(
-                r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
-            player = self._download_json(
-                embed_url, video_id, 'Downloading player page')
-            iframe_url = find_iframe_url(player['html'])
+                r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
+            if embed_url:
+                player = self._download_json(
+                    embed_url, video_id, 'Downloading player page')
+                iframe_url = find_iframe_url(player['html'])
+        # en and es URLs produce react-based pages with different layout (e.g.
+        # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
+        if not iframe_url:
+            embed_html = self._parse_json(
+                self._search_regex(
+                    r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
+                    webpage, 'program'),
+                video_id)['embed_html']
+            iframe_url= find_iframe_url(embed_html)
         json_url = compat_parse_qs(
             compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
         return self._extract_from_json_url(json_url, video_id, lang)
```