aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2016-02-17 21:49:15 +0600
committer: Sergey M․ <dstftw@gmail.com> 2016-02-17 21:49:15 +0600
commit: 9654fc875bdcd055031670b640531375bc513012 (patch)
tree: 70cf0bacfc2ad65efe81a77d4008bc2dae112426
parent: 0f425e65ec6d9ac47fa7f348fae7c06be2557f8e (diff)
[arte:+7] Fix extraction for react-based layout
-rw-r--r-- youtube_dl/extractor/arte.py 18
1 file changed, 14 insertions, 4 deletions
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 67194bd31..497899f8d 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -102,10 +102,20 @@ class ArteTVPlus7IE(InfoExtractor):
iframe_url = find_iframe_url(webpage, None)
if not iframe_url:
embed_url = self._html_search_regex(
- r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
- player = self._download_json(
- embed_url, video_id, 'Downloading player page')
- iframe_url = find_iframe_url(player['html'])
+ r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
+ if embed_url:
+ player = self._download_json(
+ embed_url, video_id, 'Downloading player page')
+ iframe_url = find_iframe_url(player['html'])
+ # en and es URLs produce react-based pages with different layout (e.g.
+ # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
+ if not iframe_url:
+ embed_html = self._parse_json(
+ self._search_regex(
+ r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
+ webpage, 'program'),
+ video_id)['embed_html']
+ iframe_url = find_iframe_url(embed_html)
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
return self._extract_from_json_url(json_url, video_id, lang)