about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-03-24 22:01:47 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-03-24 22:01:47 +0100
commit 893f8832b52926847353f2b678e313687806a775 (patch)
tree 9688898ecae9c7e4de0ccfefb6b52a106396bb6c
parent 878d11ec29d63b2c8ca32163f38762d1b12a52c6 (diff)
[arte] Add support for embedded videos (Fixes #2620)
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/arte.py 25
-rw-r--r-- youtube_dl/extractor/generic.py 22
3 files changed, 45 insertions, 3 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a7a339e7d..39b250b10 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -14,6 +14,7 @@ from .arte import (
ArteTVConcertIE,
ArteTVFutureIE,
ArteTVDDCIE,
+ ArteTVEmbedIE,
)
from .auengine import AUEngineIE
from .bambuser import BambuserIE, BambuserChannelIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 49fb64077..257dc1f61 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -75,9 +75,7 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
- json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
- self.report_extraction(video_id)
- info = json.loads(json_info)
+ info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
info_dict = {
@@ -99,6 +97,8 @@ class ArteTVPlus7IE(InfoExtractor):
l = 'F'
elif lang == 'de':
l = 'A'
+ else:
+ l = lang
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
@@ -228,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
'description': 'md5:486eb08f991552ade77439fe6d82c305',
},
}
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+ IE_NAME = 'arte.tv:embed'
+ _VALID_URL = r'''(?x)
+ http://www\.arte\.tv
+ /playerv2/embed\.php\?json_url=
+ (?P<json_url>
+ http://arte\.tv/papi/tvguide/videos/stream/player/
+ (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+ )
+ '''
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ lang = mobj.group('lang')
+ json_url = mobj.group('json_url')
+ return self._extract_from_json_url(json_url, video_id, lang)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4d649fe71..e7ee31877 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
'description': 'No description',
},
},
+ # arte embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+ 'md5': '7653032cbb25bf6c80d80f217055fa43',
+ 'info_dict': {
+ 'id': '048195-004_PLUS7-F',
+ 'ext': 'flv',
+ 'title': 'X:enius',
+ 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+ 'upload_date': '20140320',
+ },
+ 'params': {
+ 'skip_download': 'Requires rtmpdump'
+ }
+ },
]
def report_download_webpage(self, video_id):
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
+ # Look for embedded arte.tv player
+ mobj = re.search(
+ r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: