diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-03-24 22:01:47 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-03-24 22:01:47 +0100 | 
| commit | 893f8832b52926847353f2b678e313687806a775 (patch) | |
| tree | 9688898ecae9c7e4de0ccfefb6b52a106396bb6c | |
| parent | 878d11ec29d63b2c8ca32163f38762d1b12a52c6 (diff) | |
[arte] Add support for embedded videos (Fixes #2620)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/arte.py | 25 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 22 | 
3 files changed, 45 insertions, 3 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a7a339e7d..39b250b10 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -14,6 +14,7 @@ from .arte import (
     ArteTVConcertIE,
     ArteTVFutureIE,
     ArteTVDDCIE,
+    ArteTVEmbedIE,
 )
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 49fb64077..257dc1f61 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -75,9 +75,7 @@ class ArteTVPlus7IE(InfoExtractor):
         return self._extract_from_json_url(json_url, video_id, lang)
 
     def _extract_from_json_url(self, json_url, video_id, lang):
-        json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
-        self.report_extraction(video_id)
-        info = json.loads(json_info)
+        info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
         info_dict = {
@@ -99,6 +97,8 @@ class ArteTVPlus7IE(InfoExtractor):
                 l = 'F'
             elif lang == 'de':
                 l = 'A'
+            else:
+                l = lang
             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
             return any(re.match(r, f['versionCode']) for r in regexes)
         # Some formats may not be in the same language as the url
@@ -228,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
             'description': 'md5:486eb08f991552ade77439fe6d82c305',
         },
     }
+
+
+class ArteTVEmbedIE(ArteTVPlus7IE):
+    IE_NAME = 'arte.tv:embed'
+    _VALID_URL = r'''(?x)
+        http://www\.arte\.tv
+        /playerv2/embed\.php\?json_url=
+        (?P<json_url>
+            http://arte\.tv/papi/tvguide/videos/stream/player/
+            (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+        )
+    '''
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        lang = mobj.group('lang')
+        json_url = mobj.group('json_url')
+        return self._extract_from_json_url(json_url, video_id, lang)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4d649fe71..e7ee31877 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -197,6 +197,21 @@ class GenericIE(InfoExtractor):
                 'description': 'No description',
             },
         },
+        # arte embed
+        {
+            'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+            'md5': '7653032cbb25bf6c80d80f217055fa43',
+            'info_dict': {
+                'id': '048195-004_PLUS7-F',
+                'ext': 'flv',
+                'title': 'X:enius',
+                'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+                'upload_date': '20140320',
+            },
+            'params': {
+                'skip_download': 'Requires rtmpdump'
+            }
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -525,6 +540,13 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'TED')
 
+        # Look for embedded arte.tv player
+        mobj = re.search(
+            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None:
```
