diff options
author | ngld <ngld@tproxy.de> | 2015-08-12 16:59:04 +0200 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-10-02 21:30:07 +0600 |
commit | 3bb3f0410822d3d21c6199bb8915b598990628e6 (patch) | |
tree | 77291bc3f77fb61a7ea614602b7a19a41d3521cb /youtube_dl/extractor | |
parent | 59a9efe85b15e53c5928b7fdb810c150f5bf4b78 (diff) |
[europa] Add new extractor
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/europa.py | 60 |
2 files changed, 61 insertions, 0 deletions
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    xpath_text
)

# NOTE: the same commit also registers this extractor in
# youtube_dl/extractor/__init__.py with `from .europa import EuropaIE`
# (inserted between the `.esri` and `.everyonesmixtape` imports).


class EuropaIE(InfoExtractor):
    """Extractor for ec.europa.eu/avservices video player pages.

    The video id is taken from the ``ref`` query parameter; the optional
    ``sitelang`` query parameter selects the preferred language (``en`` when
    absent).
    """

    _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P<id>[A-Za-z0-9]+)'
    _TEST = {
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
        'md5': '728cca2fd41d5aa7350cec1141fbe620',
        'info_dict': {
            'id': 'I107758',
            'ext': 'mp4',
            'title': 'TRADE - Wikileaks on TTIP',
            'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
            'thumbnail': r're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$',
        },
    }

    @staticmethod
    def _collect_metadata(playlist):
        """Return ``{language: {'title': ..., 'description': ...}}``.

        Reads the ``info/title/item`` and ``info/description/item`` elements
        of *playlist*. Items without a ``label`` are skipped, and a
        description whose language has no title still gets its own entry
        instead of raising ``KeyError``.
        """
        metadata = {}
        for field in ('title', 'description'):
            for item in playlist.findall('info/%s/item' % field):
                label = xpath_text(item, 'label')
                if label is None:
                    continue
                metadata.setdefault(xpath_text(item, 'lg'), {})[field] = label.strip()
        return metadata

    @staticmethod
    def _preferred(metadata, field, languages):
        """Return *field* for the first of *languages* that provides it.

        Returns ``None`` when none of the languages has a non-empty value.
        """
        for language in languages:
            value = metadata.get(language, {}).get(field)
            if value:
                return value
        return None

    def _real_extract(self, url):
        """Download the playlist XML for *url* and build the info dict.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats``. The title falls back to the video id
        when the playlist has no title for the preferred language or for
        ``int``; the description is ``None`` in the same situation.
        """
        video_id = self._match_id(url)
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        lang = query.get('sitelang', ['en'])[0]

        playlist = self._download_xml(
            'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id,
            video_id)

        metadata = self._collect_metadata(playlist)

        formats = []
        for item in playlist.findall('files/file'):
            video_url = xpath_text(item, 'url')
            if not video_url:
                # A format without a URL cannot be downloaded.
                continue
            lg = xpath_text(item, 'lg')
            # Each format keeps the title/description of its own language,
            # as before, but in a private copy: several files sharing one
            # language must not overwrite each other's url, and a file whose
            # language has no metadata must not raise KeyError.
            fmt = dict(metadata.get(lg, {}))
            fmt['format_note'] = xpath_text(item, 'lglabel')
            fmt['url'] = video_url
            if lg == lang:
                fmt['language_preference'] = 10
            formats.append(fmt)

        # Kept from the original implementation: the playlist order is
        # inverted (presumably so the first listed file is ranked best --
        # confirm against the format ordering convention).
        formats.reverse()

        # Look up the fallback lazily; the original evaluated videos['int']
        # even when the requested language was available.
        languages = (lang, 'int')

        return {
            'id': video_id,
            'title': self._preferred(metadata, 'title', languages) or video_id,
            'description': self._preferred(metadata, 'description', languages),
            'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'),
            'formats': formats,
        }