From 943f7f7a399c6fb3006eb2bd68070f28a272171f Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Sun, 18 Aug 2013 16:11:47 +0200 Subject: Download videos from jeuxvideo.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/jeuxvideo.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/jeuxvideo.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 84c02c2ed..b9bd3a429 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -36,6 +36,7 @@ from .ign import IGNIE, OneUPIE from .ina import InaIE from .infoq import InfoQIE from .instagram import InstagramIE +from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE from .kankan import KankanIE diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py new file mode 100644 index 000000000..d74a1c9b4 --- /dev/null +++ b/youtube_dl/extractor/jeuxvideo.py @@ -0,0 +1,33 @@ +import json +import re + +from .common import InfoExtractor + +class JeuxVideoIE(InfoExtractor): + _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, title) + m_download = re.search(r'', webpage) + + xml_link = m_download.group(1) + + id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1) + + xml_config = self._download_webpage(xml_link, title, + 'Downloading XML config') + info = re.search(r'(.*?)', + xml_config, re.MULTILINE|re.DOTALL).group(1) + info = json.loads(info)['versions'][0] + + video_url = 'http://video720.jeuxvideo.com/' + info['file'] + + track_info = {'id':id, + 'title' : title, + 'ext' : 'mp4', + 'url' : video_url + } + + return [track_info] -- cgit v1.2.3 From 7070b83687ed134af6d9a71bbf2ec759a56965d5 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Thu, 22 Aug 2013 12:54:17 +0200 Subject: Merge remote-tracking branch 'upstream/master' --- youtube_dl/extractor/jeuxvideo.py | 1 - 1 file changed, 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index c8a8ae1b3..4327bc13d 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -31,7 +31,6 @@ class JeuxVideoIE(InfoExtractor): xml_config = self._download_webpage(xml_link, title, 'Downloading XML config') - config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8')) info = re.search(r'(.*?)', xml_config, re.MULTILINE|re.DOTALL).group(1) -- cgit v1.2.3 From cd0abcc0bb4c218fd02850a139b626d252e22599 Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Thu, 22 Aug 2013 13:54:23 +0200 Subject: Extractor for canalc2.tv --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/canalc2.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/canalc2.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9d12608e1..576b8433a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -7,6 +7,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE from .breakcom import BreakIE from .brightcove import BrightcoveIE from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE from .condenast import CondeNastIE diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py new file mode 100644 index 000000000..d0e2ed536 --- /dev/null +++ b/youtube_dl/extractor/canalc2.py @@ -0,0 +1,37 @@ +# coding: utf-8 +"""Extractor for canalc2.tv""" +import re +import lxml.html + +from .common import InfoExtractor + +class Canalc2IE(InfoExtractor): + """Extractor for canalc2.tv""" + _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui' + + _TEST = { + u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + u'file': u'12163.mp4', + u'md5': u'c00fa80517373764ff5c0b5eb5a58780', + u'info_dict': { + u'title': u'Terrasses du Numérique' + } + } + + def _real_extract(self, url): + video_id = re.match(self._VALID_URL, url).group(1) + webpage = self._download_webpage(url, video_id) + file_name = re.search(r"so\.addVariable\('file','(.*?)'\);", + webpage).group(1) + + video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name + + html = lxml.html.fromstring(webpage) + + title = html.cssselect('.evenement8')[0].text_content() + + return {'id': video_id, + 'ext' : 'mp4', + 'url' : video_url, + 'title' : title + } -- cgit v1.2.3 From ff2424595adf02cbe5d1f1071e53c3b2e5f32c9e Mon Sep 17 00:00:00 2001 From: Pierre Rudloff Date: Thu, 22 Aug 2013 14:47:51 +0200 Subject: lxml is not part of the standard library. --- youtube_dl/extractor/canalc2.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index d0e2ed536..215abf537 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -1,7 +1,6 @@ # coding: utf-8 """Extractor for canalc2.tv""" import re -import lxml.html from .common import InfoExtractor @@ -25,10 +24,9 @@ class Canalc2IE(InfoExtractor): webpage).group(1) video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name - - html = lxml.html.fromstring(webpage) - - title = html.cssselect('.evenement8')[0].text_content() + + title = self._html_search_regex(r'class="evenement8">(.*?)', + webpage, u'title') return {'id': video_id, 'ext' : 'mp4', -- cgit v1.2.3