From b0f001a6cbd220c8b10c0ce359f17072d6347a8f Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 21 Sep 2015 15:52:36 +0100 Subject: [canalc2] fix info extraction --- youtube_dl/extractor/canalc2.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index c4fefefe4..66a9ff093 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -8,34 +8,40 @@ from .common import InfoExtractor class Canalc2IE(InfoExtractor): IE_NAME = 'canalc2.tv' - _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)' + _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P<id>\d+)' _TEST = { - 'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', + 'url': 'http://www.canalc2.tv/video/12163', 'md5': '060158428b650f896c542dfbb3d6487f', 'info_dict': { 'id': '12163', 'ext': 'mp4', 'title': 'Terrasses du Numérique' + }, + 'params': { + 'skip_download': True, # Requires rtmpdump } } def _real_extract(self, url): - video_id = re.match(self._VALID_URL, url).group('id') - # We need to set the voir field for getting the file name - url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - file_name = self._search_regex( - r"so\.addVariable\('file','(.*?)'\);", - webpage, 'file name') - video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name + video_url = self._search_regex( + r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"', + webpage, 'video_url') + formats = [{'url': video_url}] + if video_url.startswith('rtmp://'): + rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<play_path>mp4:.+)$', video_url) + formats[0].update({ + 'app': rtmp.group('app'), + 'play_path': rtmp.group('play_path'), + }) title = self._html_search_regex( - r'class="evenement8">(.*?)</a>', webpage, 'title') +
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title') return { 'id': video_id, - 'ext': 'mp4', - 'url': video_url, + 'formats': formats, 'title': title, } -- cgit v1.2.3 From 6682049dee5e73b98e99e1359b959240d0920d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Oct 2015 19:19:43 +0600 Subject: [canalc2] Improve rtmp extraction --- youtube_dl/extractor/canalc2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index 66a9ff093..648af2e18 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -31,10 +31,12 @@ class Canalc2IE(InfoExtractor): webpage, 'video_url') formats = [{'url': video_url}] if video_url.startswith('rtmp://'): - rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<play_path>mp4:.+)$', video_url) + rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url) formats[0].update({ + 'url': rtmp.group('url'), 'app': rtmp.group('app'), 'play_path': rtmp.group('play_path'), + 'page_url': url, }) title = self._html_search_regex( -- cgit v1.2.3 From ef6c868f23f2fe0d493831e0d4cba71c735bd160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Oct 2015 19:23:31 +0600 Subject: [canalc2] Improve some regexes --- youtube_dl/extractor/canalc2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index 648af2e18..d9137e2ef 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class Canalc2IE(InfoExtractor): IE_NAME = 'canalc2.tv' - _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P<id>\d+)' _TEST = { 'url': 'http://www.canalc2.tv/video/12163', @@ -27,8 +27,8 @@ class Canalc2IE(InfoExtractor): video_id =
self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"', - webpage, 'video_url') + r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P<file>.+?)\2', + webpage, 'video_url', group='file') formats = [{'url': video_url}] if video_url.startswith('rtmp://'): rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url) -- cgit v1.2.3 From 14bddf35fbe8253e283042630e24b134996b2575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Oct 2015 19:23:52 +0600 Subject: [canalc2] Add ext --- youtube_dl/extractor/canalc2.py | 1 + 1 file changed, 1 insertion(+) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index d9137e2ef..ba82bb2b7 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -34,6 +34,7 @@ class Canalc2IE(InfoExtractor): rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url) formats[0].update({ 'url': rtmp.group('url'), + 'ext': 'flv', 'app': rtmp.group('app'), 'play_path': rtmp.group('play_path'), 'page_url': url, -- cgit v1.2.3 From b1bf063503893192637f95e929d1a9147de59a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Oct 2015 19:27:05 +0600 Subject: [canalc2] Extract duration --- youtube_dl/extractor/canalc2.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index ba82bb2b7..e326b8fbd 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import parse_duration class Canalc2IE(InfoExtractor): @@ -42,9 +43,13 @@ class Canalc2IE(InfoExtractor): title = self._html_search_regex(
r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title') + duration = parse_duration(self._search_regex( + r'id=["\']video_duree["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)) return { 'id': video_id, - 'formats': formats, 'title': title, + 'duration': duration, + 'formats': formats, } -- cgit v1.2.3 From 608945d44a7e47fa5115295839c993af545936eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Oct 2015 19:27:22 +0600 Subject: [canalc2] Fix test --- youtube_dl/extractor/canalc2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/canalc2.py') diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index e326b8fbd..f6a1ff381 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -16,8 +16,9 @@ class Canalc2IE(InfoExtractor): 'md5': '060158428b650f896c542dfbb3d6487f', 'info_dict': { 'id': '12163', - 'ext': 'mp4', - 'title': 'Terrasses du Numérique' + 'ext': 'flv', + 'title': 'Terrasses du Numérique', + 'duration': 122, }, 'params': { 'skip_download': True, # Requires rtmpdump -- cgit v1.2.3