diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-03-03 12:53:11 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-03-03 12:53:11 +0100 |
commit | 9f62eaf4ef87cd379318a1330373317cd6d4d63c (patch) | |
tree | 61401b56fd05580ea942499bdfc35a48289512e2 /youtube_dl/extractor/canal13cl.py | |
parent | f92259c0260880ac8e5f285b4b267183b97761c0 (diff) |
[canal13cl] Add test and improve extraction (#2498)
Diffstat (limited to 'youtube_dl/extractor/canal13cl.py')
-rw-r--r-- | youtube_dl/extractor/canal13cl.py | 48 |
1 files changed, 32 insertions, 16 deletions
diff --git a/youtube_dl/extractor/canal13cl.py b/youtube_dl/extractor/canal13cl.py index 781c1b503..93241fefe 100644 --- a/youtube_dl/extractor/canal13cl.py +++ b/youtube_dl/extractor/canal13cl.py @@ -1,32 +1,48 @@ +# coding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor class Canal13clIE(InfoExtractor): - _VALID_URL = r'^http://(?:www\.)?13\.cl/' - IE_NAME = 'Canal13cl' + _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)' + _TEST = { + 'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', + 'md5': '4cb1fa38adcad8fea88487a078831755', + 'info_dict': { + 'id': '1403022125', + 'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', + 'ext': 'mp4', + 'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda', + 'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.', + } + } def _real_extract(self, url): - webpage = self._download_webpage(url, url) - video_id = self._html_search_regex( - r'http://streaming.13.cl/(.*)\.mp4', - webpage, u'video_id') - title = self._html_search_regex( - r'(articuloTitulo = \"(.*?)\"|(.*?)\|)', - webpage, u'title') + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_meta( + 'twitter:title', webpage, 'title', fatal=True) + description = self._html_search_meta( + 'twitter:description', webpage, 'description') url = self._html_search_regex( - r'articuloVideo = \"(.*?)\"', - webpage, u'url') - thumbnail = self._html_search_regex ( - r'articuloImagen = \"(.*?)\"', - webpage, u'thumbnail') + r'articuloVideo = \"(.*?)\"', webpage, 'url') + real_id = self._search_regex( + r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id) + thumbnail = self._html_search_regex( + r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail') return { - 'video_id': video_id, + 'id': real_id, + 'display_id': display_id, 'url': url, 'title': title, + 'description': description, 'ext': 'mp4', - 'thumbnail': thumbnail + 'thumbnail': thumbnail, } |