diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-04-11 00:06:05 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-04-11 00:06:05 +0600 |
commit | 452908b257da1a5b228a2c0522c89fff87296622 (patch) | |
tree | a827e558bf7de85d345ed48faed0f83e5a5b2d11 /youtube_dl/extractor/telebruxelles.py | |
parent | 5899e988d55f4c64500721716bb99c5ecf86afc6 (diff) |
[telebruxelles] Fix extraction (Closes #9142)
Diffstat (limited to 'youtube_dl/extractor/telebruxelles.py')
-rw-r--r-- | youtube_dl/extractor/telebruxelles.py | 14 |
1 files changed, 8 insertions, 6 deletions
```diff
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py
index a3d05f97d..eefecc490 100644
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dl/extractor/telebruxelles.py
@@ -1,11 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class TeleBruxellesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)'
     _TESTS = [{
         'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/',
         'md5': '59439e568c9ee42fb77588b2096b214f',
@@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         article_id = self._html_search_regex(
-            r"<article id=\"post-(\d+)\"", webpage, 'article ID')
+            r"<article id=\"post-(\d+)\"", webpage, 'article ID', default=None)
         title = self._html_search_regex(
             r'<h1 class=\"entry-title\">(.*?)</h1>', webpage, 'title')
-        description = self._og_search_description(webpage)
+        description = self._og_search_description(webpage, default=None)
 
         rtmp_url = self._html_search_regex(
-            r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"",
+            r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
             webpage, 'RTMP url')
-        rtmp_url = rtmp_url.replace("\" + \"", "")
+        rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
 
         return {
-            'id': article_id,
+            'id': article_id or display_id,
             'display_id': display_id,
             'title': title,
             'description': description,
```