diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-04 01:24:19 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-04 01:24:19 +0800 |
commit | 848edeab898ee45e45a8fcdb355f35be8b73dd5d (patch) | |
tree | 1aa31a950d1e121f6b474b68207ef6f3ee4ff1d4 | |
parent | 1748d67aea66b6d63bc203ada1036110e7fd2402 (diff) |
[lifenews] Detect <iframe> (fixes #5346)
-rw-r--r-- | youtube_dl/extractor/lifenews.py | 47 |
1 file changed, 38 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py
index 3c00ed333..330138692 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -16,7 +16,7 @@ class LifeNewsIE(InfoExtractor):
     IE_DESC = 'LIFE | NEWS'
     _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://lifenews.ru/news/126342',
         'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
         'info_dict': {
@@ -27,7 +27,19 @@ class LifeNewsIE(InfoExtractor):
             'thumbnail': 're:http://.*\.jpg',
             'upload_date': '20140130',
         }
-    }
+    }, {
+        # video in <iframe>
+        'url': 'http://lifenews.ru/news/152125',
+        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+        'info_dict': {
+            'id': '152125',
+            'ext': 'mp4',
+            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+            'upload_date': '20150402',
+            'uploader': 'embed.life.ru',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -36,7 +48,9 @@ class LifeNewsIE(InfoExtractor):
         webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
 
         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
-        if not videos:
+        iframe_link = self._html_search_regex(
+            '<iframe[^>]+src="([^"]+)', webpage, 'iframe link', default=None)
+        if not videos and not iframe_link:
             raise ExtractorError('No media links available for %s' % video_id)
 
         title = self._og_search_title(webpage)
@@ -56,17 +70,32 @@ class LifeNewsIE(InfoExtractor):
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)
 
+        common_info = {
+            'description': description,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
+            'upload_date': upload_date,
+        }
+
         def make_entry(video_id, media, video_number=None):
-            return {
+            cur_info = dict(common_info)
+            cur_info.update({
                 'id': video_id,
                 'url': media[1],
                 'thumbnail': media[0],
                 'title': title if video_number is None else '%s-video%s' % (title, video_number),
-                'description': description,
-                'view_count': int_or_none(view_count),
-                'comment_count': int_or_none(comment_count),
-                'upload_date': upload_date,
-            }
+            })
+            return cur_info
+
+        if iframe_link:
+            cur_info = dict(common_info)
+            cur_info.update({
+                '_type': 'url_transparent',
+                'id': video_id,
+                'title': title,
+                'url': iframe_link,
+            })
+            return cur_info
 
         if len(videos) == 1:
             return make_entry(video_id, videos[0])
```