diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-04 01:24:19 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-05-04 01:24:19 +0800 | 
| commit | 848edeab898ee45e45a8fcdb355f35be8b73dd5d (patch) | |
| tree | 1aa31a950d1e121f6b474b68207ef6f3ee4ff1d4 /youtube_dl/extractor | |
| parent | 1748d67aea66b6d63bc203ada1036110e7fd2402 (diff) | |
[lifenews] Detect <iframe> (fixes #5346)
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/lifenews.py | 47 | 
1 files changed, 38 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py
index 3c00ed333..330138692 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -16,7 +16,7 @@ class LifeNewsIE(InfoExtractor):
     IE_DESC = 'LIFE | NEWS'
     _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://lifenews.ru/news/126342',
         'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
         'info_dict': {
@@ -27,7 +27,19 @@ class LifeNewsIE(InfoExtractor):
             'thumbnail': 're:http://.*\.jpg',
             'upload_date': '20140130',
         }
-    }
+    }, {
+        # video in <iframe>
+        'url': 'http://lifenews.ru/news/152125',
+        'md5': '77d19a6f0886cd76bdbf44b4d971a273',
+        'info_dict': {
+            'id': '152125',
+            'ext': 'mp4',
+            'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
+            'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
+            'upload_date': '20150402',
+            'uploader': 'embed.life.ru',
+        }
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -36,7 +48,9 @@ class LifeNewsIE(InfoExtractor):
         webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
 
         videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
-        if not videos:
+        iframe_link = self._html_search_regex(
+            '<iframe[^>]+src="([^"]+)', webpage, 'iframe link', default=None)
+        if not videos and not iframe_link:
             raise ExtractorError('No media links available for %s' % video_id)
 
         title = self._og_search_title(webpage)
@@ -56,17 +70,32 @@ class LifeNewsIE(InfoExtractor):
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)
 
+        common_info = {
+            'description': description,
+            'view_count': int_or_none(view_count),
+            'comment_count': int_or_none(comment_count),
+            'upload_date': upload_date,
+        }
+
         def make_entry(video_id, media, video_number=None):
-            return {
+            cur_info = dict(common_info)
+            cur_info.update({
                 'id': video_id,
                 'url': media[1],
                 'thumbnail': media[0],
                 'title': title if video_number is None else '%s-video%s' % (title, video_number),
-                'description': description,
-                'view_count': int_or_none(view_count),
-                'comment_count': int_or_none(comment_count),
-                'upload_date': upload_date,
-            }
+            })
+            return cur_info
+
+        if iframe_link:
+            cur_info = dict(common_info)
+            cur_info.update({
+                '_type': 'url_transparent',
+                'id': video_id,
+                'title': title,
+                'url': iframe_link,
+            })
+            return cur_info
 
         if len(videos) == 1:
             return make_entry(video_id, videos[0])
```
