diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-05-17 03:32:53 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-05-17 03:32:53 +0600 |
commit | 588b82bbf8c90981c54f180eca40e6c743f8f89f (patch) | |
tree | 842183ab70e6e2995033e91dc31073438d0ce1a7 /youtube_dl | |
parent | bc0f937b55aae6ce731d259a7658b0281c2e62ec (diff) |
[tv2:article] Add extractor (Closes #5724)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/tv2.py | 33 |
2 files changed, 37 insertions, 1 deletion
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb4f63ca3..6f8c261d5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -572,7 +572,10 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE -from .tv2 import TV2IE +from .tv2 import ( + TV2IE, + TV2ArticleIE, +) from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 2dcc0e971..fa338b936 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -1,12 +1,15 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, float_or_none, parse_iso8601, + remove_end, ) @@ -91,3 +94,33 @@ class TV2IE(InfoExtractor): 'categories': categories, 'formats': formats, } + + +class TV2ArticleIE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', + 'info_dict': { + 'id': '6930542', + 'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', + 'description': 'md5:339573779d3eea3542ffe12006190954', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.tv2.no/a/6930542', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') + for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] + + title = remove_end(self._og_search_title(webpage), ' - TV2.no') + description = remove_end(self._og_search_description(webpage), ' - TV2.no') + + return self.playlist_result(entries, playlist_id, title, description) |