diff options
Diffstat (limited to 'youtube_dl/extractor/vk.py')
-rw-r--r-- | youtube_dl/extractor/vk.py | 52 |
1 files changed, 39 insertions, 13 deletions
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 8ac3aeac0..765e9e6fd 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -17,10 +17,12 @@ from ..utils import ( unescapeHTML, unified_strdate, ) +from .vimeo import VimeoIE class VKIE(InfoExtractor): - IE_NAME = 'vk.com' + IE_NAME = 'vk' + IE_DESC = 'VK' _VALID_URL = r'''(?x) https?:// (?: @@ -154,6 +156,11 @@ class VKIE(InfoExtractor): 'only_matching': True, }, { + # age restricted video, requires vk account credentials + 'url': 'https://vk.com/video205387401_164765225', + 'only_matching': True, + }, + { # vk wrapper 'url': 'http://www.biqle.ru/watch/847655_160197695', 'only_matching': True, @@ -168,7 +175,7 @@ class VKIE(InfoExtractor): login_page = self._download_webpage( 'https://vk.com', None, 'Downloading login page') - login_form = self._form_hidden_inputs(login_page) + login_form = self._hidden_inputs(login_page) login_form.update({ 'email': username.encode('cp1251'), @@ -204,6 +211,12 @@ class VKIE(InfoExtractor): info_page = self._download_webpage(info_url, video_id) + error_message = self._html_search_regex( + r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', + info_page, 'error message', default=None) + if error_message: + raise ExtractorError(error_message, expected=True) + if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): raise ExtractorError( 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', @@ -237,6 +250,10 @@ class VKIE(InfoExtractor): if youtube_url: return self.url_result(youtube_url, 'Youtube') + vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + if vimeo_url is not None: + return self.url_result(vimeo_url) + m_rutube = re.search( r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) if m_rutube is not None: @@ -289,25 +306,34 @@ class VKIE(InfoExtractor): class VKUserVideosIE(InfoExtractor): - IE_NAME = 'vk.com:user-videos' - IE_DESC = 'vk.com:All of a user\'s videos' - _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?' + IE_NAME = 'vk:uservideos' + IE_DESC = "VK - User's Videos" + _VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$' _TEMPLATE_URL = 'https://vk.com/videos' - _TEST = { + _TESTS = [{ 'url': 'http://vk.com/videos205387401', 'info_dict': { 'id': '205387401', + 'title': "Tom Cruise's Videos", }, 'playlist_mincount': 4, - } + }, { + 'url': 'http://vk.com/videos-77521', + 'only_matching': True, + }] def _real_extract(self, url): page_id = self._match_id(url) - page = self._download_webpage(url, page_id) - video_ids = orderedSet( - m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page)) - url_entries = [ + + webpage = self._download_webpage(url, page_id) + + entries = [ self.url_result( 'http://vk.com/video' + video_id, 'VK', video_id=video_id) - for video_id in video_ids] - return self.playlist_result(url_entries, page_id) + for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))] + + title = unescapeHTML(self._search_regex( + r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos', + webpage, 'title', default=page_id)) + + return self.playlist_result(entries, page_id, title) |