diff options
Diffstat (limited to 'youtube_dl/extractor/vk.py')
-rw-r--r-- | youtube_dl/extractor/vk.py | 52 |
1 files changed, 35 insertions, 17 deletions
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 90d8a6d07..f13ba1c8e 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import re import json @@ -10,19 +12,27 @@ from ..utils import ( class VKIE(InfoExtractor): - IE_NAME = u'vk.com' + IE_NAME = 'vk.com' _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' - _TEST = { - u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', - u'md5': u'0deae91935c54e00003c2a00646315f0', - u'info_dict': { - u'id': u'162222515', - u'ext': u'flv', - u'title': u'ProtivoGunz - Хуёвая песня', - u'uploader': u'Noize MC', + _TESTS = [{ + 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', + 'file': '162222515.flv', + 'md5': '0deae91935c54e00003c2a00646315f0', + 'info_dict': { + 'title': 'ProtivoGunz - Хуёвая песня', + 'uploader': 'Noize MC', }, - } + }, + { + 'url': 'http://vk.com/video4643923_163339118', + 'file': '163339118.mp4', + 'md5': 'f79bccb5cd182b1f43502ca5685b2b36', + 'info_dict': { + 'uploader': 'Elvira Dzhonik', + 'title': 'Dream Theater - Hollow Years Live at Budokan 720*', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -33,13 +43,21 @@ class VKIE(InfoExtractor): if m_yt is not None: self.to_screen(u'Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') - vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars') - vars = json.loads(vars_json) + data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') + data = json.loads(data_json) + + formats = [{ + 'format_id': k, + 'url': v, + 'width': int(k[len('url'):]), + } for k, v in data.items() + if k.startswith('url')] + self._sort_formats(formats) return { - 'id': compat_str(vars['vid']), - 'url': vars['url240'], - 'title': unescapeHTML(vars['md_title']), - 'thumbnail': vars['jpg'], - 'uploader': vars['md_author'], + 'id': compat_str(data['vid']), + 'formats': formats, + 'title': unescapeHTML(data['md_title']), + 'thumbnail': data.get('jpg'), + 'uploader': data.get('md_author'), } |