diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-01 22:28:51 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-01 22:34:18 +0100 |
commit | 60d142aa8d896674ca2b062a53b3d18c644192ea (patch) | |
tree | 03603f366c7a2e6b6aeb0fcbd58c6da705953abc /youtube_dl/extractor | |
parent | 66cf3ac3426b62fb960b4de770c4ea8203a0e205 (diff) |
Add an extractor for vk.com (closes #1635)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/vk.py | 45 |
2 files changed, 46 insertions, 0 deletions
# encoding: utf-8
# Module youtube_dl/extractor/vk.py.
# The extractor is registered in youtube_dl/extractor/__init__.py with
# `from .vk import VKIE` (placed between the `.vine` and `.wat` imports).
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_str,
    unescapeHTML,
)


class VKIE(InfoExtractor):
    """Information extractor for videos on vk.com.

    Handles videos served by vk.com itself as well as vk.com pages that
    merely embed a YouTube player (those are handed over to the Youtube
    extractor).
    """

    IE_NAME = u'vk.com'
    # Matches both direct ``/video<id>`` URLs and listing URLs of the form
    # ``/videos...?...z=video<id>``.  The id stops at the first ``?``, at a
    # URL-encoded slash (``%2F``) or at the end of the string.
    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'

    _TEST = {
        u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
        u'md5': u'0deae91935c54e00003c2a00646315f0',
        u'info_dict': {
            u'id': u'162222515',
            u'ext': u'flv',
            u'title': u'ProtivoGunz - Хуёвая песня',
            u'uploader': u'Noize MC',
        },
    }

    def _real_extract(self, url):
        """Build the result for a vk.com video URL.

        The ``al_video.php`` page for the video id is downloaded.  When it
        contains an embedded YouTube player, a ``url_result`` pointing at
        the Youtube extractor is returned.  Otherwise the JSON object
        assigned to ``var vars`` in the page is parsed and turned into an
        info dict with the keys ``id``, ``url``, ``title``, ``thumbnail``
        and ``uploader``.

        Raises ``KeyError`` if the parsed player variables lack one of the
        required entries (``vid``, ``url240``, ``md_title``).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
        info_page = self._download_webpage(info_url, video_id)

        # Some vk.com entries only wrap a YouTube player; delegate those.
        # Dots are escaped so they match literally, and both http and https
        # embed URLs are recognised.
        m_yt = re.search(
            r'src="(https?://www\.youtube\.com/.*?)"', info_page)
        if m_yt is not None:
            self.to_screen(u'Youtube video detected')
            return self.url_result(m_yt.group(1), 'Youtube')

        vars_json = self._search_regex(
            r'var vars = ({.*?});', info_page, u'vars')
        # Named ``player_vars`` to avoid shadowing the builtin ``vars()``.
        player_vars = json.loads(vars_json)

        return {
            'id': compat_str(player_vars['vid']),
            # Only the 'url240' entry is used.
            # NOTE(review): presumably other quality variants exist in the
            # player variables - confirm before adding format selection.
            'url': player_vars['url240'],
            'title': unescapeHTML(player_vars['md_title']),
            # Thumbnail and uploader are optional metadata; a page without
            # them should still yield a usable result instead of a KeyError.
            'thumbnail': player_vars.get('jpg'),
            'uploader': player_vars.get('md_author'),
        }