diff options
author | kaspi <je326@hotmail.com> | 2015-10-12 01:25:57 -0400 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-13 22:31:12 +0200 |
commit | 3dc582e5ea69af4ad7f51d30c1d87cf93aa6b72b (patch) | |
tree | 0ebc934bc3caeadf787b34b51e0a23d5f64e1482 | |
parent | 506e261d2073d8c00d5b43d272e8173cb0d63728 (diff) |
[fczenit] Add extractor
Closes #7143.
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/fczenit.py | 41 |
2 files changed, 42 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 75720843c..f6d185818 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -167,6 +167,7 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE +from .fczenit import FczenitIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py new file mode 100644 index 000000000..f1f150ef2 --- /dev/null +++ b/youtube_dl/extractor/fczenit.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FczenitIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://fc-zenit.ru/video/gl6785/', + 'md5': '458bacc24549173fe5a5aa29174a5606', + 'info_dict': { + 'id': '6785', + 'ext': 'mp4', + 'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title') + + bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL') + bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw) + + formats = [{ + 'url': furl, + 'tbr': tbr, + } for furl, tbr in bitrates] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + } |