aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-08-22 02:19:30 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-08-22 02:19:30 +0700
commit df4d817bc3802e776c5056c3288953aa0ff817b1 (patch)
tree 18bd25331a2fdd1082dbd6471da27140ab7b0e02
parent db192b29329696547b422d904819321077efddfe (diff)
[kinopoisk] Add extractor (closes #17283)
-rw-r--r-- youtube_dl/extractor/extractors.py | 1
-rw-r--r-- youtube_dl/extractor/kinopoisk.py | 70
2 files changed, 71 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d8e86c277..9fc1cfa70 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -520,6 +520,7 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
+from .kinopoisk import KinoPoiskIE
from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
diff --git a/youtube_dl/extractor/kinopoisk.py b/youtube_dl/extractor/kinopoisk.py
new file mode 100644
index 000000000..9e8d01f53
--- /dev/null
+++ b/youtube_dl/extractor/kinopoisk.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ int_or_none,
+)
+
+
+class KinoPoiskIE(InfoExtractor):  # Extractor for film pages on kinopoisk.ru (URL shape given by _VALID_URL).
+ _GEO_COUNTRIES = ['RU']  # NOTE(review): presumably read by the InfoExtractor base for geo handling - confirm.
+ _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'  # Optional "www."; numeric film id captured as group `id`.
+ _TESTS = [{  # First entry lists expected metadata; second entry is marked only_matching.
+ 'url': 'https://www.kinopoisk.ru/film/81041/watch/',
+ 'md5': '4f71c80baea10dfa54a837a46111d326',
+ 'info_dict': {
+ 'id': '81041',
+ 'ext': 'mp4',
+ 'title': 'Алеша попович и тугарин змей',
+ 'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 4533,
+ 'age_limit': 12,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://www.kinopoisk.ru/film/81041',  # Same film URL without the trailing "/watch/" part.
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):  # Builds and returns the info dict (id, metadata, formats) for one film URL.
+ video_id = self._match_id(url)  # NOTE(review): presumably the `id` group of _VALID_URL - helper defined outside this view.
+
+ webpage = self._download_webpage(  # The ott-widget endpoint is requested, not the film page URL itself.
+ 'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
+ query={'kpId': video_id})  # The film id is handed over as `kpId`.
+
+ data = self._parse_json(  # Only the 'models' member of the embedded JSON is kept.
+ self._search_regex(
+ r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',  # Lazy capture: text after an application/json <script> tag up to the next '<'.
+ webpage, 'data'),
+ video_id)['models']
+
+ film = data['filmStatus']  # Plain subscript: a missing 'filmStatus' key fails here.
+ title = film.get('title') or film['originalTitle']  # Falls back to 'originalTitle' when 'title' is missing or falsy.
+
+ formats = self._extract_m3u8_formats(  # Formats come from the playlist URI found under 'playlistEntity'.
+ data['playlistEntity']['uri'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description = dict_get(  # NOTE(review): presumably yields the first usable value among the listed keys - confirm in ..utils.
+ film, ('descriptscription', 'description',  # NOTE(review): 'descriptscription' looks like a typo; may mirror a real upstream key - confirm before changing.
+ 'shortDescriptscription', 'shortDescription'))
+ thumbnail = film.get('coverUrl') or film.get('posterUrl')  # 'coverUrl' is preferred; 'posterUrl' is the fallback.
+ duration = int_or_none(film.get('duration'))  # Both numeric fields are passed through int_or_none from ..utils.
+ age_limit = int_or_none(film.get('restrictionAge'))
+
+ return {  # Every optional field is looked up with .get()/dict_get, unlike title and formats above.
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }