about summary refs log tree commit diff
diff options
context:
space:
mode:
author luceatnobis <wehrmeyer.martin@web.de> 2017-07-04 11:26:02 +0200
committer Sergey M․ <dstftw@gmail.com> 2017-09-10 18:40:33 +0700
commit debed8d759e74507371758d2344ce5afe5e237c2 (patch)
tree de459e2d5deac9ad3e071d2a6122c81bfb69e725
parent 51aee72d16eb844377a44c12e50dbb95cd4ced27 (diff)
download youtube-dl-debed8d759e74507371758d2344ce5afe5e237c2.tar.xz
[rutube:playlist] Add extractor (closes #13534)
-rw-r--r-- youtube_dl/extractor/extractors.py 1
-rw-r--r-- youtube_dl/extractor/rutube.py 84
2 files changed, 85 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 46a11f3ef..aefadc56f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -899,6 +899,7 @@ from .rutube import (
RutubeEmbedIE,
RutubeMovieIE,
RutubePersonIE,
+ RutubePlaylistIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 889fa7628..a6b17c0ef 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -7,10 +7,14 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_str,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
unified_strdate,
+ try_get,
+ int_or_none,
)
@@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor):
}, {
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ parts = compat_urllib_parse_urlparse(url)
+ params = compat_parse_qs(parts.query)
+
+ # see if URL without parameters is OK
+ res = super(RutubeIE, cls).suitable(url)
+
+ if params: # we only allow pl_id parameter in the url
+ res = res and 'pl_id' in params and len(params) == 1
+
+ return res
+
@staticmethod
def _extract_urls(webpage):
return [mobj.group('url') for mobj in re.finditer(
@@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE):
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(InfoExtractor):
+ IE_NAME = 'rutube:playlist'
+ IE_DESC = 'Rutube playlists'
+ _TESTS = [{
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+ 'info_dict': {
+ 'id': '4252',
+ },
+ 'playlist_count': 25,
+ }]
+
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)'
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'
+
+ @staticmethod
+ def suitable(url):
+ params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return params.get('pl_id') and int_or_none(params['pl_id'][0]) \
+ and params.get('pl_type')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return self._extract_playlist(playlist_id)
+
+ def _extract_playlist(self, playlist_id):
+ entries = []
+ for pagenum in itertools.count(1):
+ page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum)
+
+ # download_json would otherwise send an Accept: application/xml header
+ page = self._download_json(page_url, playlist_id,
+ "Downloading metadata for page %s" % pagenum,
+ headers={'Accept': 'application/json'})
+
+ if not page['results']:
+ break
+
+ results = page['results']
+ for result in results:
+ entry = self.url_result(result.get('video_url'), 'Rutube')
+ category = try_get(result, lambda x: x['category']['name'])
+ entry.update({
+ 'id': result.get('id'),
+ 'uploader': try_get(result, lambda x: x['author']['name']),
+ 'uploader_id': try_get(result, lambda x: x['author']['id']),
+ 'upload_date': unified_strdate(result.get('created_ts')),
+ 'title': result.get('title'),
+ 'description': result.get('description'),
+ 'thumbnail': result.get('thumbnail_url'),
+ 'duration': int_or_none(result.get('duration')),
+ 'category': [category] if category else None,
+ 'age_limit': 18 if result.get('is_adult') else 0,
+ 'view_count': int_or_none(result.get('hits')),
+ 'is_live': result.get('is_livestream'),
+ 'webpage_url': result.get('video_url'),
+ })
+ entries.append(entry)
+
+ if page['has_next'] is False:
+ break
+
+ return self.playlist_result(entries, playlist_id, page['name'])