diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-08-13 23:25:47 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-08-13 23:25:47 +0600 |
commit | 3c12a027d48a2d6d1162ab515df0308237aef881 (patch) | |
tree | b632ed43ae8b7b9992796ffeadb1ccc257e6a85c /youtube_dl/extractor/indavideo.py | |
parent | cb28e0338665c96b2d5b35d203b1d54a57f3feb1 (diff) |
[indavideo] Split in two extractors, extract all formats and fix timestamp
Diffstat (limited to 'youtube_dl/extractor/indavideo.py')
-rw-r--r-- | youtube_dl/extractor/indavideo.py | 170 |
1 file changed, 110 insertions, 60 deletions
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 2a2cf2bd3..b75715244 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -3,77 +3,127 @@ from __future__ import unicode_literals from .. import utils from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_age_limit, + parse_iso8601, +) -class IndavideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P<id>.+)' - _TESTS = [ - { - 'url': 'http://indavideo.hu/video/Cicatanc', - 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', - 'info_dict': { - 'id': '1837039', - 'title': 'Cicatánc', - 'ext': 'mp4', - 'display_id': 'Cicatanc', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': '', - 'uploader': 'cukiajanlo', - 'uploader_id': '83729', - 'duration': 72, - 'age_limit': 0, - 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'] - }, - }, - { - 'url': 'http://indavideo.hu/video/Vicces_cica_1', - 'md5': '8c82244ba85d2a2310275b318eb51eac', - 'info_dict': { - 'id': '1335611', - 'title': 'Vicces cica', - 'ext': 'mp4', - 'display_id': 'Vicces_cica_1', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Játszik a tablettel. 
:D', - 'uploader': 'Jet_Pack', - 'uploader_id': '491217', - 'duration': 7, - 'age_limit': 0, - 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], - }, +class IndavideoEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)' + _TESTS = [{ + 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', + 'md5': 'f79b009c66194acacd40712a6778acfa', + 'info_dict': { + 'id': '1837039', + 'ext': 'mp4', + 'title': 'Cicatánc', + 'description': '', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'cukiajanlo', + 'uploader_id': '83729', + 'timestamp': 1439193826, + 'upload_date': '20150810', + 'duration': 72, + 'age_limit': 0, + 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], }, - ] + }, { + 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', + 'only_matching': True, + }, { + 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1', + 'only_matching': True, + }] def _real_extract(self, url): - video_disp_id = self._match_id(url) - webpage = self._download_webpage(url, video_disp_id) + video_id = self._match_id(url) + + video = self._download_json( + 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, + video_id)['data'] + + video_id = video['id'] + title = video['title'] - embed_url = self._html_search_regex(r'<link rel="video_src" href="(.+?)"/>', webpage, 'embed_url') - video_hash = embed_url.split('/')[-1] + video_urls = video.get('video_files', []) + video_file = video.get('video_file') + if video: + video_urls.append(video_file) + video_urls = list(set(video_urls)) - payload = self._download_json('http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/' + video_hash, video_disp_id) - video_info = payload['data'] + video_prefix = video_urls[0].rsplit('/', 1)[0] - thumbnails = 
video_info.get('thumbnails') - if thumbnails: - thumbnails = [{'url': self._proto_relative_url(x)} for x in thumbnails] + for flv_file in video.get('flv_files', []): + flv_url = '%s/%s' % (video_prefix, flv_file) + if flv_url not in video_urls: + video_urls.append(flv_url) - tags = video_info.get('tags') - if tags: - tags = [x['title'] for x in tags] + formats = [{ + 'url': video_url, + 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None), + } for video_url in video_urls] + self._sort_formats(formats) + + timestamp = video.get('date') + if timestamp: + # upload date is in CEST + timestamp = parse_iso8601(timestamp + ' +0200', ' ') + + thumbnails = [{ + 'url': self._proto_relative_url(thumbnail) + } for thumbnail in video.get('thumbnails', [])] + + tags = [tag['title'] for tag in video.get('tags', [])] return { - 'id': video_info.get('id'), - 'title': video_info['title'], - 'url': video_info['video_file'], - 'ext': 'mp4', - 'display_id': video_disp_id, + 'id': video_id, + 'title': title, + 'description': video.get('description'), 'thumbnails': thumbnails, - 'description': video_info.get('description'), - 'uploader': video_info.get('user_name'), - # TODO: upload date (it's in CET/CEST) - 'uploader_id': video_info.get('user_id'), - 'duration': utils.int_or_none(video_info.get('length')), - 'age_limit': utils.int_or_none(video_info.get('age_limit')), + 'uploader': video.get('user_name'), + 'uploader_id': video.get('user_id'), + 'timestamp': timestamp, + 'duration': int_or_none(video.get('length')), + 'age_limit': parse_age_limit(video.get('age_limit')), 'tags': tags, + 'formats': formats, + } + + +class IndavideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P<id>[^/#?]+)' + _TEST = { + 'url': 'http://indavideo.hu/video/Vicces_cica_1', + 'md5': '8c82244ba85d2a2310275b318eb51eac', + 'info_dict': { + 'id': '1335611', + 'display_id': 'Vicces_cica_1', + 'ext': 'mp4', + 'title': 'Vicces cica', + 'description': 
'Játszik a tablettel. :D', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Jet_Pack', + 'uploader_id': '491217', + 'timestamp': 1390821212, + 'upload_date': '20140127', + 'duration': 7, + 'age_limit': 0, + 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + embed_url = self._search_regex( + r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url') + + return { + '_type': 'url_transparent', + 'ie_key': 'IndavideoEmbed', + 'url': embed_url, + 'display_id': display_id, } |