diff options
author | Lucas <mikotosc@gmail.com> | 2015-09-24 22:19:09 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-31 15:18:23 +0100 |
commit | 47f2d01a5ac074f6959aa12e8bc00310f18a54e8 (patch) | |
tree | 6448528042ef9a267357054326546a43491cd987 | |
parent | 6722ebd43720e836af8217fa078fa1a604b98229 (diff) |
Add new extractor
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/kika.py | 115 |
2 files changed, 116 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f98e6487e..5ad4e9c36 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -274,6 +274,7 @@ from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE +from .kika import KikaIE from .keek import KeekIE from .kontrtube import KontrTubeIE from .krasview import KrasViewIE diff --git a/youtube_dl/extractor/kika.py b/youtube_dl/extractor/kika.py new file mode 100644 index 000000000..db0f333ff --- /dev/null +++ b/youtube_dl/extractor/kika.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class KikaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|sendung)(?P<id>\d+).*' + + _TESTS = [ + { + 'url': 'http://www.kika.de/baumhaus/videos/video9572.html', + 'md5': '94fc748cf5d64916571d275a07ffe2d5', + 'info_dict': { + 'id': '9572', + 'ext': 'mp4', + 'title': 'Baumhaus vom 29. Oktober 2014', + 'description': None + } + }, + { + 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', + 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', + 'info_dict': { + 'id': '8182', + 'ext': 'mp4', + 'title': 'Beutolomäus und der geheime Weihnachtswunsch', + 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd' + } + }, + { + 'url': 'http://www.kika.de/videos/allevideos/video9572_zc-32ca94ad_zs-3f535991.html', + 'md5': '94fc748cf5d64916571d275a07ffe2d5', + 'info_dict': { + 'id': '9572', + 'ext': 'mp4', + 'title': 'Baumhaus vom 29. Oktober 2014', + 'description': None + } + }, + { + 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/sendung81244_zc-81d703f8_zs-f82d5e31.html', + 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', + 'info_dict': { + 'id': '8182', + 'ext': 'mp4', + 'title': 'Beutolomäus und der geheime Weihnachtswunsch', + 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd' + } + } + ] + + def _real_extract(self, url): + # broadcast_id may be the same as the video_id + broadcast_id = self._match_id(url) + webpage = self._download_webpage(url, broadcast_id) + + xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml' + video_id = self._search_regex(xml_re, webpage, "xml_url", default=None) + if not video_id: + # Video is not available online + err_msg = 'Video %s is not available online' % broadcast_id + raise ExtractorError(err_msg, expected=True) + + xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id) + xml_tree = self._download_xml(xml_url, video_id) + + title = xml_tree.find('title').text + webpage_url = xml_tree.find('htmlUrl').text + + # Try to get the description, not available for all videos + try: + broadcast_elem = xml_tree.find('broadcast') + description = broadcast_elem.find('broadcastDescription').text + except AttributeError: + # No description available + description = None + + # duration string format is mm:ss (even if it is >= 1 hour, e.g. 78:42) + tmp = xml_tree.find('duration').text.split(':') + duration = int(tmp[0]) * 60 + int(tmp[1]) + + formats_list = [] + for elem in xml_tree.find('assets'): + format_dict = {} + format_dict['url'] = elem.find('progressiveDownloadUrl').text + format_dict['ext'] = elem.find('mediaType').text.lower() + format_dict['format'] = elem.find('profileName').text + width = int(elem.find('frameWidth').text) + height = int(elem.find('frameHeight').text) + format_dict['width'] = width + format_dict['height'] = height + format_dict['resolution'] = '%dx%d' % (width, height) + format_dict['abr'] = int(elem.find('bitrateAudio').text) + format_dict['vbr'] = int(elem.find('bitrateVideo').text) + format_dict['tbr'] = format_dict['abr'] + format_dict['vbr'] + format_dict['filesize'] = int(elem.find('fileSize').text) + + # append resolution and dict for sorting by resolution + formats_list.append((width * height, format_dict)) + + # Sort by resolution (=quality) + formats_list.sort() + + out_list = [x[1] for x in formats_list] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': out_list, + 'duration': duration, + 'webpage_url': webpage_url + } |