diff options
Diffstat (limited to 'youtube_dl/extractor/zdf.py')
-rw-r--r-- | youtube_dl/extractor/zdf.py | 65 |
1 files changed, 46 insertions, 19 deletions
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 98f15177b..9a3331a69 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -9,6 +9,7 @@ from ..utils import ( int_or_none, unified_strdate, OnDemandPagedList, + xpath_text, ) @@ -19,13 +20,11 @@ def extract_from_xml_url(ie, video_id, xml_url): errnote='Failed to download video info') title = doc.find('.//information/title').text - description = doc.find('.//information/detail').text - duration = int(doc.find('.//details/lengthSec').text) - uploader_node = doc.find('.//details/originChannelTitle') - uploader = None if uploader_node is None else uploader_node.text - uploader_id_node = doc.find('.//details/originChannelId') - uploader_id = None if uploader_id_node is None else uploader_id_node.text - upload_date = unified_strdate(doc.find('.//details/airtime').text) + description = xpath_text(doc, './/information/detail', 'description') + duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) + uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') + uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') + upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) def xml_to_format(fnode): video_url = fnode.find('url').text @@ -40,15 +39,14 @@ def extract_from_xml_url(ie, video_id, xml_url): ext = format_m.group('container') proto = format_m.group('proto').lower() - quality = fnode.find('./quality').text - abr = int(fnode.find('./audioBitrate').text) // 1000 - vbr_node = fnode.find('./videoBitrate') - vbr = None if vbr_node is None else int(vbr_node.text) // 1000 + quality = xpath_text(fnode, './quality', 'quality') + abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) + vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) - width_node = fnode.find('./width') - width = None if width_node is None else int_or_none(width_node.text) - height_node = fnode.find('./height') - height = None if height_node is None else int_or_none(height_node.text) + width = int_or_none(xpath_text(fnode, './width', 'width')) + height = int_or_none(xpath_text(fnode, './height', 'height')) + + filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) format_note = '' if not format_note: @@ -64,12 +62,31 @@ def extract_from_xml_url(ie, video_id, xml_url): 'vbr': vbr, 'width': width, 'height': height, - 'filesize': int_or_none(fnode.find('./filesize').text), + 'filesize': filesize, 'format_note': format_note, 'protocol': proto, '_available': is_available, } + def xml_to_thumbnails(fnode): + thumbnails = [] + for node in fnode: + thumbnail_url = node.text + if not thumbnail_url: + continue + thumbnail = { + 'url': thumbnail_url, + } + if 'key' in node.attrib: + m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) + thumbnails.append(thumbnail) + return thumbnails + + thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) + format_nodes = doc.findall('.//formitaeten/formitaet') formats = list(filter( lambda f: f['_available'], @@ -81,6 +98,7 @@ def extract_from_xml_url(ie, video_id, xml_url): 'title': title, 'description': description, 'duration': duration, + 'thumbnails': thumbnails, 'uploader': uploader, 'uploader_id': uploader_id, 'upload_date': upload_date, @@ -113,14 +131,23 @@ class ZDFIE(InfoExtractor): class ZDFChannelIE(InfoExtractor): - _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', 'info_dict': { 'id': '1586442', }, 'playlist_count': 3, - } + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332', + 'only_matching': True, + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332', + 'only_matching': True, + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off', + 'only_matching': True, + }] _PAGE_SIZE = 50 def _fetch_page(self, channel_id, page): |