diff options
Diffstat (limited to 'youtube_dl/extractor/zdf.py')
| -rw-r--r-- | youtube_dl/extractor/zdf.py | 65 | 
1 files changed, 46 insertions, 19 deletions
| diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 98f15177b..9a3331a69 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -9,6 +9,7 @@ from ..utils import (      int_or_none,      unified_strdate,      OnDemandPagedList, +    xpath_text,  ) @@ -19,13 +20,11 @@ def extract_from_xml_url(ie, video_id, xml_url):          errnote='Failed to download video info')      title = doc.find('.//information/title').text -    description = doc.find('.//information/detail').text -    duration = int(doc.find('.//details/lengthSec').text) -    uploader_node = doc.find('.//details/originChannelTitle') -    uploader = None if uploader_node is None else uploader_node.text -    uploader_id_node = doc.find('.//details/originChannelId') -    uploader_id = None if uploader_id_node is None else uploader_id_node.text -    upload_date = unified_strdate(doc.find('.//details/airtime').text) +    description = xpath_text(doc, './/information/detail', 'description') +    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) +    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') +    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') +    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))      def xml_to_format(fnode):          video_url = fnode.find('url').text @@ -40,15 +39,14 @@ def extract_from_xml_url(ie, video_id, xml_url):          ext = format_m.group('container')          proto = format_m.group('proto').lower() -        quality = fnode.find('./quality').text -        abr = int(fnode.find('./audioBitrate').text) // 1000 -        vbr_node = fnode.find('./videoBitrate') -        vbr = None if vbr_node is None else int(vbr_node.text) // 1000 +        quality = xpath_text(fnode, './quality', 'quality') +        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) +        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) -        width_node = fnode.find('./width') -        width = None if width_node is None else int_or_none(width_node.text) -        height_node = fnode.find('./height') -        height = None if height_node is None else int_or_none(height_node.text) +        width = int_or_none(xpath_text(fnode, './width', 'width')) +        height = int_or_none(xpath_text(fnode, './height', 'height')) + +        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))          format_note = ''          if not format_note: @@ -64,12 +62,31 @@ def extract_from_xml_url(ie, video_id, xml_url):              'vbr': vbr,              'width': width,              'height': height, -            'filesize': int_or_none(fnode.find('./filesize').text), +            'filesize': filesize,              'format_note': format_note,              'protocol': proto,              '_available': is_available,          } +    def xml_to_thumbnails(fnode): +        thumbnails = [] +        for node in fnode: +            thumbnail_url = node.text +            if not thumbnail_url: +                continue +            thumbnail = { +                'url': thumbnail_url, +            } +            if 'key' in node.attrib: +                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) +                if m: +                    thumbnail['width'] = int(m.group(1)) +                    thumbnail['height'] = int(m.group(2)) +            thumbnails.append(thumbnail) +        return thumbnails + +    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) +      format_nodes = doc.findall('.//formitaeten/formitaet')      formats = list(filter(          lambda f: f['_available'], @@ -81,6 +98,7 @@ def extract_from_xml_url(ie, video_id, xml_url):          'title': title,          'description': description,          'duration': duration, +        'thumbnails': thumbnails,          'uploader': uploader,          'uploader_id': uploader_id,          'upload_date': upload_date, @@ -113,14 +131,23 @@ class ZDFIE(InfoExtractor):  class ZDFChannelIE(InfoExtractor): -    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)' -    _TEST = { +    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P<id>[0-9]+)' +    _TESTS = [{          'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',          'info_dict': {              'id': '1586442',          },          'playlist_count': 3, -    } +    }, { +        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332', +        'only_matching': True, +    }, { +        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332', +        'only_matching': True, +    }, { +        'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off', +        'only_matching': True, +    }]      _PAGE_SIZE = 50      def _fetch_page(self, channel_id, page): | 
