diff options
Diffstat (limited to 'youtube_dl/extractor/ard.py')
| -rw-r--r-- | youtube_dl/extractor/ard.py | 76 | 
1 files changed, 70 insertions, 6 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 7f0da8ab6..967bd865c 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -4,16 +4,21 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor +from .generic import GenericIE  from ..utils import (      determine_ext,      ExtractorError,      qualities, -    compat_urllib_parse_urlparse, -    compat_urllib_parse, +    int_or_none, +    parse_duration, +    unified_strdate, +    xpath_text, +    parse_xml,  ) -class ARDIE(InfoExtractor): +class ARDMediathekIE(InfoExtractor): +    IE_NAME = 'ARD:mediathek'      _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'      _TESTS = [{ @@ -46,14 +51,16 @@ class ARDIE(InfoExtractor):          else:              video_id = m.group('video_id') -        urlp = compat_urllib_parse_urlparse(url) -        url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl() -          webpage = self._download_webpage(url, video_id)          if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:              raise ExtractorError('Video %s is no longer available' % video_id, expected=True) +        if re.search(r'[\?&]rss($|[=&])', url): +            doc = parse_xml(webpage) +            if doc.tag == 'rss': +                return GenericIE()._extract_rss(url, video_id, doc) +          title = self._html_search_regex(              [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',               r'<meta name="dcterms.title" content="(.*?)"/>', @@ -128,3 +135,60 @@ class ARDIE(InfoExtractor):              'formats': formats,              'thumbnail': thumbnail,          } + + +class ARDIE(InfoExtractor): +    _VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html' +    _TEST = { +        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', +        'md5': 'd216c3a86493f9322545e045ddc3eb35', +        'info_dict': { +            'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge', +            'id': '100', +            'ext': 'mp4', +            'duration': 2600, +            'title': 'Die Story im Ersten: Mission unter falscher Flagge', +            'upload_date': '20140804', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        display_id = mobj.group('display_id') + +        player_url = mobj.group('mainurl') + '~playerXml.xml' +        doc = self._download_xml(player_url, display_id) +        video_node = doc.find('./video') +        upload_date = unified_strdate(xpath_text( +            video_node, './broadcastDate')) +        thumbnail = xpath_text(video_node, './/teaserImage//variant/url') + +        formats = [] +        for a in video_node.findall('.//asset'): +            f = { +                'format_id': a.attrib['type'], +                'width': int_or_none(a.find('./frameWidth').text), +                'height': int_or_none(a.find('./frameHeight').text), +                'vbr': int_or_none(a.find('./bitrateVideo').text), +                'abr': int_or_none(a.find('./bitrateAudio').text), +                'vcodec': a.find('./codecVideo').text, +                'tbr': int_or_none(a.find('./totalBitrate').text), +            } +            if a.find('./serverPrefix').text: +                f['url'] = a.find('./serverPrefix').text +                f['playpath'] = a.find('./fileName').text +            else: +                f['url'] = a.find('./fileName').text +            formats.append(f) +        self._sort_formats(formats) + +        return { +            'id': mobj.group('id'), +            'formats': formats, +            'display_id': display_id, +            'title': video_node.find('./title').text, +            'duration': parse_duration(video_node.find('./duration').text), +            'upload_date': upload_date, +            'thumbnail': thumbnail, +        }  | 
