diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-12-11 16:35:45 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-12-11 16:35:45 +0100 | 
| commit | cf68bcaeff77f5a5784c7c5473ce4fea0b8c73ae (patch) | |
| tree | f0814151a6174723d04ac15b85327f78a71b1925 | |
| parent | cbe2bd914d65fc717d31034234102cd25c7f7313 (diff) | |
| parent | 8560c618422214745caa4766292afa9c0d3f35c2 (diff) | |
Merge remote-tracking branch 'akretz/master'
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/zdf.py | 85 | 
2 files changed, 83 insertions, 4 deletions
def extract_channel_from_xml_url(ie, channel_id, xml_url):
    """Fetch a ZDF channel XML feed and return its basic metadata.

    Downloads the document via the extractor's ``_download_xml`` helper and
    returns a dict with the channel ``id``, ``title``, ``description`` and a
    list of ``assets``; each asset is ``{'id': ..., 'type': ...}`` taken from
    the ``<teasers>`` section of the document.
    """
    doc = ie._download_xml(
        xml_url, channel_id,
        note='Downloading channel info',
        errnote='Failed to download channel info')

    assets = []
    for teaser in doc.findall('.//teasers/teaser'):
        assets.append({
            'id': teaser.find('./details/assetId').text,
            'type': teaser.find('./type').text,
        })

    return {
        'id': channel_id,
        'title': doc.find('.//information/title').text,
        'description': doc.find('.//information/detail').text,
        'assets': assets,
    }
class ZDFChannelIE(ZDFIE):
    """Playlist extractor for ZDFmediathek channel overview pages.

    Pages through the channel's asset feed in fixed-size chunks, recursively
    expands 'thema' (topic) sub-channels into their video ids, and returns a
    playlist of all collected videos.
    """
    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*kanaluebersicht/)(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
            'title': 'Titanic',
            'description': 'md5:444c048cfe3fdc2561be7de4bcbf1d04',
        },
        'playlist_count': 3,
    }

    def _extract_channel(self, channel_id):
        def load_chunks(channel_id, chunk_length):
            # Yield successive feed chunks until the server returns a short
            # (i.e. final) chunk.
            offset = 0
            while True:
                url = ('http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
                       % (offset, chunk_length, channel_id))
                result = extract_channel_from_xml_url(self, channel_id, url)
                yield result
                if len(result['assets']) < chunk_length:
                    return
                offset += chunk_length

        def load_channel(channel_id):
            chunks = list(load_chunks(channel_id, 50))  # The server rejects higher values
            assets = [asset for chunk in chunks for asset in chunk['assets']]
            video_ids = [a['id'] for a in assets if a['type'] == 'video']
            topic_ids = [a['id'] for a in assets if a['type'] == 'thema']
            # BUG FIX: the original used reduce(list.__add__, ...), which is a
            # NameError on Python 3 (reduce lives in functools there and is
            # never imported). A plain loop is correct on both and clearer.
            for topic_id in topic_ids:
                video_ids.extend(load_channel(topic_id)['video_ids'])

            # Reuse the first chunk's metadata (title/description) for the
            # channel as a whole, attaching the flattened video id list.
            result = chunks[0]
            result['video_ids'] = video_ids
            return result

        channel = load_channel(channel_id)
        return {
            '_type': 'playlist',
            'id': channel['id'],
            'title': channel['title'],
            'description': channel['description'],
            'entries': [self._extract_video(video_id)
                        for video_id in channel['video_ids']],
        }

    def _real_extract(self, url):
        return self._extract_channel(self._match_id(url))
