diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-10-27 02:43:59 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-10-27 02:43:59 +0100 | 
| commit | 8cc3eba79ae19cf5ec4780356b75ccb9813916f0 (patch) | |
| tree | 34d9adc88e0dd73ab25a7f857504176e70e168b5 | |
| parent | b0fb6d4db1ed19dfa3a38ee5fd1d38e6227c3d93 (diff) | |
[phoenix] Add new extractor (Fixes #4036)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/phoenix.py | 31 | ||||
| -rw-r--r-- | youtube_dl/extractor/zdf.py | 151 | 
3 files changed, 109 insertions, 74 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 17ab49283..3979b8270 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -280,6 +280,7 @@ from .orf import (  from .parliamentliveuk import ParliamentLiveUKIE  from .patreon import PatreonIE  from .pbs import PBSIE +from .phoenix import PhoenixIE  from .photobucket import PhotobucketIE  from .planetaplay import PlanetaPlayIE  from .played import PlayedIE diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py new file mode 100644 index 000000000..a20672c0c --- /dev/null +++ b/youtube_dl/extractor/phoenix.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .zdf import extract_from_xml_url + + +class PhoenixIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.phoenix.de/content/884301', +        'md5': 'ed249f045256150c92e72dbb70eadec6', +        'info_dict': { +            'id': '884301', +            'ext': 'mp4', +            'title': 'Michael Krons mit Hans-Werner Sinn', +            'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', +            'upload_date': '20141025', +            'uploader': 'Im Dialog', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        internal_id = self._search_regex( +            r'<div class="phx_vod" id="phx_vod_([0-9]+)"', +            webpage, 'internal video ID') + +        api_url = 'http://www.phoenix.de/php/zdfplayer-v1.3/data/beitragsDetails.php?ak=web&id=%s' % internal_id +        return extract_from_xml_url(self, video_id, api_url) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 52b066ce3..9ff00e26c 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -10,6 +10,82 @@ from ..utils import (  ) +def extract_from_xml_url(ie, video_id, xml_url): +    doc = ie._download_xml( +        xml_url, video_id, +        note='Downloading video info', +        errnote='Failed to download video info') + +    title = doc.find('.//information/title').text +    description = doc.find('.//information/detail').text +    duration = int(doc.find('.//details/lengthSec').text) +    uploader_node = doc.find('.//details/originChannelTitle') +    uploader = None if uploader_node is None else uploader_node.text +    uploader_id_node = doc.find('.//details/originChannelId') +    uploader_id = None if uploader_id_node is None else uploader_id_node.text +    upload_date = unified_strdate(doc.find('.//details/airtime').text) + +    def xml_to_format(fnode): +        video_url = fnode.find('url').text +        is_available = 'http://www.metafilegenerator' not in video_url + +        format_id = fnode.attrib['basetype'] +        format_m = re.match(r'''(?x) +            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ +            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) +        ''', format_id) + +        ext = format_m.group('container') +        proto = format_m.group('proto').lower() + +        quality = fnode.find('./quality').text +        abr = int(fnode.find('./audioBitrate').text) // 1000 +        vbr_node = fnode.find('./videoBitrate') +        vbr = None if vbr_node is None else int(vbr_node.text) // 1000 + +        width_node = fnode.find('./width') +        width = None if width_node is None else int_or_none(width_node.text) +        height_node = fnode.find('./height') +        height = None if height_node is None else int_or_none(height_node.text) + +        format_note = '' +        if not format_note: +            format_note = None + +        return { +            'format_id': format_id + '-' + quality, +            'url': video_url, +            'ext': ext, +            'acodec': format_m.group('acodec'), +            'vcodec': format_m.group('vcodec'), +            'abr': abr, +            'vbr': vbr, +            'width': width, +            'height': height, +            'filesize': int_or_none(fnode.find('./filesize').text), +            'format_note': format_note, +            'protocol': proto, +            '_available': is_available, +        } + +    format_nodes = doc.findall('.//formitaeten/formitaet') +    formats = list(filter( +        lambda f: f['_available'], +        map(xml_to_format, format_nodes))) +    ie._sort_formats(formats) + +    return { +        'id': video_id, +        'title': title, +        'description': description, +        'duration': duration, +        'uploader': uploader, +        'uploader_id': uploader_id, +        'upload_date': upload_date, +        'formats': formats, +    } + +  class ZDFIE(InfoExtractor):      _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' @@ -32,77 +108,4 @@ class ZDFIE(InfoExtractor):          video_id = self._match_id(url)          xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id -        doc = self._download_xml( -            xml_url, video_id, -            note='Downloading video info', -            errnote='Failed to download video info') - -        title = doc.find('.//information/title').text -        description = doc.find('.//information/detail').text -        duration = int(doc.find('.//details/lengthSec').text) -        uploader_node = doc.find('.//details/originChannelTitle') -        uploader = None if uploader_node is None else uploader_node.text -        uploader_id_node = doc.find('.//details/originChannelId') -        uploader_id = None if uploader_id_node is None else uploader_id_node.text -        upload_date = unified_strdate(doc.find('.//details/airtime').text) - -        def xml_to_format(fnode): -            video_url = fnode.find('url').text -            is_available = 'http://www.metafilegenerator' not in video_url - -            format_id = fnode.attrib['basetype'] -            format_m = re.match(r'''(?x) -                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_ -                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+) -            ''', format_id) - -            ext = format_m.group('container') -            proto = format_m.group('proto').lower() - -            quality = fnode.find('./quality').text -            abr = int(fnode.find('./audioBitrate').text) // 1000 -            vbr_node = fnode.find('./videoBitrate') -            vbr = None if vbr_node is None else int(vbr_node.text) // 1000 - -            width_node = fnode.find('./width') -            width = None if width_node is None else int_or_none(width_node.text) -            height_node = fnode.find('./height') -            height = None if height_node is None else int_or_none(height_node.text) - -            format_note = '' -            if not format_note: -                format_note = None - -            return { -                'format_id': format_id + '-' + quality, -                'url': video_url, -                'ext': ext, -                'acodec': format_m.group('acodec'), -                'vcodec': format_m.group('vcodec'), -                'abr': abr, -                'vbr': vbr, -                'width': width, -                'height': height, -                'filesize': int_or_none(fnode.find('./filesize').text), -                'format_note': format_note, -                'protocol': proto, -                '_available': is_available, -            } - -        format_nodes = doc.findall('.//formitaeten/formitaet') -        formats = list(filter( -            lambda f: f['_available'], -            map(xml_to_format, format_nodes))) - -        self._sort_formats(formats) - -        return { -            'id': video_id, -            'title': title, -            'description': description, -            'duration': duration, -            'uploader': uploader, -            'uploader_id': uploader_id, -            'upload_date': upload_date, -            'formats': formats, -        }
\ No newline at end of file +        return extract_from_xml_url(self, video_id, xml_url)  | 
