diff options
author | Sergey M․ <dstftw@gmail.com> | 2021-03-02 06:03:17 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2021-03-02 06:07:30 +0700 |
commit | 3fb14cd214fdadfae195745b26498e012f78be8e (patch) | |
tree | 4cd8aa3250ccfe4b782a947a9d072ab2c76a1ac7 /youtube_dl/extractor/phoenix.py | |
parent | bee618268014480bb3dd7887986b456c8e9c0236 (diff) |
[zdf] Rework extractors (closes #11606, closes #13473, closes #17354, closes #21185, closes #26711, closes #27068, closes #27930, closes #28198, closes #28199, closes #28274)
* Generalize unique video ids for zdf based extractors
* Improve extraction
* Fix 3sat and phoenix
Diffstat (limited to 'youtube_dl/extractor/phoenix.py')
-rw-r--r-- | youtube_dl/extractor/phoenix.py | 149 |
1 files changed, 116 insertions, 33 deletions
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index e435c28e1..dbbfce983 100644 --- a/youtube_dl/extractor/phoenix.py +++ b/youtube_dl/extractor/phoenix.py @@ -1,45 +1,128 @@ +# coding: utf-8 from __future__ import unicode_literals -from .dreisat import DreiSatIE +import re +from .youtube import YoutubeIE +from .zdf import ZDFBaseIE +from ..compat import compat_str +from ..utils import ( + int_or_none, + merge_dicts, + unified_timestamp, + xpath_text, +) -class PhoenixIE(DreiSatIE): + +class PhoenixIE(ZDFBaseIE): IE_NAME = 'phoenix.de' - _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ - (?: - phoenix/die_sendungen/(?:[^/]+/)? - )? - (?P<id>[0-9]+)''' - _TESTS = [ - { - 'url': 'http://www.phoenix.de/content/884301', - 'md5': 'ed249f045256150c92e72dbb70eadec6', - 'info_dict': { - 'id': '884301', - 'ext': 'mp4', - 'title': 'Michael Krons mit Hans-Werner Sinn', - 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', - 'upload_date': '20141025', - 'uploader': 'Im Dialog', - } + _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html' + _TESTS = [{ + # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html + 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html', + 'md5': '34ec321e7eb34231fd88616c65c92db0', + 'info_dict': { + 'id': '210222_phx_nachgehakt_corona_protest', + 'ext': 'mp4', + 'title': 'Wohin führt der Protest in der Pandemie?', + 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', + 'duration': 1691, + 'timestamp': 1613906100, + 'upload_date': '20210221', + 'uploader': 'Phoenix', + 'channel': 'corona nachgehakt', }, - { - 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', - 'only_matching': True, + }, { + # Youtube embed + 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html', + 'info_dict': { + 'id': 'hMQtqFYjomk', + 'ext': 'mp4', + 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?', + 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd', + 'duration': 3509, + 'upload_date': '20201219', + 'uploader': 'phoenix', + 'uploader_id': 'phoenix', }, - { - 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234', - 'only_matching': True, + 'params': { + 'skip_download': True, }, - ] + }, { + 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html', + 'only_matching': True, + }, { + # no media + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html', + 'only_matching': True, + }, { + # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + article_id = self._match_id(url) + + article = self._download_json( + 'https://www.phoenix.de/response/id/%s' % article_id, article_id, + 'Downloading article JSON') + + video = article['absaetze'][0] + title = video.get('titel') or article.get('subtitel') + + if video.get('typ') == 'video-youtube': + video_id = video['id'] + return self.url_result( + video_id, ie=YoutubeIE.ie_key(), video_id=video_id, + video_title=title) + + video_id = compat_str(video.get('basename') or video.get('content')) - internal_id = self._search_regex( - r'<div class="phx_vod" id="phx_vod_([0-9]+)"', - webpage, 'internal video ID') + details = self._download_xml( + 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', + video_id, 'Downloading details XML', query={ + 'ak': 'web', + 'ptmd': 'true', + 'id': video_id, + 'profile': 'player2', + }) + + title = title or xpath_text( + details, './/information/title', 'title', fatal=True) + content_id = xpath_text( + details, './/video/details/basename', 'content id', fatal=True) + + info = self._extract_ptmd( + 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, + content_id, None, url) + + timestamp = unified_timestamp(xpath_text(details, './/details/airtime')) + + thumbnails = [] + for node in details.findall('.//teaserimages/teaserimage'): + thumbnail_url = node.text + if not thumbnail_url: + continue + thumbnail = { + 'url': thumbnail_url, + } + thumbnail_key = node.get('key') + if thumbnail_key: + m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) + thumbnails.append(thumbnail) - api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id - return self.extract_from_xml_url(video_id, api_url) + return merge_dicts(info, { + 'id': content_id, + 'title': title, + 'description': xpath_text(details, './/information/detail'), + 'duration': int_or_none(xpath_text(details, './/details/lengthSec')), + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'uploader': xpath_text(details, './/details/channel'), + 'uploader_id': xpath_text(details, './/details/originChannelId'), + 'channel': xpath_text(details, './/details/originChannelTitle'), + }) |