diff options
author | ChocoLZS <61224208+ChocoLZS@users.noreply.github.com> | 2024-11-18 02:41:57 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-17 19:41:57 +0100 |
commit | d867f99622ef7fba690b08da56c39d739b822bb7 (patch) | |
tree | b237b6bf7dc9bb8e0821df6fe8ddbd521475e4da | |
parent | 10fc719bc7f1eef469389c5219102266ef411f29 (diff) |
[ie/PiaLive] Add extractor (#10811)
Authored by: ChocoLZS
-rw-r--r-- | yt_dlp/extractor/_extractors.py | 6 |
-rw-r--r-- | yt_dlp/extractor/pialive.py | 122 |
-rw-r--r-- | yt_dlp/extractor/piaulizaportal.py | 70 |
-rw-r--r-- | yt_dlp/extractor/uliza.py | 113 |
4 files changed, 240 insertions, 71 deletions
```diff
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 0d849c169..967010826 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1520,8 +1520,8 @@ from .pgatour import PGATourIE
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .pialive import PiaLiveIE
 from .piapro import PiaproIE
-from .piaulizaportal import PIAULIZAPortalIE
 from .picarto import (
     PicartoIE,
     PicartoVodIE,
@@ -2250,6 +2250,10 @@ from .ufctv import (
 )
 from .ukcolumn import UkColumnIE
 from .uktvplay import UKTVPlayIE
+from .uliza import (
+    UlizaPlayerIE,
+    UlizaPortalIE,
+)
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
diff --git a/yt_dlp/extractor/pialive.py b/yt_dlp/extractor/pialive.py
new file mode 100644
index 000000000..7469135c1
--- /dev/null
+++ b/yt_dlp/extractor/pialive.py
@@ -0,0 +1,122 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    extract_attributes,
+    get_element_by_class,
+    get_element_html_by_class,
+    multipart_encode,
+    str_or_none,
+    unified_timestamp,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class PiaLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
+    _PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
+    _PIA_LIVE_API_URL = 'https://api.pia-live.jp'
+    _API_KEY = 'kfds)FKFps-dms9e'
+    _TESTS = [{
+        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
+        'info_dict': {
+            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
+            'display_id': '2431867_001',
+            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
+            'live_status': 'was_live',
+            'comment_count': int,
+        },
+        'params': {
+            'getcomments': True,
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'skip': 'The video is no longer available',
+    }, {
+        'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
+        'info_dict': {
+            'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
+            'display_id': '2431867_002',
+            'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
+            'live_status': 'was_live',
+            'comment_count': int,
+        },
+        'params': {
+            'getcomments': True,
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'skip': 'The video is no longer available',
+    }]
+
+    def _extract_var(self, variable, html):
+        return self._search_regex(
+            rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+            html, f'variable {variable}', group='value')
+
+    def _real_extract(self, url):
+        video_key = self._match_id(url)
+        webpage = self._download_webpage(url, video_key)
+
+        program_code = self._extract_var('programCode', webpage)
+        article_code = self._extract_var('articleCode', webpage)
+        title = self._html_extract_title(webpage)
+
+        if get_element_html_by_class('play-end', webpage):
+            raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)
+
+        if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
+            date, time = self._search_regex(
+                r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
+                start_info, 'start_info', fatal=False, group=('date', 'time'))
+            if date and time:
+                release_timestamp_str = f'{date} {time} +09:00'
+                release_timestamp = unified_timestamp(release_timestamp_str)
+                self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
+                return {
+                    'id': program_code,
+                    'title': title,
+                    'live_status': 'is_upcoming',
+                    'release_timestamp': release_timestamp,
+                }
+
+        payload, content_type = multipart_encode({
+            'play_url': video_key,
+            'api_key': self._API_KEY,
+        })
+        api_data_and_headers = {
+            'data': payload,
+            'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
+        }
+
+        player_tag_list = self._download_json(
+            f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
+            'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)
+
+        return self.url_result(
+            extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
+            url_transparent=True, title=title, display_id=program_code,
+            __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))
+
+    def _get_comments(self, program_code, article_code, api_data_and_headers):
+        chat_room_url = traverse_obj(self._download_json(
+            f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
+            'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers),
+            ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))
+        if not chat_room_url:
+            return
+        comment_page = self._download_webpage(
+            chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
+            fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
+        if not comment_page:
+            return
+        yield from traverse_obj(self._search_json(
+            r'var\s+_history\s*=', comment_page, 'comment list',
+            program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False), (..., {
+            'timestamp': (0, {int}),
+            'author_is_uploader': (1, {lambda x: x == 2}),
+            'author': (2, {str}),
+            'text': (3, {str}),
+            'id': (4, {str_or_none}),
+        }))
diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py
deleted file mode 100644
index 1eb6d92b7..000000000
--- a/yt_dlp/extractor/piaulizaportal.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    parse_qs,
-    time_seconds,
-    traverse_obj,
-)
-
-
-class PIAULIZAPortalIE(InfoExtractor):
-    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
-    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
-    _TESTS = [{
-        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
-        'info_dict': {
-            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
-            'title': 'プレゼンテーションプレイヤーのサンプル',
-            'live_status': 'not_live',
-        },
-        'params': {
-            'skip_download': True,
-            'ignore_no_formats_error': True,
-        },
-    }, {
-        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
-        'info_dict': {
-            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
-            'title': '【確認用】視聴サンプルページ(ULIZA)',
-            'live_status': 'not_live',
-        },
-        'params': {
-            'skip_download': True,
-            'ignore_no_formats_error': True,
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
-        if expires and expires <= time_seconds():
-            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)
-
-        webpage = self._download_webpage(url, video_id)
-
-        player_data = self._download_webpage(
-            self._search_regex(
-                r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
-                webpage, 'player data url'),
-            video_id, headers={'Referer': 'https://ulizaportal.jp/'},
-            note='Fetching player data', errnote='Unable to fetch player data')
-
-        formats = self._extract_m3u8_formats(
-            self._search_regex(
-                r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
-                'm3u8 url', default=None),
-            video_id, fatal=False)
-        m3u8_type = self._search_regex(
-            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
-
-        return {
-            'id': video_id,
-            'title': self._html_extract_title(webpage),
-            'formats': formats,
-            'live_status': {
-                'video': 'is_live',
-                'dvr': 'was_live',  # short-term archives
-            }.get(m3u8_type, 'not_live'),  # VOD or long-term archives
-        }
diff --git a/yt_dlp/extractor/uliza.py b/yt_dlp/extractor/uliza.py
new file mode 100644
index 000000000..5766bd378
--- /dev/null
+++ b/yt_dlp/extractor/uliza.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    make_archive_id,
+    parse_qs,
+    time_seconds,
+)
+from ..utils.traversal import traverse_obj
+
+
+class UlizaPlayerIE(InfoExtractor):
+    _VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
+    _TESTS = [{
+        'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
+        'info_dict': {
+            'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
+            'ext': 'mp4',
+            'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
+            'live_status': 'was_live',
+            '_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
+        },
+    }, {
+        'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
+        'info_dict': {
+            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
+            'ext': 'mp4',
+            'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
+            'live_status': 'not_live',
+            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
+        },
+    }, {
+        'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
+        'info_dict': {
+            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
+            'ext': 'mp4',
+            'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
+            'live_status': 'not_live',
+            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        player_data = self._download_webpage(
+            url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
+            note='Fetching player data', errnote='Unable to fetch player data')
+
+        m3u8_url = self._search_regex(
+            r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
+        video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
+
+        formats = self._extract_m3u8_formats(m3u8_url, video_id)
+        m3u8_type = self._search_regex(
+            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+            'live_status': {
+                'video': 'is_live',
+                'dvr': 'was_live',  # short-term archives
+            }.get(m3u8_type, 'not_live'),  # VOD or long-term archives
+            '_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
+        }
+
+
+class UlizaPortalIE(InfoExtractor):
+    IE_DESC = 'ulizaportal.jp'
+    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
+    _TESTS = [{
+        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
+        'info_dict': {
+            'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
+            'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
+            'title': 'プレゼンテーションプレイヤーのサンプル',
+            'live_status': 'not_live',
+            '_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+    }, {
+        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
+        'info_dict': {
+            'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
+            'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
+            'title': '【確認用】視聴サンプルページ(ULIZA)',
+            'live_status': 'not_live',
+            '_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
+        if expires and expires <= time_seconds():
+            raise ExtractorError('The link is expired', video_id=video_id, expected=True)
+
+        webpage = self._download_webpage(url, video_id)
+
+        player_data_url = self._search_regex(
+            r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
+            webpage, 'player data url')
+        return self.url_result(
+            player_data_url, UlizaPlayerIE, url_transparent=True,
+            display_id=video_id, video_title=self._html_extract_title(webpage))
```