diff options
author | YoshichikaAAA <154937389+YoshichikaAAA@users.noreply.github.com> | 2024-02-04 03:44:17 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-03 18:44:17 +0000 |
commit | e3ce2b385ec1f03fac9d4210c57fda77134495fc (patch) | |
tree | 06849ce20fdea4b796792c533e1443d1448cac6f /yt_dlp/extractor/radiko.py | |
parent | 4253e3b7f483127bd812bdac02466f4a5b47ff34 (diff) |
[ie/radiko] Extract more metadata (#9115)
Authored by: YoshichikaAAA
Diffstat (limited to 'yt_dlp/extractor/radiko.py')
-rw-r--r-- | yt_dlp/extractor/radiko.py | 11 |
1 file changed, 11 insertions, 0 deletions
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index c363d9ba5..2b6405999 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -1,5 +1,6 @@
 import base64
 import random
+import re
 import urllib.parse
 
 from .common import InfoExtractor
@@ -11,6 +12,7 @@ from ..utils import (
     unified_timestamp,
     update_url_query,
 )
+from ..utils.traversal import traverse_obj
 
 
 class RadikoBaseIE(InfoExtractor):
@@ -159,6 +161,12 @@ class RadikoBaseIE(InfoExtractor):
 
         return formats
 
+    def _extract_performers(self, prog):
+        performers = traverse_obj(prog, (
+            'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip}))
+        # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
+        return ', '.join(performers) or None
+
 
 class RadikoIE(RadikoBaseIE):
     _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@@ -186,10 +194,12 @@ class RadikoIE(RadikoBaseIE):
         return {
             'id': video_id,
             'title': try_call(lambda: prog.find('title').text),
+            'artist': self._extract_performers(prog),
             'description': clean_html(try_call(lambda: prog.find('info').text)),
             'uploader': try_call(lambda: station_program.find('.//name').text),
             'uploader_id': station,
             'timestamp': vid_int,
+            'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
             'is_live': True,
             'formats': self._extract_formats(
                 video_id=video_id, station=station, is_onair=False,
@@ -243,6 +253,7 @@ class RadikoRadioIE(RadikoBaseIE):
         return {
             'id': station,
             'title': title,
+            'artist': self._extract_performers(prog),
             'description': description,
             'uploader': station_name,
             'uploader_id': station,