aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJ.Luis <neonmann@gmail.com>2025-04-06 20:48:07 +0200
committerGitHub <noreply@github.com>2025-04-06 20:48:07 +0200
commit7faa18b83dcfc74a1a1e2034e6b0369c495ca645 (patch)
tree7d293ecb68116ab6f0c7749454ccd61607edf418
parenta473e592337edb8ca40cde52c1fcaee261c54df9 (diff)
[ie/ivoox] Add extractor (#12768)
Authored by: NeonMan, seproDev Co-authored-by: sepro <sepro@sepr0.com>
-rw-r--r--yt_dlp/extractor/_extractors.py1
-rw-r--r--yt_dlp/extractor/ivoox.py78
2 files changed, 79 insertions, 0 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index a206a38e4..5ae7a8a97 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -903,6 +903,7 @@ from .ivi import (
IviIE,
)
from .ivideon import IvideonIE
+from .ivoox import IvooxIE
from .iwara import (
IwaraIE,
IwaraPlaylistIE,
diff --git a/yt_dlp/extractor/ivoox.py b/yt_dlp/extractor/ivoox.py
new file mode 100644
index 000000000..36e02493a
--- /dev/null
+++ b/yt_dlp/extractor/ivoox.py
@@ -0,0 +1,78 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_iso8601, url_or_none, urljoin
+from ..utils.traversal import traverse_obj
+
+
+class IvooxIE(InfoExtractor):
+ _VALID_URL = (
+ r'https?://(?:www\.)?ivoox\.com/(?:\w{2}/)?[^/?#]+_rf_(?P<id>[0-9]+)_1\.html',
+ r'https?://go\.ivoox\.com/rf/(?P<id>[0-9]+)',
+ )
+ _TESTS = [{
+ 'url': 'https://www.ivoox.com/dex-08x30-rostros-del-mal-los-asesinos-en-audios-mp3_rf_143594959_1.html',
+ 'md5': '993f712de5b7d552459fc66aa3726885',
+ 'info_dict': {
+ 'id': '143594959',
+ 'ext': 'mp3',
+ 'timestamp': 1742731200,
+ 'channel': 'DIAS EXTRAÑOS con Santiago Camacho',
+ 'title': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
+ 'description': 'md5:eae8b4b9740d0216d3871390b056bb08',
+ 'uploader': 'Santiago Camacho',
+ 'thumbnail': 'https://static-1.ivoox.com/audios/c/d/5/2/cd52f46783fe735000c33a803dce2554_XXL.jpg',
+ 'upload_date': '20250323',
+ 'episode': 'DEx 08x30 Rostros del mal: Los asesinos en serie que aterrorizaron España',
+ 'duration': 11837,
+ 'tags': ['españa', 'asesinos en serie', 'arropiero', 'historia criminal', 'mataviejas'],
+ },
+ }, {
+ 'url': 'https://go.ivoox.com/rf/143594959',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ivoox.com/en/campodelgas-28-03-2025-audios-mp3_rf_144036942_1.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ webpage = self._download_webpage(url, media_id, fatal=False)
+
+ data = self._search_nuxt_data(
+ webpage, media_id, fatal=False, traverse=('data', 0, 'data', 'audio'))
+
+ direct_download = self._download_json(
+ f'https://vcore-web.ivoox.com/v1/public/audios/{media_id}/download-url', media_id, fatal=False,
+ note='Fetching direct download link', headers={'Referer': url})
+
+ download_paths = {
+ *traverse_obj(direct_download, ('data', 'downloadUrl', {str}, filter, all)),
+ *traverse_obj(data, (('downloadUrl', 'mediaUrl'), {str}, filter)),
+ }
+
+ formats = []
+ for path in download_paths:
+ formats.append({
+ 'url': urljoin('https://ivoox.com', path),
+ 'http_headers': {'Referer': url},
+ })
+
+ return {
+ 'id': media_id,
+ 'formats': formats,
+ 'uploader': self._html_search_regex(r'data-prm-author="([^"]+)"', webpage, 'author', default=None),
+ 'timestamp': parse_iso8601(
+ self._html_search_regex(r'data-prm-pubdate="([^"]+)"', webpage, 'timestamp', default=None)),
+ 'channel': self._html_search_regex(r'data-prm-podname="([^"]+)"', webpage, 'channel', default=None),
+ 'title': self._html_search_regex(r'data-prm-title="([^"]+)"', webpage, 'title', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'description': self._og_search_description(webpage, default=None),
+ **self._search_json_ld(webpage, media_id, default={}),
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'thumbnail': ('image', {url_or_none}),
+ 'timestamp': ('uploadDate', {parse_iso8601(delimiter=' ')}),
+ 'duration': ('duration', {int_or_none}),
+ 'tags': ('tags', ..., 'name', {str}),
+ }),
+ }