aboutsummaryrefslogtreecommitdiff
path: root/yt_dlp/extractor/arte.py
diff options
context:
space:
mode:
authorpukkandan <pukkandan.ytdlp@gmail.com>2022-03-04 22:31:04 +0530
committerpukkandan <pukkandan.ytdlp@gmail.com>2022-03-05 02:24:17 +0530
commit50e93e03a7ca6ae35a319ea310104f7d6d91eee3 (patch)
tree2ed09999b84e49cd1f7c7ca986f6bdab09879e99 /yt_dlp/extractor/arte.py
parent72e995f1226a2fce3cd7f5fda50ebe024fe2a57e (diff)
Update to ytdl-commit-6508688
Make default upload_/release_date a compat_str https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a Except: * "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531 - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9 - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798 - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446
Diffstat (limited to 'yt_dlp/extractor/arte.py')
-rw-r--r--yt_dlp/extractor/arte.py42
1 files changed, 42 insertions, 0 deletions
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index 296b169d2..a7ffdc24c 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -12,6 +12,7 @@ from ..utils import (
int_or_none,
parse_qs,
qualities,
+ strip_or_none,
try_get,
unified_strdate,
url_or_none,
@@ -253,3 +254,44 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText')
return self.playlist_result(entries, playlist_id, title, description)
+
+
class ArteTVCategoryIE(ArteTVBaseIE):
    """Extract an arte.tv category page (``/<lang>/videos/<category>/``) as a playlist.

    The page is scraped for links to same-language arte.tv video or
    playlist URLs; every link accepted by ArteTVIE or ArteTVPlaylistIE
    becomes a playlist entry.
    """

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    },
    ]

    @classmethod
    def suitable(cls, url):
        """Return True only if *url* matches this extractor and no more specific one.

        _VALID_URL is deliberately broad, so URLs that ArteTVIE or
        ArteTVPlaylistIE accept are rejected here and left to them.
        """
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        """Download the category page and return a playlist of the videos it links to."""
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        # Collect every anchor that points at a same-language arte.tv
        # "videos" URL which one of the dedicated extractors can handle.
        items = []
        for mobj in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video_url = mobj.group('url')
            # Skip links back to the category page itself.
            if video_url == url:
                continue
            if any(ie.suitable(video_url) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video_url)

        # Prefer og:title and fall back to the <title> element. The fallback
        # must be given the page and a field name, otherwise the call raises
        # TypeError instead of searching anything.
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None))
        # Drop the part after the last '|' separator. Guard against a page
        # that yielded no title at all so the generic fallback is reachable.
        title = strip_or_none((title or '').rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(
            items, playlist_id=playlist_id, playlist_title=title,
            description=self._og_search_description(webpage, default=None))