diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-12-09 00:46:28 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-12-09 00:48:04 +0700 |
commit | 1115271ac61b89cc4ac1ca922eff8a4bed0fbf57 (patch) | |
tree | 013d5d049b3ccc56710bad5c48b0ef891788a5fb /youtube_dl/extractor | |
parent | d21d0ba6c14e8a6696130090641da4e2028e1bb3 (diff) |
[raiplay:playlist] Fix issues and improve (closes #14563)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/extractors.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/rai.py | 62 |
2 files changed, 37 insertions, 27 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8f9f94cc..b9c97fac4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -856,8 +856,8 @@ from .radiofrance import RadioFranceIE from .rai import ( RaiPlayIE, RaiPlayLiveIE, + RaiPlayPlaylistIE, RaiIE, - RaiPlaylistIE, ) from .rbmaradio import RBMARadioIE from .rds import RDSIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 625458380..d22311031 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -17,6 +17,7 @@ from ..utils import ( parse_duration, strip_or_none, try_get, + unescapeHTML, unified_strdate, unified_timestamp, update_url_query, @@ -249,6 +250,41 @@ class RaiPlayLiveIE(RaiBaseIE): } +class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._html_search_meta( + ('programma', 'nomeProgramma'), webpage, 'title') + description = unescapeHTML(self._html_search_meta( + ('description', 'og:description'), webpage, 'description')) + print(description) + + entries = [] + for mobj in re.finditer( + r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', + webpage): + video_url = urljoin(url, mobj.group('path')) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result(entries, playlist_id, title, description) + + class RaiIE(RaiBaseIE): _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE _TESTS = [{ @@ -455,29 +491,3 @@ class RaiIE(RaiBaseIE): info.update(relinker_info) return info - - -class RaiPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/]+)' - _TESTS = [{ - 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', - 'info_dict': { - 'id': 'nondirloalmiocapo', - 'title': 'Non dirlo al mio capo', - }, - 'playlist_mincount': 12, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - title = self._html_search_meta('programma', webpage, default=None) - video_urls = re.findall(' href="(/raiplay/video.+)"', webpage) - video_urls = [urljoin(url, video_url) for video_url in video_urls] - entries = [ - self.url_result( - video_url, - RaiPlayIE.ie_key()) - for video_url in video_urls if RaiPlayIE.suitable(video_url) - ] - return self.playlist_result(entries, playlist_id, title) |