diff options
Diffstat (limited to 'youtube_dl/extractor/crunchyroll.py')
-rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 28 |
1 files changed, 24 insertions, 4 deletions
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ce123482e..95952bc29 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -31,7 +31,23 @@ from ..aes import ( ) -class CrunchyrollIE(InfoExtractor): +class CrunchyrollBaseIE(InfoExtractor): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else compat_urllib_request.Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/rg3/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. + request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage( + request, video_id, note, errnote, fatal, tries, timeout, encoding) + + +class CrunchyrollIE(CrunchyrollBaseIE): _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)' _NETRC_MACHINE = 'crunchyroll' _TESTS = [{ @@ -259,10 +275,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') if not video_description: video_description = None - video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) + video_upload_date = self._html_search_regex( + [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'], + webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) if video_upload_date: video_upload_date = unified_strdate(video_upload_date) - video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) + video_uploader = self._html_search_regex( + r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, + 'video_uploader', fatal=False) playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = compat_urllib_request.Request(playerdata_url) @@ -330,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text } -class CrunchyrollShowPlaylistIE(InfoExtractor): +class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): IE_NAME = "crunchyroll:playlist" _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?$' |