diff options
Diffstat (limited to 'youtube_dl/extractor/crunchyroll.py')
| -rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 24 | 
1 files changed, 15 insertions, 9 deletions
| diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 0c9b8ca02..00d943f77 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -21,7 +21,9 @@ from ..utils import (      bytes_to_intlist,      intlist_to_bytes,      int_or_none, +    lowercase_escape,      remove_end, +    sanitized_Request,      unified_strdate,      urlencode_postdata,      xpath_text, @@ -45,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):              'name': username,              'password': password,          }) -        login_request = compat_urllib_request.Request(login_url, data) +        login_request = sanitized_Request(login_url, data)          login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')          self._download_webpage(login_request, None, False, 'Wrong login info') @@ -54,7 +56,7 @@ class CrunchyrollBaseIE(InfoExtractor):      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):          request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) -                   else compat_urllib_request.Request(url_or_request)) +                   else sanitized_Request(url_or_request))          # Accept-Language must be set explicitly to accept any language to avoid issues          # similar to https://github.com/rg3/youtube-dl/issues/6797.          # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction @@ -104,7 +106,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):              'id': '589804',              'ext': 'flv',              'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11', -            'description': 'md5:fe2743efedb49d279552926d0bd0cd9e', +            'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',              'thumbnail': 're:^https?://.*\.jpg$',              'uploader': 'Danny Choo Network',              'upload_date': '20120213', @@ -287,11 +289,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text          if 'To view this, please log in to verify you are 18 or older.' in webpage:              self.raise_login_required() -        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL) +        video_title = self._html_search_regex( +            r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>', +            webpage, 'video_title')          video_title = re.sub(r' {2,}', ' ', video_title) -        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') -        if not video_description: -            video_description = None +        video_description = self._html_search_regex( +            r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id, +            webpage, 'description', default=None) +        if video_description: +            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))          video_upload_date = self._html_search_regex(              [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],              webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) @@ -302,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text              'video_uploader', fatal=False)          playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) -        playerdata_req = compat_urllib_request.Request(playerdata_url) +        playerdata_req = sanitized_Request(playerdata_url)          playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})          playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')          playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') @@ -314,7 +320,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text          for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):              stream_quality, stream_format = self._FORMAT_IDS[fmt]              video_format = fmt + 'p' -            streamdata_req = compat_urllib_request.Request( +            streamdata_req = sanitized_Request(                  'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'                  % (stream_id, stream_format, stream_quality),                  compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8')) | 
