aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2015-02-15 18:21:42 +0100
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2015-02-16 21:51:07 +0100
commitb5857f62e2c2ca70316e041212aa9e89d54cc253 (patch)
tree24be6c3c3d406cd9b19574192027df7414f81344 /youtube_dl/extractor
parenta504ced097e703a9bc6c18b6e31bcafb4783ed80 (diff)
[crunchyroll] Convert to new subtitles system
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/crunchyroll.py66
1 files changed, 35 insertions, 31 deletions
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 1680f532f..f1da7d09b 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
from hashlib import sha1
from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
@@ -25,10 +25,9 @@ from ..aes import (
aes_cbc_decrypt,
inc,
)
-from .common import InfoExtractor
-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output
+ def _get_subtitles(self, video_id, webpage):
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+ sub_page = self._download_webpage(
+ 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+ video_id, note='Downloading subtitles for ' + sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+ if not id or not iv or not data:
+ continue
+ id = int(id)
+ iv = base64.b64decode(iv)
+ data = base64.b64decode(data)
+
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ sub_root = xml.etree.ElementTree.fromstring(subtitle)
+ subtitles[lang_code] = [
+ {
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ },
+ {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ },
+ ]
+ return subtitles
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
@@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'format_id': video_format,
})
- subtitles = {}
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
- for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(
- 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
- video_id, note='Downloading subtitles for ' + sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
- if not id or not iv or not data:
- continue
- id = int(id)
- iv = base64.b64decode(iv)
- data = base64.b64decode(data)
-
- subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
- lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
- if not lang_code:
- continue
- sub_root = xml.etree.ElementTree.fromstring(subtitle)
- if sub_format == 'ass':
- subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
- else:
- subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,