From 953e32b2c1be077e65bba844010a5a2707af2e2b Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Wed, 7 Aug 2013 18:59:11 +0200 Subject: [dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. --- youtube_dl/extractor/dailymotion.py | 67 +++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 6 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 9bf7a28ca..eb2322d54 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,14 +1,49 @@ import re import json +import itertools +import socket from .common import InfoExtractor +from .subtitles import SubtitlesIE + from ..utils import ( + compat_http_client, + compat_urllib_error, compat_urllib_request, + compat_str, + get_element_by_attribute, + get_element_by_id, ExtractorError, ) -class DailymotionIE(InfoExtractor): + +class DailyMotionSubtitlesIE(SubtitlesIE): + + def _get_available_subtitles(self, video_id): + request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) + try: + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + info = json.loads(sub_list) + if (info['total'] > 0): + sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + return sub_lang_list + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + + def _get_subtitle_url(self, sub_lang, sub_name, 
video_id, format): + sub_lang_list = self._get_available_subtitles(video_id) + return sub_lang_list[sub_lang] + + def _request_automatic_caption(self, video_id, webpage): + self._downloader.report_warning(u'Automatic Captions not supported by dailymotion') + return {} + + +class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' @@ -18,7 +53,7 @@ class DailymotionIE(InfoExtractor): u'file': u'x33vw9.mp4', u'md5': u'392c4b85a60a90dc4792da41ce3144eb', u'info_dict': { - u"uploader": u"Alex and Van .", + u"uploader": u"Alex and Van .", u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" } } @@ -57,17 +92,36 @@ class DailymotionIE(InfoExtractor): # TODO: support choosing qualities - for key in ['stream_h264_hd1080_url','stream_h264_hd_url', - 'stream_h264_hq_url','stream_h264_url', + for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url', + 'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url']: - if info.get(key):#key in info and info[key]: + if info.get(key): # key in info and info[key]: max_quality = key - self.to_screen(u'Using %s' % key) + self.to_screen(u'%s: Using %s' % (video_id, key)) break else: raise ExtractorError(u'Unable to extract video URL') video_url = info[max_quality] + # subtitles + video_subtitles = None + video_webpage = None + + if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_subtitles(video_id) + elif self._downloader.params.get('writeautomaticsub', False): + video_subtitles = self._request_automatic_caption(video_id, video_webpage) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id) + return + + if 'length_seconds' not in info: + self._downloader.report_warning(u'unable to extract video duration') + video_duration = '' + else: + video_duration = 
compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) + return [{ 'id': video_id, 'url': video_url, @@ -75,5 +129,6 @@ class DailymotionIE(InfoExtractor): 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'ext': video_extension, + 'subtitles': video_subtitles, 'thumbnail': info['thumbnail_url'] }] -- cgit v1.2.3 From 8377574c9cb8740e24d45e9b3d30921fd6ec846c Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Thu, 8 Aug 2013 08:54:10 +0200 Subject: [internal] Improved subtitle architecture + (update in youtube/dailymotion) The structure of subtitles was refined; you only need to implement one method that returns a dictionary of the available subtitles (lang, url) to support all the subtitle options in a website. I updated the subtitle downloaders for youtube/dailymotion to show how it works. --- youtube_dl/extractor/dailymotion.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index eb2322d54..97003ee35 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,6 +1,5 @@ import re import json -import itertools import socket from .common import InfoExtractor @@ -34,16 +33,12 @@ class DailyMotionSubtitlesIE(SubtitlesIE): self._downloader.report_warning(u'video doesn\'t have subtitles') return {} - def _get_subtitle_url(self, sub_lang, sub_name, video_id, format): - sub_lang_list = self._get_available_subtitles(video_id) - return sub_lang_list[sub_lang] - def _request_automatic_caption(self, video_id, webpage): - self._downloader.report_warning(u'Automatic Captions not supported by dailymotion') + self._downloader.report_warning(u'Automatic Captions not supported by this server') return {} -class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor): +class DailymotionIE(DailyMotionSubtitlesIE): """Information Extractor for 
Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' @@ -116,12 +111,6 @@ class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor): self._list_available_subtitles(video_id) return - if 'length_seconds' not in info: - self._downloader.report_warning(u'unable to extract video duration') - video_duration = '' - else: - video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) - return [{ 'id': video_id, 'url': video_url, -- cgit v1.2.3 From 69df680b973841b61594c246a9cf4a708f09cb17 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Thu, 8 Aug 2013 11:20:56 +0200 Subject: [subtitles] Improved docs + new class for servers who don't support auto-caption --- youtube_dl/extractor/dailymotion.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 97003ee35..8fab16005 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -3,7 +3,7 @@ import json import socket from .common import InfoExtractor -from .subtitles import SubtitlesIE +from .subtitles import NoAutoSubtitlesIE from ..utils import ( compat_http_client, @@ -17,7 +17,7 @@ from ..utils import ( ) -class DailyMotionSubtitlesIE(SubtitlesIE): +class DailyMotionSubtitlesIE(NoAutoSubtitlesIE): def _get_available_subtitles(self, video_id): request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) @@ -33,11 +33,6 @@ class DailyMotionSubtitlesIE(SubtitlesIE): self._downloader.report_warning(u'video doesn\'t have subtitles') return {} - def _request_automatic_caption(self, video_id, webpage): - self._downloader.report_warning(u'Automatic Captions not supported by this server') - return {} - - class DailymotionIE(DailyMotionSubtitlesIE): """Information Extractor for Dailymotion""" -- cgit v1.2.3 From 
f8e52269c1a27c28aef606f010e2c64ff9a946d3 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Wed, 11 Sep 2013 15:21:09 +0200 Subject: [subtitles] made inheritance hierarchy flat as requested --- youtube_dl/extractor/dailymotion.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index f7dffd4cc..c7bcf6e8e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -18,23 +18,7 @@ from ..utils import ( ) -class DailyMotionSubtitlesIE(NoAutoSubtitlesIE): - - def _get_available_subtitles(self, video_id): - request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) - try: - sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) - return {} - info = json.loads(sub_list) - if (info['total'] > 0): - sub_lang_list = dict((l['language'], l['url']) for l in info['list']) - return sub_lang_list - self._downloader.report_warning(u'video doesn\'t have subtitles') - return {} - -class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor): +class DailymotionIE(NoAutoSubtitlesIE): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' @@ -120,6 +104,20 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor): 'thumbnail': info['thumbnail_url'] }] + def _get_available_subtitles(self, video_id): + request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) + try: + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + except 
(compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + info = json.loads(sub_list) + if (info['total'] > 0): + sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + return sub_lang_list + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + class DailymotionPlaylistIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' -- cgit v1.2.3 From 54d39d8b2f7a9fe148a24dd2785108b7d3823d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 11 Sep 2013 15:51:04 +0200 Subject: [subtitles] rename SubtitlesIE to SubtitlesInfoExtractor Otherwise it can be automatically detected as an IE ready for use. --- youtube_dl/extractor/dailymotion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index c7bcf6e8e..d73023b9e 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -4,7 +4,7 @@ import itertools import socket from .common import InfoExtractor -from .subtitles import NoAutoSubtitlesIE +from .subtitles import NoAutoSubtitlesInfoExtractor from ..utils import ( compat_http_client, @@ -18,7 +18,7 @@ from ..utils import ( ) -class DailymotionIE(NoAutoSubtitlesIE): +class DailymotionIE(NoAutoSubtitlesInfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' -- cgit v1.2.3 From d82134c3395c0912157c7ccae9f21d4b3375910b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 11 Sep 2013 16:05:49 +0200 Subject: [subtitles] Simplify the extraction of subtitles in subclasses and remove 
NoAutoSubtitlesInfoExtractor Subclasses just need to call the method extract_subtitles, which will call _extract_subtitles and _request_automatic_caption Now the default implementation of _request_automatic_caption returns {}. --- youtube_dl/extractor/dailymotion.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index d73023b9e..abd6a36ee 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -4,7 +4,7 @@ import itertools import socket from .common import InfoExtractor -from .subtitles import NoAutoSubtitlesInfoExtractor +from .subtitles import SubtitlesInfoExtractor from ..utils import ( compat_http_client, @@ -18,7 +18,7 @@ from ..utils import ( ) -class DailymotionIE(NoAutoSubtitlesInfoExtractor): +class DailymotionIE(SubtitlesInfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' @@ -81,14 +81,7 @@ class DailymotionIE(NoAutoSubtitlesInfoExtractor): video_url = info[max_quality] # subtitles - video_subtitles = None - video_webpage = None - - if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False): - video_subtitles = self._extract_subtitles(video_id) - elif self._downloader.params.get('writeautomaticsub', False): - video_subtitles = self._request_automatic_caption(video_id, video_webpage) - + video_subtitles = self.extract_subtitles(video_id) if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(video_id) return -- cgit v1.2.3 From 7fad1c6328b02ba9f23d37f374a05255abfe38a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 11 Sep 2013 16:24:47 +0200 Subject: [subtitles] Use self._download_webpage for extracting the subtitles It raises 
ExtractorError for the same exceptions we have to catch. --- youtube_dl/extractor/dailymotion.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'youtube_dl/extractor/dailymotion.py') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index abd6a36ee..360113f9c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,14 +1,11 @@ import re import json import itertools -import socket from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor from ..utils import ( - compat_http_client, - compat_urllib_error, compat_urllib_request, compat_str, get_element_by_attribute, @@ -98,10 +95,11 @@ class DailymotionIE(SubtitlesInfoExtractor): }] def _get_available_subtitles(self, video_id): - request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) try: - sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + sub_list = self._download_webpage( + 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, + video_id, note=False) + except ExtractorError as err: self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) return {} info = json.loads(sub_list) -- cgit v1.2.3