diff options
Diffstat (limited to 'youtube_dl/InfoExtractors.py')
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 59 |
1 files changed, 58 insertions, 1 deletions
class MySpassIE(InfoExtractor):
    """Information extractor for video pages on www.myspass.de."""

    # Dots are escaped so that they only match a literal '.' in the host name.
    _VALID_URL = r'http://www\.myspass\.de/.*'

    def _real_extract(self, url):
        """Extract the download information for a single myspass.de video.

        The video id is taken from the URL path, the site's metadata XML
        document for that id is downloaded, and the values needed for the
        download are read from it.

        Returns a one-element list holding the info dictionary (keys: id,
        url, title, ext, format, thumbnail, description).  If the download
        URL or the title cannot be found, the problem is reported through
        self._downloader.trouble() and None is returned.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # video id is the last path element of the URL
        # usually there is a trailing slash, so also try the second but last
        url_path = compat_urllib_parse_urlparse(url).path
        url_parent_path, video_id = os.path.split(url_path)
        if not video_id:
            _, video_id = os.path.split(url_parent_path)

        # get metadata
        metadata_url = META_DATA_URL_TEMPLATE % video_id
        metadata_text = self._download_webpage(metadata_url, video_id)
        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))

        # extract values from metadata
        url_flv_el = metadata.find('url_flv')
        # An element that exists but has no text is as useless as a missing
        # one: os.path.splitext() below would fail on None.
        if url_flv_el is None or not url_flv_el.text:
            self._downloader.trouble(u'ERROR: unable to extract download url')
            return
        video_url = url_flv_el.text
        # splitext() keeps the leading dot of the suffix; strip it
        extension = os.path.splitext(video_url)[1][1:]

        title_el = metadata.find('title')
        if title_el is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        title = title_el.text

        # Without an explicit format id, fall back to the file extension.
        format_id_el = metadata.find('format_id')
        if format_id_el is None:
            video_format = extension
        else:
            video_format = format_id_el.text

        # description and thumbnail are optional
        description_el = metadata.find('description')
        description = description_el.text if description_el is not None else None

        image_preview_el = metadata.find('imagePreview')
        thumbnail = image_preview_el.text if image_preview_el is not None else None

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': extension,
            'format': video_format,
            'thumbnail': thumbnail,
            'description': description
        }
        return [info]