diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/mit.py | 67 |
1 files changed, 37 insertions, 30 deletions
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 76b717fe5..f50e36cb4 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -1,24 +1,28 @@ +from __future__ import unicode_literals + import re import json from .common import InfoExtractor from ..utils import ( + compat_urlparse, clean_html, get_element_by_id, ) class TechTVMITIE(InfoExtractor): - IE_NAME = u'techtv.mit.edu' + IE_NAME = 'techtv.mit.edu' _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' _TEST = { - u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', - u'file': u'25418.mp4', - u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f', - u'info_dict': { - u'title': u'MIT DNA Learning Center Set', - u'description': u'md5:82313335e8a8a3f243351ba55bc1b474', + 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', + 'md5': '1f8cb3e170d41fd74add04d3c9330e5f', + 'info_dict': { + 'id': '25418', + 'ext': 'mp4', + 'title': 'MIT DNA Learning Center Set', + 'description': 'md5:82313335e8a8a3f243351ba55bc1b474', }, } @@ -27,12 +31,12 @@ class TechTVMITIE(InfoExtractor): video_id = mobj.group('id') raw_page = self._download_webpage( 'http://techtv.mit.edu/videos/%s' % video_id, video_id) - clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page) + clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) - base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)', - raw_page, u'base url') - formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page, - u'video formats') + base_url = self._search_regex( + r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') + formats_json = self._search_regex( + r'bitrates: (\[.+?\])', raw_page, 'video formats') formats_mit = json.loads(formats_json) formats = [ { @@ -48,28 +52,32 @@ class TechTVMITIE(InfoExtractor): title = get_element_by_id('edit-title', clean_page) description = clean_html(get_element_by_id('edit-description', clean_page)) - thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'', - raw_page, u'thumbnail', flags=re.DOTALL) + thumbnail = self._search_regex( + r'playlist:.*?url: \'(.+?)\'', + raw_page, 'thumbnail', flags=re.DOTALL) - return {'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'thumbnail': thumbnail, + } class MITIE(TechTVMITIE): - IE_NAME = u'video.mit.edu' + IE_NAME = 'video.mit.edu' _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)' _TEST = { - u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', - u'file': u'21783.mp4', - u'md5': u'7db01d5ccc1895fc5010e9c9e13648da', - u'info_dict': { - u'title': u'The Government is Profiling You', - u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd', + 'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', + 'file': '.mp4', + 'md5': '7db01d5ccc1895fc5010e9c9e13648da', + 'info_dict': { + 'id': '21783', + 'ext': 'mp4', + 'title': 'The Government is Profiling You', + 'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd', }, } @@ -77,7 +85,6 @@ class MITIE(TechTVMITIE): mobj = re.match(self._VALID_URL, url) page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) - self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)) - embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage, - u'embed url') + embed_url = self._search_regex( + r'<iframe .*?src="(.+?)"', webpage, 'embed url') return self.url_result(embed_url, ie='TechTVMIT') |