diff options
author | Allan Zhou <allanzp@gmail.com> | 2013-08-30 17:51:59 -0700 |
---|---|---|
committer | Allan Zhou <allanzp@gmail.com> | 2013-08-30 17:51:59 -0700 |
commit | 85f03346ebe38c0b4cd46ab64bf9b5825d12895a (patch) | |
tree | 2aa3c34c3380174cbb10df809c7607f26c19b018 /youtube_dl/extractor/mit.py | |
parent | bdc6b3fc64a03045b8130cdc824ee3f6c15eeff1 (diff) | |
parent | 10f5c016ec6262e5d29327e97fe4f3d1127ccdff (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'youtube_dl/extractor/mit.py')
-rw-r--r-- | youtube_dl/extractor/mit.py | 16 |
1 files changed, 7 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index d09d03e36..52be9232f 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        webpage = self._download_webpage(
+        raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        embed_page = self._download_webpage(
-            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
-            note=u'Downloading embed page')
+        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
 
         base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            embed_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            raw_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
             u'video formats')
         formats = json.loads(formats_json)
         formats = sorted(formats, key=lambda f: f['bitrate'])
 
-        title = get_element_by_id('edit-title', webpage)
-        description = clean_html(get_element_by_id('edit-description', webpage))
+        title = get_element_by_id('edit-title', clean_page)
+        description = clean_html(get_element_by_id('edit-description', clean_page))
         thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            embed_page, u'thumbnail', flags=re.DOTALL)
+            raw_page, u'thumbnail', flags=re.DOTALL)
 
         return {'id': video_id,
                 'title': title,
```