diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/teletask.py | 50 |
1 file changed, 18 insertions, 32 deletions
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py index aa5535042..e54145105 100644 --- a/youtube_dl/extractor/teletask.py +++ b/youtube_dl/extractor/teletask.py @@ -1,13 +1,13 @@ -# coding: utf-8 from __future__ import unicode_literals + import re -import datetime from .common import InfoExtractor +from ..utils import unified_strdate class TeleTaskIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.tele-task.de/archive/video/html5/26168/', 'info_dict': { @@ -16,52 +16,38 @@ class TeleTaskIE(InfoExtractor): 'playlist': [{ 'md5': '290ef69fb2792e481169c3958dbfbd57', 'info_dict': { + 'id': '26168-speaker', + 'ext': 'mp4', 'title': 'Duplicate Detection', 'upload_date': '20141218', - 'id': 'speaker_26168', - 'ext': 'mp4', } - }, - { + }, { 'md5': 'e1e7218c5f0e4790015a437fcf6c71b4', 'info_dict': { + 'id': '26168-slides', + 'ext': 'mp4', 'title': 'Duplicate Detection', 'upload_date': '20141218', - 'id': 'slides_26168', - 'ext': 'mp4', } }] } def _real_extract(self, url): lecture_id = self._match_id(url) + webpage = self._download_webpage(url, lecture_id) title = self._html_search_regex( - r'itemprop="name">([^"]+)</a>', webpage, 'title') - url_speaker = self._html_search_regex( - r'class="speaker".*?src="([^"]+)"', webpage, 'video_url_speaker', flags=re.DOTALL) - url_slides = self._html_search_regex( - r'class="slides".*?src="([^"]+)"', webpage, 'video_url_slides', flags=re.DOTALL) - date = self._html_search_regex( - r'<td class="label">Date:</td><td>([^"]+)</td>', webpage, 'date') - date = datetime.datetime.strptime(date, '%d.%m.%Y').strftime('%Y%m%d') + r'itemprop="name">([^<]+)</a>', webpage, 'title') + upload_date = unified_strdate(self._html_search_regex( + r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False)) entries = [{ + 'id': '%s-%s' % (lecture_id, 
format_id), + 'url': video_url, 'title': title, - 'upload_date': date, - 'id': "speaker_"+lecture_id, - 'url': url_speaker, - }, - { - 'title': title, - 'upload_date': date, - 'id': "slides_"+lecture_id, - 'url': url_slides}] + 'upload_date': upload_date, + } for format_id, video_url in re.findall( + r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)] - return { - '_type': "playlist", - 'id': lecture_id, - 'title': title, - 'entries': entries, - } + return self.playlist_result(entries, lecture_id, title) |