diff options
Diffstat (limited to 'youtube_dl/extractor/teletask.py')
| -rw-r--r-- | youtube_dl/extractor/teletask.py | 50 | 
1 files changed, 18 insertions, 32 deletions
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py
index aa5535042..e54145105 100644
--- a/youtube_dl/extractor/teletask.py
+++ b/youtube_dl/extractor/teletask.py
@@ -1,13 +1,13 @@
-# coding: utf-8
 from __future__ import unicode_literals
+
 import re
-import datetime
 
 from .common import InfoExtractor
+from ..utils import unified_strdate
 
 
 class TeleTaskIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.tele-task.de/archive/video/html5/26168/',
         'info_dict': {
@@ -16,52 +16,38 @@ class TeleTaskIE(InfoExtractor):
         'playlist': [{
             'md5': '290ef69fb2792e481169c3958dbfbd57',
             'info_dict': {
+                'id': '26168-speaker',
+                'ext': 'mp4',
                 'title': 'Duplicate Detection',
                 'upload_date': '20141218',
-                'id': 'speaker_26168',
-                'ext': 'mp4',
             }
-        },
-            {
+        }, {
             'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
             'info_dict': {
+                'id': '26168-slides',
+                'ext': 'mp4',
                 'title': 'Duplicate Detection',
                 'upload_date': '20141218',
-                'id': 'slides_26168',
-                'ext': 'mp4',
             }
         }]
     }
 
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
+
         webpage = self._download_webpage(url, lecture_id)
 
         title = self._html_search_regex(
-            r'itemprop="name">([^"]+)</a>', webpage, 'title')
-        url_speaker = self._html_search_regex(
-            r'class="speaker".*?src="([^"]+)"', webpage, 'video_url_speaker', flags=re.DOTALL)
-        url_slides = self._html_search_regex(
-            r'class="slides".*?src="([^"]+)"', webpage, 'video_url_slides', flags=re.DOTALL)
-        date = self._html_search_regex(
-            r'<td class="label">Date:</td><td>([^"]+)</td>', webpage, 'date')
-        date = datetime.datetime.strptime(date, '%d.%m.%Y').strftime('%Y%m%d')
+            r'itemprop="name">([^<]+)</a>', webpage, 'title')
+        upload_date = unified_strdate(self._html_search_regex(
+            r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
 
         entries = [{
+            'id': '%s-%s' % (lecture_id, format_id),
+            'url': video_url,
             'title': title,
-            'upload_date': date,
-            'id': "speaker_"+lecture_id,
-            'url': url_speaker,
-        },
-            {
-            'title': title,
-            'upload_date': date,
-            'id': "slides_"+lecture_id,
-            'url': url_slides}]
+            'upload_date': upload_date,
+        } for format_id, video_url in re.findall(
+            r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
 
-        return {
-            '_type': "playlist",
-            'id': lecture_id,
-            'title': title,
-            'entries': entries,
-        }
+        return self.playlist_result(entries, lecture_id, title)
