diff options
Diffstat (limited to 'youtube_dl/extractor/lecture2go.py')
| -rw-r--r-- | youtube_dl/extractor/lecture2go.py | 62 | 
1 files changed, 62 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py new file mode 100644 index 000000000..40a3d2346 --- /dev/null +++ b/youtube_dl/extractor/lecture2go.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    determine_ext, +    parse_duration, +    int_or_none, +) + + +class Lecture2GoIE(InfoExtractor): +    _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)' +    _TEST = { +        'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473', +        'md5': 'ac02b570883020d208d405d5a3fd2f7f', +        'info_dict': { +            'id': '17473', +            'ext': 'flv', +            'title': '2 - Endliche Automaten und reguläre Sprachen', +            'creator': 'Frank Heitmann', +            'duration': 5220, +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title') + +        formats = [] +        for url in set(re.findall(r'"src","([^"]+)"', webpage)): +            ext = determine_ext(url) +            if ext == 'f4m': +                formats.extend(self._extract_f4m_formats(url, video_id)) +            elif ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats(url, video_id)) +            else: +                formats.append({ +                    'url': url, +                }) + +        self._sort_formats(formats) + +        creator = self._html_search_regex( +            r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False) +        duration = parse_duration(self._html_search_regex( +            r'Duration:\s*</em>\s*<em[^>]*>([^<]+)</em>', webpage, 'duration', fatal=False)) +        view_count = int_or_none(self._html_search_regex( +            r'Views:\s*</em>\s*<em[^>]+>(\d+)</em>', webpage, 'view count', fatal=False)) + +        return { +            'id': video_id, +            'title': title, +            'formats': formats, +            'creator': creator, +            'duration': duration, +            'view_count': view_count, +        } | 
