aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/rbgtum.py
blob: da48ebbc49493c636b915f675974e052041553ee (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class RbgTumIE(InfoExtractor):
    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
        lecture_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')
        lecture_series_title = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')

        formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': lecture_title,
            'series': lecture_series_title,
            'formats': formats,
        }


class RbgTumCourseIE(InfoExtractor):
    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]

    def _real_extract(self, url):
        course_id = self._match_id(url)
        webpage = self._download_webpage(url, course_id)

        lecture_series_title = self._html_search_regex(r'(?si)<h1.*?>(.*)</h1>', webpage, 'title')

        lecture_urls = []
        for lecture_url in re.findall(r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage):
            lecture_urls.append(self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key()))

        return self.playlist_result(lecture_urls, course_id, lecture_series_title)