author    | sepro <4618135+seproDev@users.noreply.github.com> | 2023-11-26 04:09:59 +0100
committer | GitHub <noreply@github.com> | 2023-11-26 03:09:59 +0000
commit    | 9751a457cfdb18bf99d9ee0d10e4e6a594502bbf (patch)
tree      | 72d8f0b497ec27b3bfafc64194ec3882ee1c5a49 /yt_dlp/extractor/linuxacademy.py
parent    | 5a230233d6fce06f4abd1fce0dc92b948e6f780b (diff)
[cleanup] Remove dead extractors (#8604)
Closes #1609, Closes #3232, Closes #4763, Closes #6026, Closes #6322, Closes #7912
Authored by: seproDev
Diffstat (limited to 'yt_dlp/extractor/linuxacademy.py')
-rw-r--r-- | yt_dlp/extractor/linuxacademy.py | 238
1 file changed, 0 insertions, 238 deletions
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
deleted file mode 100644
index 0b1644293..000000000
--- a/yt_dlp/extractor/linuxacademy.py
+++ /dev/null
@@ -1,238 +0,0 @@
-import json
-import random
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode, compat_str
-from ..networking.exceptions import HTTPError
-from ..utils import (
-    clean_html,
-    ExtractorError,
-    js_to_json,
-    parse_duration,
-    try_get,
-    unified_timestamp,
-    urlencode_postdata,
-    urljoin,
-)
-
-
-class LinuxAcademyIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:www\.)?linuxacademy\.com/cp/
-                        (?:
-                            courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
-                            modules/view/id/(?P<course_id>\d+)
-                        )
-                    '''
-    _TESTS = [{
-        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
-        'info_dict': {
-            'id': '7971-2',
-            'ext': 'mp4',
-            'title': 'What Is Data Science',
-            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
-            'timestamp': int,  # The timestamp and upload date changes
-            'upload_date': r're:\d+',
-            'duration': 304,
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Requires Linux Academy account credentials',
-    }, {
-        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
-        'only_matching': True,
-    }, {
-        'url': 'https://linuxacademy.com/cp/modules/view/id/154',
-        'info_dict': {
-            'id': '154',
-            'title': 'AWS Certified Cloud Practitioner',
-            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
-            'duration': 28835,
-        },
-        'playlist_count': 41,
-        'skip': 'Requires Linux Academy account credentials',
-    }, {
-        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
-        'info_dict': {
-            'id': '39',
-            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
-            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
-            'duration': 89280,
-        },
-        'playlist_count': 73,
-        'skip': 'Requires Linux Academy account credentials',
-    }]
-
-    _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
-    _ORIGIN_URL = 'https://linuxacademy.com'
-    _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
-    _NETRC_MACHINE = 'linuxacademy'
-
-    def _perform_login(self, username, password):
-        def random_string():
-            return ''.join(random.choices(
-                '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32))
-
-        webpage, urlh = self._download_webpage_handle(
-            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
-                'client_id': self._CLIENT_ID,
-                'response_type': 'token id_token',
-                'response_mode': 'web_message',
-                'redirect_uri': self._ORIGIN_URL,
-                'scope': 'openid email user_impersonation profile',
-                'audience': self._ORIGIN_URL,
-                'state': random_string(),
-                'nonce': random_string(),
-            })
-
-        login_data = self._parse_json(
-            self._search_regex(
-                r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
-                'login info', group='value'), None,
-            transform_source=lambda x: compat_b64decode(x).decode('utf-8')
-        )['extraParams']
-
-        login_data.update({
-            'client_id': self._CLIENT_ID,
-            'redirect_uri': self._ORIGIN_URL,
-            'tenant': 'lacausers',
-            'connection': 'Username-Password-ACG-Proxy',
-            'username': username,
-            'password': password,
-            'sso': 'true',
-        })
-
-        login_state_url = urlh.url
-
-        try:
-            login_page = self._download_webpage(
-                'https://login.linuxacademy.com/usernamepassword/login', None,
-                'Downloading login page', data=json.dumps(login_data).encode(),
-                headers={
-                    'Content-Type': 'application/json',
-                    'Origin': 'https://login.linuxacademy.com',
-                    'Referer': login_state_url,
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
-                error = self._parse_json(e.cause.response.read(), None)
-                message = error.get('description') or error['code']
-                raise ExtractorError(
-                    '%s said: %s' % (self.IE_NAME, message), expected=True)
-            raise
-
-        callback_page, urlh = self._download_webpage_handle(
-            'https://login.linuxacademy.com/login/callback', None,
-            'Downloading callback page',
-            data=urlencode_postdata(self._hidden_inputs(login_page)),
-            headers={
-                'Content-Type': 'application/x-www-form-urlencoded',
-                'Origin': 'https://login.linuxacademy.com',
-                'Referer': login_state_url,
-            })
-
-        access_token = self._search_regex(
-            r'access_token=([^=&]+)', urlh.url,
-            'access token', default=None)
-        if not access_token:
-            access_token = self._parse_json(
-                self._search_regex(
-                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
-                    'authorization response'), None,
-                transform_source=js_to_json)['response']['access_token']
-
-        self._download_webpage(
-            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
-            % access_token, None, 'Downloading token validation page')
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
-        item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
-
-        webpage = self._download_webpage(url, item_id)
-
-        # course path
-        if course_id:
-            module = self._parse_json(
-                self._search_regex(
-                    r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
-                item_id)
-            entries = []
-            chapter_number = None
-            chapter = None
-            chapter_id = None
-            for item in module['items']:
-                if not isinstance(item, dict):
-                    continue
-
-                def type_field(key):
-                    return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
-                type_fields = (type_field('name'), type_field('slug'))
-                # Move to next module section
-                if 'section' in type_fields:
-                    chapter = item.get('course_name')
-                    chapter_id = item.get('course_module')
-                    chapter_number = 1 if not chapter_number else chapter_number + 1
-                    continue
-                # Skip non-lessons
-                if 'lesson' not in type_fields:
-                    continue
-                lesson_url = urljoin(url, item.get('url'))
-                if not lesson_url:
-                    continue
-                title = item.get('title') or item.get('lesson_name')
-                description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
-                entries.append({
-                    '_type': 'url_transparent',
-                    'url': lesson_url,
-                    'ie_key': LinuxAcademyIE.ie_key(),
-                    'title': title,
-                    'description': description,
-                    'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
-                    'duration': parse_duration(item.get('duration')),
-                    'chapter': chapter,
-                    'chapter_id': chapter_id,
-                    'chapter_number': chapter_number,
-                })
-            return {
-                '_type': 'playlist',
-                'entries': entries,
-                'id': course_id,
-                'title': module.get('title'),
-                'description': module.get('md_desc') or clean_html(module.get('desc')),
-                'duration': parse_duration(module.get('duration')),
-            }
-
-        # single video path
-        m3u8_url = self._parse_json(
-            self._search_regex(
-                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
-            item_id)[0]['file']
-        formats = self._extract_m3u8_formats(
-            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
-            m3u8_id='hls')
-        info = {
-            'id': item_id,
-            'formats': formats,
-        }
-        lesson = self._parse_json(
-            self._search_regex(
-                (r'window\.lesson\s*=\s*({.+?})\s*;',
-                 r'player\.lesson\s*=\s*({.+?})\s*;'),
-                webpage, 'lesson', default='{}'), item_id, fatal=False)
-        if lesson:
-            info.update({
-                'title': lesson.get('lesson_name'),
-                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
-                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
-                'duration': parse_duration(lesson.get('duration')),
-            })
-        if not info.get('title'):
-            info['title'] = self._search_regex(
-                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
-                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
-                'title', group='value')
-        return info