aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkrichbanana <77071421+krichbanana@users.noreply.github.com>2024-11-17 09:12:26 -0500
committerGitHub <noreply@github.com>2024-11-17 14:12:26 +0000
commiteb15fd5a32d8b35ef515f7a3d1158c03025648ff (patch)
tree8c9b001eec7cb5aa16adac9a1af9d4a745ffb451
parent7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3 (diff)
[ie/kenh14] Add extractor (#3996)
Closes #3937 Authored by: krichbanana, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
-rw-r--r--yt_dlp/extractor/_extractors.py4
-rw-r--r--yt_dlp/extractor/kenh14.py160
2 files changed, 164 insertions, 0 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 25a233a2d..af903f307 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -946,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE
+from .kenh14 import (
+ Kenh14PlaylistIE,
+ Kenh14VideoIE,
+)
from .khanacademy import (
KhanAcademyIE,
KhanAcademyUnitIE,
diff --git a/yt_dlp/extractor/kenh14.py b/yt_dlp/extractor/kenh14.py
new file mode 100644
index 000000000..3c46020e8
--- /dev/null
+++ b/yt_dlp/extractor/kenh14.py
@@ -0,0 +1,160 @@
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_attribute,
+ get_elements_html_by_class,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ remove_start,
+ strip_or_none,
+ unescapeHTML,
+ update_url,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class Kenh14VideoIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
+ _TESTS = [{
+ 'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
+ 'md5': '1ed67f9c3a1e74acf15db69590cf6210',
+ 'info_dict': {
+ 'id': '316173',
+ 'ext': 'mp4',
+ 'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
+ 'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
+ 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
+ 'tags': [],
+ 'uploader': 'Unbox Therapy',
+ 'upload_date': '20220517',
+ 'view_count': int,
+ 'duration': 722.86,
+ 'timestamp': 1652764468,
+ },
+ }, {
+ 'url': 'https://video.kenh14.vn/video-316174.chn',
+ 'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
+ 'info_dict': {
+ 'id': '316174',
+ 'ext': 'mp4',
+ 'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
+ 'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
+ 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
+ 'tags': [],
+ 'upload_date': '20220517',
+ 'view_count': int,
+ 'duration': 70.04,
+ 'timestamp': 1652766021,
+ },
+ }, {
+ 'url': 'https://video.kenh14.vn/0-344740.chn',
+ 'md5': 'b843495d5e728142c8870c09b46df2a9',
+ 'info_dict': {
+ 'id': '344740',
+ 'ext': 'mov',
+ 'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
+ 'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
+ 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
+ 'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
+ 'uploader': 'Quang Vũ',
+ 'upload_date': '20241024',
+ 'view_count': int,
+ 'duration': 198.88,
+ 'timestamp': 1729741590,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
+ direct_url = attrs['data-vid']
+
+ metadata = self._download_json(
+ 'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
+ remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
+
+ formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
+ subtitles = {}
+ video_data = self._download_json(
+ f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
+ if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ hls_url, video_id, m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ dash_url, video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ **traverse_obj(metadata, {
+ 'duration': ('duration', {parse_duration}),
+ 'uploader': ('author', {strip_or_none}),
+ 'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
+ 'view_count': ('views', {int_or_none}),
+ }),
+ 'id': video_id,
+ 'title': (
+ traverse_obj(metadata, ('title', {strip_or_none}))
+ or clean_html(self._og_search_title(webpage))
+ or clean_html(get_element_by_class('vdbw-title', webpage))),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': (
+ clean_html(self._og_search_description(webpage))
+ or clean_html(get_element_by_class('vdbw-sapo', webpage))),
+ 'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
+ 'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
+ {lambda x: x.split(';')}, ..., filter)),
+ }
+
+
+class Kenh14PlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
+ _TESTS = [{
+ 'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
+ 'info_dict': {
+ 'id': '71',
+ 'title': 'Trần Tình (Naked love) mùa 2',
+ 'description': 'md5:e9522339304956dea931722dd72eddb2',
+ 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
+ },
+ 'playlist_count': 9,
+ }, {
+ 'url': 'https://video.kenh14.vn/playlist/0-72.chn',
+ 'info_dict': {
+ 'id': '72',
+ 'title': 'Lau Lại Đầu Từ',
+ 'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
+ 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
+ },
+ 'playlist_count': 6,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ category_detail = get_element_by_class('category-detail', webpage) or ''
+ embed_info = traverse_obj(
+ self._yield_json_ld(webpage, playlist_id),
+ (lambda _, v: v['name'] and v['alternateName'], any)) or {}
+
+ return self.playlist_from_matches(
+ get_elements_html_by_class('video-item', webpage), playlist_id,
+ (clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
+ getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
+ ie=Kenh14VideoIE, playlist_description=(
+ clean_html(get_element_by_class('description', category_detail))
+ or unescapeHTML(embed_info.get('alternateName'))),
+ thumbnail=traverse_obj(
+ self._og_search_thumbnail(webpage),
+ ({url_or_none}, {update_url(query=None)})))