From 67299f23d8b1894120e875edf97440de87e22308 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Feb 2021 14:30:59 +0100 Subject: [youtube] Rewrite Extractor - improve format sorting - remove unused code(swf parsing, ...) - fix series metadata extraction - fix trailer video extraction - improve error reporting - extract video location --- youtube_dl/extractor/common.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d5faa0eb7..8eb110f4e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2064,7 +2064,7 @@ class InfoExtractor(object): }) return entries - def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}): + def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): res = self._download_xml_handle( mpd_url, video_id, note=note or 'Downloading MPD manifest', @@ -2078,10 +2078,9 @@ class InfoExtractor(object): mpd_base_url = base_url(urlh.geturl()) return self._parse_mpd_formats( - mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url, - formats_dict=formats_dict, mpd_url=mpd_url) + mpd_doc, mpd_id, mpd_base_url, mpd_url) - def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): + def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): """ Parse formats from MPD manifest. References: @@ -2359,15 +2358,7 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. f['url'] = base_url - - # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation - # is not necessarily unique within a Period thus formats with - # the same `format_id` are quite possible. There are numerous examples - # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111, - # https://github.com/ytdl-org/youtube-dl/issues/13919) - full_info = formats_dict.get(representation_id, {}).copy() - full_info.update(f) - formats.append(full_info) + formats.append(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats -- cgit v1.2.3 From 477bff69065872fff6bab5c3a1b0512018fbb6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Mar 2021 03:36:31 +0700 Subject: Introduce release_timestamp meta field (refs #28386) --- youtube_dl/extractor/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8eb110f4e..d3b6724df 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -230,8 +230,10 @@ class InfoExtractor(object): uploader: Full name of the video uploader. license: License name the video is licensed under. creator: The creator of the video. + release_timestamp: UNIX timestamp of the moment the video was released. release_date: The date (YYYYMMDD) when the video was released. - timestamp: UNIX timestamp of the moment the video became available. + timestamp: UNIX timestamp of the moment the video became available + (uploaded). upload_date: Video upload date (YYYYMMDD). If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. -- cgit v1.2.3 From 1df2596f81695bf452ffbfd89596d115d9b2daf5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 3 Apr 2021 07:54:02 +0100 Subject: [extractor/common] fix _get_cookies method for python 2(#20673, #23256, #20326, closes #28640) --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d3b6724df..fcbf18ee6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2899,7 +2899,10 @@ class InfoExtractor(object): """ Return a compat_cookies.SimpleCookie with the cookies for the url """ req = sanitized_Request(url) self._downloader.cookiejar.add_cookie_header(req) - return compat_cookies.SimpleCookie(req.get_header('Cookie')) + cookie = req.get_header('Cookie') + if cookie and sys.version_info[0] == 2: + cookie = str(cookie) + return compat_cookies.SimpleCookie(cookie) def _apply_first_set_cookie_header(self, url_handle, cookie): """ -- cgit v1.2.3 From e165f5641fdf62975d3b6a40132a475c9cbaea2a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Apr 2021 15:52:14 +0100 Subject: [extractor/common] fix JSON-LD VideoObject author extraction --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fcbf18ee6..8ef22779a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -70,6 +70,7 @@ from ..utils import ( str_or_none, str_to_int, strip_or_none, + try_get, unescapeHTML, unified_strdate, unified_timestamp, @@ -1282,7 +1283,7 @@ class InfoExtractor(object): 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), - 'uploader': str_or_none(e.get('author')), + 'uploader': try_get(e, lambda x: x['author']['name'], compat_str), 'filesize': float_or_none(e.get('contentSize')), 'tbr': int_or_none(e.get('bitrate')), 'width': int_or_none(e.get('width')), -- cgit v1.2.3 From 6beb1ac65b03415764c487fd139298f22e1e0313 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Apr 2021 19:16:17 +0100 Subject: [extractor/common] keep support for non standard JSON-LD VideoObject author values --- youtube_dl/extractor/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8ef22779a..78ff5b6d0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -70,7 +70,6 @@ from ..utils import ( str_or_none, str_to_int, strip_or_none, - try_get, unescapeHTML, unified_strdate, unified_timestamp, @@ -1276,6 +1275,7 @@ class InfoExtractor(object): def extract_video_object(e): assert e['@type'] == 'VideoObject' + author = e.get('author') info.update({ 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), @@ -1283,7 +1283,11 @@ class InfoExtractor(object): 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), - 'uploader': try_get(e, lambda x: x['author']['name'], compat_str), + # author can be an instance of 'Organization' or 'Person' types. + # both types can have 'name' property(inherited from 'Thing' type). [1] + # however some websites are using 'Text' type instead. + # 1. https://schema.org/VideoObject + 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None, 'filesize': float_or_none(e.get('contentSize')), 'tbr': int_or_none(e.get('bitrate')), 'width': int_or_none(e.get('width')), -- cgit v1.2.3 From 162bf9e10a4e6a08f5ed156a68054ef9b4d2b60e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Apr 2021 19:49:24 +0100 Subject: [compat] add compat_SimpleCookie --- youtube_dl/extractor/common.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 78ff5b6d0..af289d705 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -17,13 +17,13 @@ import math from ..compat import ( compat_cookiejar_Cookie, - compat_cookies, compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, compat_http_client, compat_os_name, + compat_SimpleCookie, compat_str, compat_urllib_error, compat_urllib_parse_unquote, @@ -2901,13 +2901,10 @@ class InfoExtractor(object): self._downloader.cookiejar.set_cookie(cookie) def _get_cookies(self, url): - """ Return a compat_cookies.SimpleCookie with the cookies for the url """ + """ Return a compat_SimpleCookie with the cookies for the url """ req = sanitized_Request(url) self._downloader.cookiejar.add_cookie_header(req) - cookie = req.get_header('Cookie') - if cookie and sys.version_info[0] == 2: - cookie = str(cookie) - return compat_cookies.SimpleCookie(cookie) + return compat_SimpleCookie(req.get_header('Cookie')) def _apply_first_set_cookie_header(self, url_handle, cookie): """ -- cgit v1.2.3 From 70d0d4f9beba0e5b6d95ee50ad62ae7ab5be9be1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 6 Apr 2021 14:22:28 +0700 Subject: [compat] Use more conventional name for compat SimpleCookie --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index af289d705..797c35fd5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -17,13 +17,13 @@ import math from ..compat import ( compat_cookiejar_Cookie, + compat_cookies_SimpleCookie, compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, compat_http_client, compat_os_name, - compat_SimpleCookie, compat_str, compat_urllib_error, compat_urllib_parse_unquote, @@ -2901,10 +2901,10 @@ class InfoExtractor(object): self._downloader.cookiejar.set_cookie(cookie) def _get_cookies(self, url): - """ Return a compat_SimpleCookie with the cookies for the url """ + """ Return a compat_cookies_SimpleCookie with the cookies for the url """ req = sanitized_Request(url) self._downloader.cookiejar.add_cookie_header(req) - return compat_SimpleCookie(req.get_header('Cookie')) + return compat_cookies_SimpleCookie(req.get_header('Cookie')) def _apply_first_set_cookie_header(self, url_handle, cookie): """ -- cgit v1.2.3