From 67299f23d8b1894120e875edf97440de87e22308 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Mon, 1 Feb 2021 14:30:59 +0100
Subject: [youtube] Rewrite Extractor

- improve format sorting
- remove unused code (swf parsing, ...)
- fix series metadata extraction
- fix trailer video extraction
- improve error reporting
- extract video location
---
 youtube_dl/extractor/common.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index d5faa0eb7..8eb110f4e 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2064,7 +2064,7 @@ class InfoExtractor(object):
             })
         return entries
 
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             mpd_url, video_id,
             note=note or 'Downloading MPD manifest',
@@ -2078,10 +2078,9 @@ class InfoExtractor(object):
         mpd_base_url = base_url(urlh.geturl())
 
         return self._parse_mpd_formats(
-            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
-            formats_dict=formats_dict, mpd_url=mpd_url)
+            mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
+    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2359,15 +2358,7 @@ class InfoExtractor(object):
                         else:
                             # Assuming direct URL to unfragmented media.
                             f['url'] = base_url
-
-                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
-                        # is not necessarily unique within a Period thus formats with
-                        # the same `format_id` are quite possible. There are numerous examples
-                        # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
-                        # https://github.com/ytdl-org/youtube-dl/issues/13919)
-                        full_info = formats_dict.get(representation_id, {}).copy()
-                        full_info.update(f)
-                        formats.append(full_info)
+                        formats.append(f)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
-- 
cgit v1.2.3


From 477bff69065872fff6bab5c3a1b0512018fbb6eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 10 Mar 2021 03:36:31 +0700
Subject: Introduce release_timestamp meta field (refs #28386)

---
 youtube_dl/extractor/common.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8eb110f4e..d3b6724df 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -230,8 +230,10 @@ class InfoExtractor(object):
     uploader:       Full name of the video uploader.
     license:        License name the video is licensed under.
     creator:        The creator of the video.
+    release_timestamp: UNIX timestamp of the moment the video was released.
     release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video became available
+                    (uploaded).
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
-- 
cgit v1.2.3


From 1df2596f81695bf452ffbfd89596d115d9b2daf5 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Sat, 3 Apr 2021 07:54:02 +0100
Subject: [extractor/common] fix _get_cookies method for Python 2 (#20673,
 #23256, #20326, closes #28640)

---
 youtube_dl/extractor/common.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index d3b6724df..fcbf18ee6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2899,7 +2899,10 @@ class InfoExtractor(object):
         """ Return a compat_cookies.SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
-        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+        cookie = req.get_header('Cookie')
+        if cookie and sys.version_info[0] == 2:
+            cookie = str(cookie)
+        return compat_cookies.SimpleCookie(cookie)
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
-- 
cgit v1.2.3


From e165f5641fdf62975d3b6a40132a475c9cbaea2a Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Sun, 4 Apr 2021 15:52:14 +0100
Subject: [extractor/common] fix JSON-LD VideoObject author extraction

---
 youtube_dl/extractor/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index fcbf18ee6..8ef22779a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -70,6 +70,7 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
+    try_get,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -1282,7 +1283,7 @@ class InfoExtractor(object):
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
-                'uploader': str_or_none(e.get('author')),
+                'uploader': try_get(e, lambda x: x['author']['name'], compat_str),
                 'filesize': float_or_none(e.get('contentSize')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),
-- 
cgit v1.2.3


From 6beb1ac65b03415764c487fd139298f22e1e0313 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Sun, 4 Apr 2021 19:16:17 +0100
Subject: [extractor/common] keep support for non standard JSON-LD VideoObject
 author values

---
 youtube_dl/extractor/common.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8ef22779a..78ff5b6d0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -70,7 +70,6 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
-    try_get,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -1276,6 +1275,7 @@ class InfoExtractor(object):
 
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
+            author = e.get('author')
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
@@ -1283,7 +1283,11 @@ class InfoExtractor(object):
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
-                'uploader': try_get(e, lambda x: x['author']['name'], compat_str),
+                # author can be an instance of 'Organization' or 'Person' types.
+                # both types can have 'name' property(inherited from 'Thing' type). [1]
+                # however some websites are using 'Text' type instead.
+                # 1. https://schema.org/VideoObject
+                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
                 'filesize': float_or_none(e.get('contentSize')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),
-- 
cgit v1.2.3


From 162bf9e10a4e6a08f5ed156a68054ef9b4d2b60e Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Sun, 4 Apr 2021 19:49:24 +0100
Subject: [compat] add compat_SimpleCookie

---
 youtube_dl/extractor/common.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 78ff5b6d0..af289d705 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -17,13 +17,13 @@ import math
 
 from ..compat import (
     compat_cookiejar_Cookie,
-    compat_cookies,
     compat_etree_Element,
     compat_etree_fromstring,
     compat_getpass,
     compat_integer_types,
     compat_http_client,
     compat_os_name,
+    compat_SimpleCookie,
     compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
@@ -2901,13 +2901,10 @@ class InfoExtractor(object):
         self._downloader.cookiejar.set_cookie(cookie)
 
     def _get_cookies(self, url):
-        """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+        """ Return a compat_SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
-        cookie = req.get_header('Cookie')
-        if cookie and sys.version_info[0] == 2:
-            cookie = str(cookie)
-        return compat_cookies.SimpleCookie(cookie)
+        return compat_SimpleCookie(req.get_header('Cookie'))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
-- 
cgit v1.2.3


From 70d0d4f9beba0e5b6d95ee50ad62ae7ab5be9be1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Tue, 6 Apr 2021 14:22:28 +0700
Subject: [compat] Use more conventional name for compat SimpleCookie

---
 youtube_dl/extractor/common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index af289d705..797c35fd5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -17,13 +17,13 @@ import math
 
 from ..compat import (
     compat_cookiejar_Cookie,
+    compat_cookies_SimpleCookie,
     compat_etree_Element,
     compat_etree_fromstring,
     compat_getpass,
     compat_integer_types,
     compat_http_client,
     compat_os_name,
-    compat_SimpleCookie,
     compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
@@ -2901,10 +2901,10 @@ class InfoExtractor(object):
         self._downloader.cookiejar.set_cookie(cookie)
 
     def _get_cookies(self, url):
-        """ Return a compat_SimpleCookie with the cookies for the url """
+        """ Return a compat_cookies_SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
-        return compat_SimpleCookie(req.get_header('Cookie'))
+        return compat_cookies_SimpleCookie(req.get_header('Cookie'))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
-- 
cgit v1.2.3