aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author: dirkf <fieldhouse@gmx.net>, 2023-05-05 19:25:42 +0100
committer: dirkf <fieldhouse@gmx.net>, 2023-07-19 22:14:50 +0100
commit: b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 (patch)
tree: caf46c5f7dd2af308ba0a69797097c8cd8ce77ac /test
parent: 846522204104e3078c597fa1872465024a684ad6 (diff)
download: youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.tar.xz
[InfoExtractor] Add search methods for Next/Nuxt.js from yt-dlp
* add _search_nextjs_data(), from https://github.com/yt-dlp/yt-dlp/pull/1386, thanks selfisekai
* add _search_nuxt_data(), from https://github.com/yt-dlp/yt-dlp/pull/1921, thanks Lesmiscore, pukkandan
* add tests for the above
* also fix HTML5 type recognition and tests, from https://github.com/yt-dlp/yt-dlp/commit/222a230871fe4fe63f35c49590379c9a77116819, thanks Lesmiscore
* update extractors in PR using above, fix tests.
Diffstat (limited to 'test')
-rw-r--r-- test/test_InfoExtractor.py 111
1 files changed, 106 insertions, 5 deletions
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 6d25441db..34773fbd0 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -7,15 +7,33 @@ import io
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from youtube_dl.compat import compat_etree_fromstring, compat_http_server
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import YoutubeIE, get_info_extractor
-from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
import threading
+from test.helper import (
+ expect_dict,
+ expect_value,
+ FakeYDL,
+ http_server_port,
+)
+from youtube_dl.compat import (
+ compat_etree_fromstring,
+ compat_http_server,
+)
+from youtube_dl.extractor.common import InfoExtractor
+from youtube_dl.extractor import (
+ get_info_extractor,
+ YoutubeIE,
+)
+from youtube_dl.utils import (
+ encode_data_uri,
+ ExtractorError,
+ RegexNotFoundError,
+ strip_jsonp,
+)
+
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
@@ -100,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+ def test_search_nextjs_data(self):
+ html = '''
+<!DOCTYPE html>
+<html>
+<head>
+ <meta http-equiv="content-type" content=
+ "text/html; charset=utf-8">
+ <meta name="viewport" content="width=device-width">
+ <title>Test _search_nextjs_data()</title>
+</head>
+<body>
+ <div id="__next">
+ <div style="background-color:#17171E" class="FU" dir="ltr">
+ <div class="sc-93de261d-0 dyzzYE">
+ <div>
+ <header class="HD"></header>
+ <main class="MN">
+ <div style="height:0" class="HT0">
+ <div style="width:NaN%" data-testid=
+ "stream-container" class="WDN"></div>
+ </div>
+ </main>
+ </div>
+ <footer class="sc-6e5faf91-0 dEGaHS"></footer>
+ </div>
+ </div>
+ </div>
+ <script id="__NEXT_DATA__" type="application/json">
+ {"props":{"pageProps":{"video":{"id":"testid"}}}}
+ </script>
+</body>
+</html>
+'''
+ search = self.ie._search_nextjs_data(html, 'testID')
+ self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
+
+ def test_search_nuxt_data(self):
+ html = '''
+<!DOCTYPE html>
+<html>
+<head>
+ <meta http-equiv="content-type" content=
+ "text/html; charset=utf-8">
+ <title>Nuxt.js Test Page</title>
+ <meta name="viewport" content=
+ "width=device-width, initial-scale=1">
+ <meta data-hid="robots" name="robots" content="all">
+</head>
+<body class="BD">
+ <div id="__layout">
+ <h1 class="H1">Example heading</h1>
+ <div class="IN">
+ <p>Decoy text</p>
+ </div>
+ </div>
+ <script>
+ window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
+ </script>
+ <script src="/_nuxt/a12345b.js" defer="defer"></script>
+</body>
+</html>
+'''
+ search = self.ie._search_nuxt_data(html, 'testID')
+ self.assertEqual(search['track']['id'], 'testid')
+
def test_search_json_ld_realworld(self):
# https://github.com/ytdl-org/youtube-dl/issues/23306
expect_dict(
@@ -348,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(