diff options
| author | dirkf <fieldhouse@gmx.net> | 2023-05-05 19:25:42 +0100 | 
|---|---|---|
| committer | dirkf <fieldhouse@gmx.net> | 2023-07-19 22:14:50 +0100 | 
| commit | b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 (patch) | |
| tree | caf46c5f7dd2af308ba0a69797097c8cd8ce77ac /test/test_InfoExtractor.py | |
| parent | 846522204104e3078c597fa1872465024a684ad6 (diff) | |
[InfoExtractor] Add search methods for Next/Nuxt.js from yt-dlp
* add _search_nextjs_data(), from https://github.com/yt-dlp/yt-dlp/pull/1386,
  thanks selfisekai
* add _search_nuxt_data(), from https://github.com/yt-dlp/yt-dlp/pull/1921,
  thanks Lesmiscore, pukkandan
* add tests for the above
* also fix HTML5 type recognition and tests, from
  https://github.com/yt-dlp/yt-dlp/commit/222a230871fe4fe63f35c49590379c9a77116819,
  thanks Lesmiscore
* update extractors in PR using above, fix tests.
Diffstat (limited to 'test/test_InfoExtractor.py')
| -rw-r--r-- | test/test_InfoExtractor.py | 111 | 
1 file changed, 106 insertions, 5 deletions
| diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6d25441db..34773fbd0 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -7,15 +7,33 @@ import io  import os  import sys  import unittest +  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value, http_server_port -from youtube_dl.compat import compat_etree_fromstring, compat_http_server -from youtube_dl.extractor.common import InfoExtractor -from youtube_dl.extractor import YoutubeIE, get_info_extractor -from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError  import threading +from test.helper import ( +    expect_dict, +    expect_value, +    FakeYDL, +    http_server_port, +) +from youtube_dl.compat import ( +    compat_etree_fromstring, +    compat_http_server, +) +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import ( +    get_info_extractor, +    YoutubeIE, +) +from youtube_dl.utils import ( +    encode_data_uri, +    ExtractorError, +    RegexNotFoundError, +    strip_jsonp, +) +  TEAPOT_RESPONSE_STATUS = 418  TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" @@ -100,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase):          self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)          self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) +    def test_search_nextjs_data(self): +        html = ''' +<!DOCTYPE html> +<html> +<head> +  <meta http-equiv="content-type" content= +  "text/html; charset=utf-8"> +  <meta name="viewport" content="width=device-width"> +  <title>Test _search_nextjs_data()</title> +</head> +<body> +  <div id="__next"> +    <div style="background-color:#17171E" class="FU" dir="ltr"> +      <div class="sc-93de261d-0 dyzzYE"> +        <div> +          <header class="HD"></header> +          <main 
class="MN"> +            <div style="height:0" class="HT0"> +              <div style="width:NaN%" data-testid= +              "stream-container" class="WDN"></div> +            </div> +          </main> +        </div> +        <footer class="sc-6e5faf91-0 dEGaHS"></footer> +      </div> +    </div> +  </div> +  <script id="__NEXT_DATA__" type="application/json"> +  {"props":{"pageProps":{"video":{"id":"testid"}}}} +  </script> +</body> +</html> +''' +        search = self.ie._search_nextjs_data(html, 'testID') +        self.assertEqual(search['props']['pageProps']['video']['id'], 'testid') + +    def test_search_nuxt_data(self): +        html = ''' +<!DOCTYPE html> +<html> +<head> +  <meta http-equiv="content-type" content= +  "text/html; charset=utf-8"> +  <title>Nuxt.js Test Page</title> +  <meta name="viewport" content= +  "width=device-width, initial-scale=1"> +  <meta data-hid="robots" name="robots" content="all"> +</head> +<body class="BD"> +  <div id="__layout"> +    <h1 class="H1">Example heading</h1> +    <div class="IN"> +      <p>Decoy text</p> +    </div> +  </div> +  <script> +  window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null)); +  </script> +  <script src="/_nuxt/a12345b.js" defer="defer"></script> +</body> +</html> +''' +        search = self.ie._search_nuxt_data(html, 'testID') +        self.assertEqual(search['track']['id'], 'testid') +      def test_search_json_ld_realworld(self):          # https://github.com/ytdl-org/youtube-dl/issues/23306          expect_dict( @@ -348,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase):                  }],              }) +        # from https://0000.studio/ +        # with type attribute but without extension in URL +        expect_dict( +            self, +            self.ie._parse_html5_media_entries( +                'https://0000.studio', +                r''' +                <video 
src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92" +                    controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain"> +                </video> +                ''', None)[0], +            { +                'formats': [{ +                    'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', +                    'ext': 'mp4', +                }], +            }) +      def test_extract_jwplayer_data_realworld(self):          # from http://www.suffolk.edu/sjc/          expect_dict( | 
