aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2024-04-26 18:57:44 +0100
committer dirkf <fieldhouse@gmx.net> 2024-05-30 15:46:36 +0100
commit 21924742f79ccbd62d16ef4120518c6a5da8614e (patch)
tree 89264cf78caf3a04808b1b03e9364e000c3d8e1c
parent 768ccccd9b18bc48d129b12d14eace4ebb3655d8 (diff)
[InfoExtractor] Misc yt-dlp back-ports, etc
* add _yes_playlist() method
* avoid crash using _NETRC_MACHINE
* use _search_json() in _search_nextjs_data()
* _search_nextjs_data() default is JSON, not text
* test for above
-rw-r--r-- test/test_InfoExtractor.py 3
-rw-r--r-- youtube_dl/extractor/common.py 63
2 files changed, 50 insertions, 16 deletions
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index d55d6ad54..09100a1d6 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -153,6 +153,9 @@ class TestInfoExtractor(unittest.TestCase):
'''
search = self.ie._search_nextjs_data(html, 'testID')
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
+ search = self.ie._search_nextjs_data(
+ 'no next.js data here, move along', 'testID', default={'status': 0})
+ self.assertEqual(search['status'], 0)
def test_search_nuxt_data(self):
html = '''
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7fae9e57b..b10e84416 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1169,10 +1169,10 @@ class InfoExtractor(object):
def _get_netrc_login_info(self, netrc_machine=None):
username = None
password = None
- netrc_machine = netrc_machine or self._NETRC_MACHINE
if self._downloader.params.get('usenetrc', False):
try:
+ netrc_machine = netrc_machine or self._NETRC_MACHINE
info = netrc.netrc().authenticators(netrc_machine)
if info is not None:
username = info[0]
@@ -1180,7 +1180,7 @@ class InfoExtractor(object):
else:
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
- except (IOError, netrc.NetrcParseError) as err:
+ except (AttributeError, IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
@@ -1490,14 +1490,18 @@ class InfoExtractor(object):
return dict((k, v) for k, v in info.items() if v is not None)
def _search_nextjs_data(self, webpage, video_id, **kw):
- nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
- kw.pop('transform_source', None)
- next_data = self._search_regex(
- r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
- webpage, 'next.js data', group='nd', **kw)
- if not next_data:
- return {}
- return self._parse_json(next_data, video_id, **nkw)
+ # ..., *, transform_source=None, fatal=True, default=NO_DEFAULT
+
+ # TODO: remove this backward compat
+ default = kw.get('default', NO_DEFAULT)
+ if default == '{}':
+ kw['default'] = {}
+ kw = compat_kwargs(kw)
+
+ return self._search_json(
+ r'''<script\s[^>]*?\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>''',
+ webpage, 'next.js data', video_id, end_pattern='</script>',
+ **kw)
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
@@ -3296,12 +3300,16 @@ class InfoExtractor(object):
return ret
@classmethod
- def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
- """ Merge two subtitle dictionaries, language by language. """
- ret = dict(subtitle_dict1)
- for lang in subtitle_dict2:
- ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
- return ret
+ def _merge_subtitles(cls, subtitle_dict1, *subtitle_dicts, **kwargs):
+ """ Merge subtitle dictionaries, language by language. """
+
+ # ..., * , target=None
+ target = kwargs.get('target') or dict(subtitle_dict1)
+
+ for subtitle_dict in subtitle_dicts:
+ for lang in subtitle_dict:
+ target[lang] = cls._merge_subtitle_items(target.get(lang, []), subtitle_dict[lang])
+ return target
def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False)
@@ -3334,6 +3342,29 @@ class InfoExtractor(object):
def _generic_title(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ def _yes_playlist(self, playlist_id, video_id, *args, **kwargs):
+ # smuggled_data=None, *, playlist_label='playlist', video_label='video'
+ smuggled_data = args[0] if len(args) == 1 else kwargs.get('smuggled_data')
+ playlist_label = kwargs.get('playlist_label', 'playlist')
+ video_label = kwargs.get('video_label', 'video')
+
+ if not playlist_id or not video_id:
+ return not video_id
+
+ no_playlist = (smuggled_data or {}).get('force_noplaylist')
+ if no_playlist is not None:
+ return not no_playlist
+
+ video_id = '' if video_id is True else ' ' + video_id
+ noplaylist = self.get_param('noplaylist')
+ self.to_screen(
+ 'Downloading just the {0}{1} because of --no-playlist'.format(video_label, video_id)
+ if noplaylist else
+ 'Downloading {0}{1} - add --no-playlist to download just the {2}{3}'.format(
+ playlist_label, '' if playlist_id is True else ' ' + playlist_id,
+ video_label, video_id))
+ return not noplaylist
+
class SearchInfoExtractor(InfoExtractor):
"""