diff options
author | dirkf <fieldhouse@gmx.net> | 2023-05-03 13:08:58 +0100 |
---|---|---|
committer | dirkf <fieldhouse@gmx.net> | 2023-07-19 22:14:50 +0100 |
commit | 1e8ccdd2eb77901e18feb8a9d48e62d11651cd1e (patch) | |
tree | 56349943b0401d32cbb82a41bea46e90a431628d | |
parent | cb9366eda584fde2421140adf994eadc5bb6b943 (diff) |
[InfoExtractor] Support groups in `_search_regex()`, etc
-rw-r--r-- | youtube_dl/extractor/common.py | 9 |
1 files changed, 5 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7244e5df6..dbdf456f5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1005,6 +1005,8 @@ class InfoExtractor(object):
             if group is None:
                 # return the first matching group
                 return next(g for g in mobj.groups() if g is not None)
+            elif isinstance(group, (list, tuple)):
+                return tuple(mobj.group(g) for g in group)
             else:
                 return mobj.group(group)
         elif default is not NO_DEFAULT:
@@ -1020,10 +1022,9 @@ class InfoExtractor(object):
         Like _search_regex, but strips HTML tags and unescapes entities.
         """
         res = self._search_regex(pattern, string, name, default, fatal, flags, group)
-        if res:
-            return clean_html(res).strip()
-        else:
-            return res
+        if isinstance(res, tuple):
+            return tuple(map(clean_html, res))
+        return clean_html(res)
 
     def _get_netrc_login_info(self, netrc_machine=None):
         username = None
```