diff options
| field | value | date |
|---|---|---|
| author | bashonly <88596187+bashonly@users.noreply.github.com> | 2024-11-03 18:19:45 +0000 |
| committer | GitHub <noreply@github.com> | 2024-11-03 18:19:45 +0000 |
| commit | b103aca24d35b72b405c340357dc01a0ed534281 (patch) | |
| tree | 5635526925bd1b8bf618e0b8746a0553a2aaff8f /yt_dlp/utils/traversal.py | |
| parent | 5c7a5aaab27e9c3cb367b663a6136ca58866e547 (diff) | |
[utils] Fix and improve `find_element` and `find_elements` (#11443)
Fix d710a6ca7c622705c0c8c8a3615916f531137d5d
Authored by: bashonly, Grub4K
Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
Diffstat (limited to 'yt_dlp/utils/traversal.py')
-rw-r--r-- | yt_dlp/utils/traversal.py | 23 |
1 files changed, 13 insertions, 10 deletions
```diff
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index bc313d5c4..361f239ba 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -20,6 +20,7 @@ from ._utils import (
     get_elements_html_by_class,
     get_elements_html_by_attribute,
     get_elements_by_attribute,
+    get_element_by_class,
     get_element_html_by_attribute,
     get_element_by_attribute,
     get_element_html_by_id,
@@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):


 @typing.overload
-def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
+def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...


 @typing.overload
@@ -381,14 +382,14 @@ def find_element(*, cls: str, html=False): ...


 @typing.overload
-def find_element(*, id: str, tag: str | None = None, html=False): ...
+def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...


 @typing.overload
-def find_element(*, tag: str, html=False): ...
+def find_element(*, tag: str, html=False, regex=False): ...


-def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
+def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
     # deliberately using `id=` and `cls=` for ease of readability
     assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
     ANY_TAG = r'[\w:.-]+'
@@ -397,17 +398,18 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
         assert not cls, 'Cannot match both attr and cls'
         assert not id, 'Cannot match both attr and id'
         func = get_element_html_by_attribute if html else get_element_by_attribute
-        return functools.partial(func, attr, value, tag=tag or ANY_TAG)
+        return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)

     elif cls:
         assert not id, 'Cannot match both cls and id'
         assert tag is None, 'Cannot match both cls and tag'
-        func = get_element_html_by_class if html else get_elements_by_class
+        assert not regex, 'Cannot use regex with cls'
+        func = get_element_html_by_class if html else get_element_by_class
         return functools.partial(func, cls)

     elif id:
         func = get_element_html_by_id if html else get_element_by_id
-        return functools.partial(func, id, tag=tag or ANY_TAG)
+        return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)

     index = int(bool(html))
     return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@@ -418,19 +420,20 @@ def find_elements(*, cls: str, html=False): ...


 @typing.overload
-def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
+def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...


-def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
+def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
     # deliberately using `cls=` for ease of readability
     assert cls or (attr and value), 'One of cls or (attr AND value) is required'

     if attr and value:
         assert not cls, 'Cannot match both attr and cls'
         func = get_elements_html_by_attribute if html else get_elements_by_attribute
-        return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
+        return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)

     assert not tag, 'Cannot match both cls and tag'
+    assert not regex, 'Cannot use regex with cls'
     func = get_elements_html_by_class if html else get_elements_by_class
     return functools.partial(func, cls)
```