about | summary | refs | log | tree | commit | diff
path: root/yt_dlp/utils/traversal.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/utils/traversal.py')
-rw-r--r-- yt_dlp/utils/traversal.py | 23
1 files changed, 13 insertions, 10 deletions
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index bc313d5c4..361f239ba 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -20,6 +20,7 @@ from ._utils import (
get_elements_html_by_class,
get_elements_html_by_attribute,
get_elements_by_attribute,
+ get_element_by_class,
get_element_html_by_attribute,
get_element_by_attribute,
get_element_html_by_id,
@@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
@typing.overload
-def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
+def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
@@ -381,14 +382,14 @@ def find_element(*, cls: str, html=False): ...
@typing.overload
-def find_element(*, id: str, tag: str | None = None, html=False): ...
+def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
-def find_element(*, tag: str, html=False): ...
+def find_element(*, tag: str, html=False, regex=False): ...
-def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
+def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
# deliberately using `id=` and `cls=` for ease of readability
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
ANY_TAG = r'[\w:.-]+'
@@ -397,17 +398,18 @@ def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=Fal
assert not cls, 'Cannot match both attr and cls'
assert not id, 'Cannot match both attr and id'
func = get_element_html_by_attribute if html else get_element_by_attribute
- return functools.partial(func, attr, value, tag=tag or ANY_TAG)
+ return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)
elif cls:
assert not id, 'Cannot match both cls and id'
assert tag is None, 'Cannot match both cls and tag'
- func = get_element_html_by_class if html else get_elements_by_class
+ assert not regex, 'Cannot use regex with cls'
+ func = get_element_html_by_class if html else get_element_by_class
return functools.partial(func, cls)
elif id:
func = get_element_html_by_id if html else get_element_by_id
- return functools.partial(func, id, tag=tag or ANY_TAG)
+ return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)
index = int(bool(html))
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@@ -418,19 +420,20 @@ def find_elements(*, cls: str, html=False): ...
@typing.overload
-def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
+def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
-def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
+def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
# deliberately using `cls=` for ease of readability
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
if attr and value:
assert not cls, 'Cannot match both attr and cls'
func = get_elements_html_by_attribute if html else get_elements_by_attribute
- return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
+ return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)
assert not tag, 'Cannot match both cls and tag'
+ assert not regex, 'Cannot use regex with cls'
func = get_elements_html_by_class if html else get_elements_by_class
return functools.partial(func, cls)