aboutsummaryrefslogtreecommitdiff
path: root/yt_dlp/utils.py
diff options
context:
space:
mode:
author: Francesco Frassinelli <fraph24@gmail.com> 2021-11-05 17:54:56 +0100
committer: GitHub <noreply@github.com> 2021-11-05 22:24:56 +0530
commit: 73673ccff3fdc915bd6b54a298393eaf668b6506 (patch)
tree: 5286568f1d83cc43347da0d4a54010773af9325d /yt_dlp/utils.py
parent: aeb2a9ad27ba8e70803a0960f8bd3d8ac2d2aa2b (diff)
[RaiplayRadio] Add extractors (#780)
Original PR: https://github.com/ytdl-org/youtube-dl/pull/21837 Authored by: frafra
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- yt_dlp/utils.py | 26
1 files changed, 26 insertions, 0 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 55e452a15..17f34a853 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2006,6 +2006,23 @@ class HTMLAttributeParser(compat_HTMLParser):
self.attrs = dict(attrs)
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list

    Feed it the markup found inside a list container, i.e. a series of
    <li> elements. Afterwards `items` holds one dict of attributes for
    every <li> opened at nesting depth 0, in document order. <li> elements
    nested inside another element are not collected.

    Depth is tracked by counting start and end tags, so non-void elements
    are expected to be closed explicitly.
    """

    # Void elements never produce an end tag. Counting their start tags
    # would leave the depth permanently too high and hide every
    # following top-level <li>.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []  # attribute dicts of the collected <li> elements
        self._level = 0  # current nesting depth of open elements

    def handle_starttag(self, tag, attrs):
        """Record a top-level <li> and descend one nesting level"""
        if tag in self._VOID_ELEMENTS:
            return
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        """Ascend one nesting level, never going above the top level"""
        if tag in self._VOID_ELEMENTS:
            # Also reached for self-closed forms such as <br/>, whose
            # start tag was not counted either
            return
        # A stray end tag must not push the depth below zero, otherwise
        # later top-level <li> elements would no longer be recognised
        if self._level > 0:
            self._level -= 1
+
+
def extract_attributes(html_element):
"""Given a string for an HTML element such as
<el
@@ -2032,6 +2049,15 @@ def extract_attributes(html_element):
return parser.attrs
def parse_list(webpage):
    """Given a string containing a series of HTML <li> elements,
    return a list with one dictionary of attributes for each element
    gathered by HTMLListAttrsParser"""
    list_parser = HTMLListAttrsParser()
    # close() forces processing of any data still buffered after feed()
    list_parser.feed(webpage)
    list_parser.close()
    collected = list_parser.items
    return collected
+
+
def clean_html(html):
"""Clean an HTML snippet into a readable string"""