diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 30 | 
1 files changed, 30 insertions, 0 deletions
# Added to youtube_dl/utils.py.  ``compat_HTMLParser`` is brought into scope
# by the ``from .compat import (...)`` block at the top of that module.
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element.

    After markup has been fed to the parser, ``attrs`` maps each attribute
    name of the first start tag encountered to its decoded value.  It stays
    an empty dict when no start tag was seen.  An instance is meant to be
    used for one element only.
    """

    def __init__(self):
        self.attrs = {}
        # Set once the element of interest has been recorded, so that start
        # tags appearing later in the input cannot clobber its attributes.
        self._element_seen = False
        # Explicit base-class call (rather than super()) keeps this working
        # whichever parser class the compat layer supplies.
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Record the attributes of the first start tag; ignore later ones."""
        if self._element_seen:
            return
        self._element_seen = True
        # attrs is a list of (name, value) pairs; for a repeated attribute
        # name the last occurrence wins.
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty="" noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': "'"
    }.
    Only the first start tag in html_element is considered; an empty dict
    is returned when the string contains no start tag.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    parser.feed(html_element)
    # close() forces any markup still buffered by feed() to be processed.
    parser.close()
    return parser.attrs
