diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 19 | 
1 files changed, 12 insertions, 7 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4e64f327a..a5196b0ae 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -214,10 +214,11 @@ def htmlentity_transform(matchobj):      return (u'&%s;' % entity)  compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix -class IDParser(compat_html_parser.HTMLParser): -    """Modified HTMLParser that isolates a tag with the specified id""" -    def __init__(self, id): -        self.id = id +class AttrParser(compat_html_parser.HTMLParser): +    """Modified HTMLParser that isolates a tag with the specified attribute""" +    def __init__(self, attribute, value): +        self.attribute = attribute +        self.value = value          self.result = None          self.started = False          self.depth = {} @@ -242,7 +243,7 @@ class IDParser(compat_html_parser.HTMLParser):          attrs = dict(attrs)          if self.started:              self.find_startpos(None) -        if 'id' in attrs and attrs['id'] == self.id: +        if self.attribute in attrs and attrs[self.attribute] == self.value:              self.result = [tag]              self.started = True              self.watch_startpos = True @@ -280,8 +281,12 @@ class IDParser(compat_html_parser.HTMLParser):          return '\n'.join(lines).strip()  def get_element_by_id(id, html): -    """Return the content of the tag with the specified id in the passed HTML document""" -    parser = IDParser(id) +    """Return the content of the tag with the specified ID in the passed HTML document""" +    return get_element_by_attribute("id", id, html) + +def get_element_by_attribute(attribute, value, html): +    """Return the content of the tag with the specified attribute in the passed HTML document""" +    parser = AttrParser(attribute, value)      try:          parser.loads(html)      except compat_html_parser.HTMLParseError: | 
