diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 46 | 
1 files changed, 17 insertions, 29 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 558c9c7d5..d39f313a4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import zlib  from .compat import (      compat_basestring,      compat_chr, +    compat_etree_fromstring,      compat_html_entities,      compat_http_client,      compat_kwargs, @@ -178,10 +179,19 @@ def xpath_with_ns(path, ns_map):  def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): -    if sys.version_info < (2, 7):  # Crazy 2.6 -        xpath = xpath.encode('ascii') +    def _find_xpath(xpath): +        if sys.version_info < (2, 7):  # Crazy 2.6 +            xpath = xpath.encode('ascii') +        return node.find(xpath) + +    if isinstance(xpath, (str, compat_str)): +        n = _find_xpath(xpath) +    else: +        for xp in xpath: +            n = _find_xpath(xp) +            if n is not None: +                break -    n = node.find(xpath)      if n is None:          if default is not NO_DEFAULT:              return default @@ -356,7 +366,7 @@ def sanitize_path(s):      if drive_or_unc:          norm_path.pop(0)      sanitized_path = [ -        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) +        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)          for path_part in norm_path]      if drive_or_unc:          sanitized_path.insert(0, drive_or_unc + os.path.sep) @@ -901,7 +911,8 @@ def unified_strdate(date_str, day_first=True):          timetuple = email.utils.parsedate_tz(date_str)          if timetuple:              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') -    return upload_date +    if upload_date is not None: +        return compat_str(upload_date)  def determine_ext(url, default_ext='unknown_video'): @@ -1656,29 +1667,6 @@ def encode_dict(d, encoding='utf-8'):      return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items()) -try: -    etree_iter = xml.etree.ElementTree.Element.iter -except AttributeError:  # Python <=2.6 -    etree_iter = lambda n: n.findall('.//*') - - -def parse_xml(s): -    class TreeBuilder(xml.etree.ElementTree.TreeBuilder): -        def doctype(self, name, pubid, system): -            pass  # Ignore doctypes - -    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) -    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} -    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) -    # Fix up XML parser in Python 2.x -    if sys.version_info < (3, 0): -        for n in etree_iter(tree): -            if n.text is not None: -                if not isinstance(n.text, compat_str): -                    n.text = n.text.decode('utf-8') -    return tree - -  US_RATINGS = {      'G': 0,      'PG': 10, @@ -1979,7 +1967,7 @@ def dfxp2srt(dfxp_data):          return out -    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) +    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))      out = []      paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') | 
