diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2011-11-21 21:50:39 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2011-11-21 21:50:39 +0100 |
commit | af8e8d63f9ed804a258bde933b26c16ff48e0c84 (patch) | |
tree | 608f1c75950a956727765f8eef87d49bc87898b2 | |
parent | e092418d8b360aaaf8c7eba67956010f4e363121 (diff) |
Allow non-ASCII characters in simplified titles (Closes #220)
-rw-r--r-- | test/test_div.py | 5 | ||||
-rwxr-xr-x | youtube_dl/__init__.py | 10 |
2 files changed, 9 insertions, 6 deletions
diff --git a/test/test_div.py b/test/test_div.py index 4525c8be6..4d4819b3c 100644 --- a/test/test_div.py +++ b/test/test_div.py @@ -16,13 +16,14 @@ def test_simplify_title(): assert u'/' not in youtube_dl._simplify_title(u'abc/de') assert u'abc' in youtube_dl._simplify_title(u'abc/de') assert u'de' in youtube_dl._simplify_title(u'abc/de') + assert u'/' not in youtube_dl._simplify_title(u'abc/de///') assert u'\\' not in youtube_dl._simplify_title(u'abc\\de') assert u'abc' in youtube_dl._simplify_title(u'abc\\de') assert u'de' in youtube_dl._simplify_title(u'abc\\de') - # TODO: Fix #220 - #assert youtube_dl._simplify_title(u'ä') == u'ä' + assert youtube_dl._simplify_title(u'ä') == u'ä' + assert youtube_dl._simplify_title(u'кириллица') == u'кириллица' # Strip underlines assert youtube_dl._simplify_title(u'\'a_') == u'a' diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index d4eadc905..36520c594 100755 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -278,7 +278,8 @@ def timeconvert(timestr): return timestamp def _simplify_title(title): - return re.sub(ur'[^\w\d_\-]+', u'_', title).strip(u'_') + expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE) + return expr.sub(u'_', title).strip(u'_') class DownloadError(Exception): """Download Error exception. 
@@ -2937,6 +2938,7 @@ class BlipTVIE(InfoExtractor): if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download basename = url.split('/')[-1] title,ext = os.path.splitext(basename) + title = title.decode('UTF-8') ext = ext.replace('.', '') self.report_direct_download(title) info = { @@ -3089,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor): if mobj.group('shortname'): if mobj.group('shortname') in ('tds', 'thedailyshow'): - url = 'http://www.thedailyshow.com/full-episodes/' + url = u'http://www.thedailyshow.com/full-episodes/' else: - url = 'http://www.colbertnation.com/full-episodes/' + url = u'http://www.colbertnation.com/full-episodes/' mobj = re.match(self._VALID_URL, url) assert mobj is not None @@ -3177,7 +3179,7 @@ class ComedyCentralIE(InfoExtractor): self._downloader.increment_downloads() - effTitle = showId + '-' + epTitle + effTitle = showId + u'-' + epTitle info = { 'id': shortMediaId, 'url': video_url, |