about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2011-11-21 21:50:39 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2011-11-21 21:50:39 +0100
commit af8e8d63f9ed804a258bde933b26c16ff48e0c84 (patch)
tree 608f1c75950a956727765f8eef87d49bc87898b2
parent e092418d8b360aaaf8c7eba67956010f4e363121 (diff)
Allow non-ASCII characters in simplified titles (Closes #220)
-rw-r--r-- test/test_div.py 5
-rwxr-xr-x youtube_dl/__init__.py 10
2 files changed, 9 insertions, 6 deletions
diff --git a/test/test_div.py b/test/test_div.py
index 4525c8be6..4d4819b3c 100644
--- a/test/test_div.py
+++ b/test/test_div.py
@@ -16,13 +16,14 @@ def test_simplify_title():
assert u'/' not in youtube_dl._simplify_title(u'abc/de')
assert u'abc' in youtube_dl._simplify_title(u'abc/de')
assert u'de' in youtube_dl._simplify_title(u'abc/de')
+ assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
assert u'de' in youtube_dl._simplify_title(u'abc\\de')
- # TODO: Fix #220
- #assert youtube_dl._simplify_title(u'ä') == u'ä'
+ assert youtube_dl._simplify_title(u'ä') == u'ä'
+ assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
# Strip underlines
assert youtube_dl._simplify_title(u'\'a_') == u'a'
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index d4eadc905..36520c594 100755
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -278,7 +278,8 @@ def timeconvert(timestr):
return timestamp
def _simplify_title(title):
- return re.sub(ur'[^\w\d_\-]+', u'_', title).strip(u'_')
+ expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
+ return expr.sub(u'_', title).strip(u'_')
class DownloadError(Exception):
"""Download Error exception.
@@ -2937,6 +2938,7 @@ class BlipTVIE(InfoExtractor):
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
+ title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
@@ -3089,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor):
if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'):
- url = 'http://www.thedailyshow.com/full-episodes/'
+ url = u'http://www.thedailyshow.com/full-episodes/'
else:
- url = 'http://www.colbertnation.com/full-episodes/'
+ url = u'http://www.colbertnation.com/full-episodes/'
mobj = re.match(self._VALID_URL, url)
assert mobj is not None
@@ -3177,7 +3179,7 @@ class ComedyCentralIE(InfoExtractor):
self._downloader.increment_downloads()
- effTitle = showId + '-' + epTitle
+ effTitle = showId + u'-' + epTitle
info = {
'id': shortMediaId,
'url': video_url,