about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anisse Astier <anisse@astier.eu> 2014-04-07 23:09:53 +0200
committer: Anisse Astier <anisse@astier.eu> 2014-04-07 23:10:16 +0200
commit: ec0fafbb1915b830035f621f4660c7e46b1a53ed (patch)
tree: bc61a4c5573a6f779d980f3095936ec62e9e4066
parent: a5863bdf331e6a54068912ea216612e812d7100d (diff)
[extractor/common] fallback on utf-8 when charset is not found
fixes #2721
-rw-r--r-- youtube_dl/extractor/common.py | 5
1 file changed, 4 insertions, 1 deletion
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index da4193734..9653d44eb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -251,7 +251,10 @@ class InfoExtractor(object):
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
- content = webpage_bytes.decode(encoding, 'replace')
+ try:
+ content = webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ content = webpage_bytes.decode('utf-8', 'replace')
if (u'<title>Access to this site is blocked</title>' in content and
u'Websense' in content[:512]):