aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-01-21 01:39:39 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-01-21 01:39:40 +0100
commit b60016e83139ace517fc823cf2b22756e64c2e63 (patch)
tree 74c5a65f6a3b34c0f862c2be0de6602a7ccf507e /youtube_dl/extractor
parent 5aafe895fce2a7be9595cb2e56b7bd73a748e6b6 (diff)
Deal with implicitly UTF-16 decoded webpages
These webpages don't specify an encoding and rely on the BOM
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 692d828da..6c5d77e58 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -220,6 +220,8 @@ class InfoExtractor(object):
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
else:
encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False):