about summary refs log tree commit diff
path: root/node_modules/iconv-lite/encodings/dbcs-data.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/iconv-lite/encodings/dbcs-data.js')
-rw-r--r-- node_modules/iconv-lite/encodings/dbcs-data.js 22
1 files changed, 14 insertions, 8 deletions
diff --git a/node_modules/iconv-lite/encodings/dbcs-data.js b/node_modules/iconv-lite/encodings/dbcs-data.js
index 2bf741528..a9e719b39 100644
--- a/node_modules/iconv-lite/encodings/dbcs-data.js
+++ b/node_modules/iconv-lite/encodings/dbcs-data.js
@@ -38,7 +38,6 @@ module.exports = {
//
// Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html
-
'shiftjis': {
type: '_dbcs',
table: function() { return require('./tables/shiftjis.json') },
@@ -49,8 +48,10 @@ module.exports = {
'mskanji': 'shiftjis',
'sjis': 'shiftjis',
'windows31j': 'shiftjis',
+ 'ms31j': 'shiftjis',
'xsjis': 'shiftjis',
'windows932': 'shiftjis',
+ 'ms932': 'shiftjis',
'932': 'shiftjis',
'cp932': 'shiftjis',
@@ -64,8 +65,10 @@ module.exports = {
// TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes.
// TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars.
+
// == Chinese/GBK ==========================================================
// http://en.wikipedia.org/wiki/GBK
+ // We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder
// Oldest GB2312 (1981, ~7600 chars) is a subset of CP936
'gb2312': 'cp936',
@@ -74,11 +77,10 @@ module.exports = {
'csgb2312': 'cp936',
'csiso58gb231280': 'cp936',
'euccn': 'cp936',
- 'isoir58': 'gbk',
// Microsoft's CP936 is a subset and approximation of GBK.
- // TODO: Euro = 0x80 in cp936, but not in GBK (where it's valid but undefined)
'windows936': 'cp936',
+ 'ms936': 'cp936',
'936': 'cp936',
'cp936': {
type: '_dbcs',
@@ -91,24 +93,28 @@ module.exports = {
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
},
'xgbk': 'gbk',
+ 'isoir58': 'gbk',
// GB18030 is an algorithmic extension of GBK.
+ // Main source: https://www.w3.org/TR/encoding/#gbk-encoder
+ // http://icu-project.org/docs/papers/gb18030.html
+ // http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
+ // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
'gb18030': {
type: '_dbcs',
table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
gb18030: function() { return require('./tables/gb18030-ranges.json') },
+ encodeSkipVals: [0x80],
+ encodeAdd: {'€': 0xA2E3},
},
'chinese': 'gb18030',
- // TODO: Support GB18030 (~27000 chars + whole unicode mapping, cp54936)
- // http://icu-project.org/docs/papers/gb18030.html
- // http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
- // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
// == Korean ===============================================================
// EUC-KR, KS_C_5601 and KS X 1001 are exactly the same.
'windows949': 'cp949',
+ 'ms949': 'cp949',
'949': 'cp949',
'cp949': {
type: '_dbcs',
@@ -149,6 +155,7 @@ module.exports = {
// Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong.
'windows950': 'cp950',
+ 'ms950': 'cp950',
'950': 'cp950',
'cp950': {
type: '_dbcs',
@@ -166,5 +173,4 @@ module.exports = {
'cnbig5': 'big5hkscs',
'csbig5': 'big5hkscs',
'xxbig5': 'big5hkscs',
-
};