diff options
Diffstat (limited to 'node_modules/iconv-lite/encodings/dbcs-data.js')
-rw-r--r-- | node_modules/iconv-lite/encodings/dbcs-data.js | 22 |
1 file changed, 14 insertions, 8 deletions
diff --git a/node_modules/iconv-lite/encodings/dbcs-data.js b/node_modules/iconv-lite/encodings/dbcs-data.js index 2bf741528..a9e719b39 100644 --- a/node_modules/iconv-lite/encodings/dbcs-data.js +++ b/node_modules/iconv-lite/encodings/dbcs-data.js @@ -38,7 +38,6 @@ module.exports = { // // Overall, it seems that it's a mess :( http://www8.plala.or.jp/tkubota1/unicode-symbols-map2.html - 'shiftjis': { type: '_dbcs', table: function() { return require('./tables/shiftjis.json') }, @@ -49,8 +48,10 @@ module.exports = { 'mskanji': 'shiftjis', 'sjis': 'shiftjis', 'windows31j': 'shiftjis', + 'ms31j': 'shiftjis', 'xsjis': 'shiftjis', 'windows932': 'shiftjis', + 'ms932': 'shiftjis', '932': 'shiftjis', 'cp932': 'shiftjis', @@ -64,8 +65,10 @@ module.exports = { // TODO: IBM CCSID 942 = CP932, but F0-F9 custom chars and other char changes. // TODO: IBM CCSID 943 = Shift_JIS = CP932 with original Shift_JIS lower 128 chars. + // == Chinese/GBK ========================================================== // http://en.wikipedia.org/wiki/GBK + // We mostly implement W3C recommendation: https://www.w3.org/TR/encoding/#gbk-encoder // Oldest GB2312 (1981, ~7600 chars) is a subset of CP936 'gb2312': 'cp936', @@ -74,11 +77,10 @@ module.exports = { 'csgb2312': 'cp936', 'csiso58gb231280': 'cp936', 'euccn': 'cp936', - 'isoir58': 'gbk', // Microsoft's CP936 is a subset and approximation of GBK. - // TODO: Euro = 0x80 in cp936, but not in GBK (where it's valid but undefined) 'windows936': 'cp936', + 'ms936': 'cp936', '936': 'cp936', 'cp936': { type: '_dbcs', @@ -91,24 +93,28 @@ module.exports = { table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) }, }, 'xgbk': 'gbk', + 'isoir58': 'gbk', // GB18030 is an algorithmic extension of GBK. 
+ // Main source: https://www.w3.org/TR/encoding/#gbk-encoder + // http://icu-project.org/docs/papers/gb18030.html + // http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml + // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0 'gb18030': { type: '_dbcs', table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) }, gb18030: function() { return require('./tables/gb18030-ranges.json') }, + encodeSkipVals: [0x80], + encodeAdd: {'€': 0xA2E3}, }, 'chinese': 'gb18030', - // TODO: Support GB18030 (~27000 chars + whole unicode mapping, cp54936) - // http://icu-project.org/docs/papers/gb18030.html - // http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml - // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0 // == Korean =============================================================== // EUC-KR, KS_C_5601 and KS X 1001 are exactly the same. 'windows949': 'cp949', + 'ms949': 'cp949', '949': 'cp949', 'cp949': { type: '_dbcs', @@ -149,6 +155,7 @@ module.exports = { // Unicode mapping (http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT) is said to be wrong. 'windows950': 'cp950', + 'ms950': 'cp950', '950': 'cp950', 'cp950': { type: '_dbcs', @@ -166,5 +173,4 @@ module.exports = { 'cnbig5': 'big5hkscs', 'csbig5': 'big5hkscs', 'xxbig5': 'big5hkscs', - }; |