diff options
author | Florian Dold <florian.dold@gmail.com> | 2016-10-10 03:47:49 +0200 |
---|---|---|
committer | Florian Dold <florian.dold@gmail.com> | 2016-10-10 03:47:49 +0200 |
commit | d5194154335d6cb30edca9b648083069faf9778c (patch) | |
tree | bcbefa300067dfd79fc5c770862005129406024d /utils/sld.js |
Squashed 'thirdparty/URI.js/' content from commit b77c167
git-subtree-dir: thirdparty/URI.js
git-subtree-split: b77c167bc201575956ad409333ff032e504b8044
Diffstat (limited to 'utils/sld.js')
-rw-r--r-- | utils/sld.js | 101 |
1 files changed, 101 insertions, 0 deletions
// utils/sld.js -- reconstructed from the flattened diff (new file, blob b0c95db06).
//
// Utility script: downloads the Public Suffix List, groups every multi-label
// rule by its top-level domain and prints the result as JSON on stdout.
var fs = require('fs');
var url = require('url');
var http = require('http');
var domains = {};

/*
  Problem with PublicSuffix:
  The list not only contains TLDs/SLDs, but also domains like "dyndns.org".
  While this may be useful for Cookie-Origin-Policy, these domains are possibly
  being handled by URI.js, considering URI("//dyndns.org").tld("com").
  The list does not distinguish "official" TLDs from such domains.
  (At least I have problems with treating "cc.ga.us" as a SLD)
*/

// NOTE(review): confirm this host is still reachable; the notes at the end of
// this file point at http://publicsuffix.org/list/ as the list's home.
var SUFFIX_LIST_URL = "http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1";

/**
 * Parses the text of a suffix list and appends each multi-label rule to
 * `target`, keyed by the rule's last label (the TLD).
 *
 * Empty lines, "//" comment lines and single-label rules (plain TLDs) are
 * skipped. For a rule such as "*.sch.uk" the entry "*.sch" is appended to
 * `target.uk`.
 *
 * @param {string} text - complete list text; "\r" characters are stripped.
 * @param {Object} target - map of TLD -> array of rule prefixes, mutated in place.
 * @returns {Object} the same `target` object.
 */
function collectSuffixes(text, target) {
  text.replace(/\r/g, "").split("\n").forEach(function(line) {
    // skip empty lines, comments and TLDs
    if (!line || (line[0] === "/" && line[1] === "/") || line.indexOf('.') === -1) {
      return;
    }

    // The line contains at least one dot here, so there are always >= 2 parts.
    var parts = line.split('.');
    var sld = parts.slice(0, -1).join('.');
    var tld = parts[parts.length - 1];

    // Own-property check so labels that collide with Object.prototype members
    // (e.g. "constructor") still get a fresh array.
    if (!Object.prototype.hasOwnProperty.call(target, tld)) {
      target[tld] = [];
    }

    target[tld].push(sld);
  });

  return target;
}

/**
 * Reports a download failure on stderr and marks the process as failed.
 *
 * @param {Error} err - the error that aborted the download.
 */
function reportFailure(err) {
  console.error("Could not fetch the public suffix list: " + err.message);
  process.exitCode = 1;
}

http.get(SUFFIX_LIST_URL, function(res) {
  if (res.statusCode !== 200) {
    // Drain the response so the socket is released, then bail out instead of
    // parsing an error page as list data.
    res.resume();
    reportFailure(new Error("unexpected HTTP status " + res.statusCode));
    return;
  }

  // Buffer the whole body before parsing: 'data' chunks end at arbitrary byte
  // offsets, so parsing chunk by chunk would cut lines (and multi-byte
  // characters) in half at every chunk boundary.
  var body = "";
  res.setEncoding('utf8');

  res.on('data', function(chunk) {
    body += chunk;
  }).on('error', reportFailure).on('end', function() {
    collectSuffixes(body, domains);

    for (var tld in domains) {
      domains[tld].sort();

      // ! and * are sorted to the top
      if (domains[tld][0][0] === '!') {
        // we have wildcards and exclusions
      } else if (domains[tld][0][0] === '*') {
        // we have wildcards
      } else {
        // simple list
      }
    }

    console.log(JSON.stringify(domains, null, 2));
  });
}).on('error', reportFailure);

/*


// https://github.com/oncletom/tld.js
// generates a 430KB file, which is unacceptable for the web

build a regex pattern from this -- http://publicsuffix.org/list/
"!exclusion"
"*" wildcard

uk: [ '!bl',
  '!british-library',
  '!jet',
  '!mod',
  '!national-library-scotland',
  '!nel',
  '!nic',
  '!nls',
  '!parliament',
  '*',
  '*.nhs',
  '*.police',
  '*.sch',
  'blogspot.co' ]

jp: [ '!city.kawasaki',
  '!city.kitakyushu',
  '!city.kobe',
  '!city.nagoya',
  '!city.sapporo',
  '!city.sendai',
  '!city.yokohama',
  '*.kawasaki',
  '*.kitakyushu',
  '*.kobe',
  '*.nagoya',
  '*.sapporo',
  '*.sendai',
  '*.yokohama',
  'abashiri.hokkaido',
  'abeno.osaka',
  'abiko.chiba',
  … ]

*/