aboutsummaryrefslogtreecommitdiff
path: root/utils/sld.js
diff options
context:
space:
mode:
authorFlorian Dold <florian.dold@gmail.com>2016-10-10 03:47:49 +0200
committerFlorian Dold <florian.dold@gmail.com>2016-10-10 03:47:49 +0200
commitd5194154335d6cb30edca9b648083069faf9778c (patch)
treebcbefa300067dfd79fc5c770862005129406024d /utils/sld.js
Squashed 'thirdparty/URI.js/' content from commit b77c167
git-subtree-dir: thirdparty/URI.js git-subtree-split: b77c167bc201575956ad409333ff032e504b8044
Diffstat (limited to 'utils/sld.js')
-rw-r--r--utils/sld.js101
1 files changed, 101 insertions, 0 deletions
diff --git a/utils/sld.js b/utils/sld.js
new file mode 100644
index 000000000..b0c95db06
--- /dev/null
+++ b/utils/sld.js
@@ -0,0 +1,101 @@
+var fs = require('fs');
+var url = require('url');
+var http = require('http');
+var domains = {};
+// domains maps each TLD to the list of entries ending in it. NOTE(review): fs and url are required above but never used in this script.
+/*
+ Problem with PublicSuffix:
+ The list not only contains TLDs/SLDs, but also domains like "dyndns.org".
+ While this may be useful for Cookie-Origin-Policy, these domains are possibly
+ being handled by URI.js, considering URI("//dyndns.org").tld("com").
+ The list does not distinguish "official" TLDs from such domains.
+ (At least I have problems with treating "cc.ga.us" as an SLD)
+*/
+// Download the effective TLD list, group its dotted entries by their last label and print the result as JSON.
+http.get("http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1", function(res) {
+ res.on('data', function(data) {
+ data.toString().replace(/\r/g, "").split("\n").forEach(function(line) { // NOTE(review): each chunk is split on its own, so a line straddling two 'data' events would be processed as two fragments
+ // skip empty lines, lines starting with "//" (comments) and entries without a dot (bare TLDs)
+ if (!line || (line[0] === "/" && line[1] === "/") || line.indexOf('.') === -1) {
+ return;
+ }
+ // sld = everything before the last dot, tld = the last label; tld is a one-element array that is coerced to a string when used as a key
+ var parts = line.split('.');
+ var sld = parts.slice(0, -1).join('.');
+ var tld = parts.slice(-1);
+ // NOTE(review): this guard cannot trigger, lines without a dot were already skipped above
+ if (parts.length < 2) {
+ return;
+ }
+ // create the per-TLD list the first time a TLD is seen
+ if (!domains[tld]) {
+ domains[tld] = [];
+ }
+
+ domains[tld].push(sld);
+ });
+ }).on('end', function() {
+ //file.end();
+ for (var tld in domains) {
+ domains[tld].sort();
+ // NOTE(review): the three branches below are empty placeholders; only the sort above affects the output
+ // "!" (exclusion) and "*" (wildcard) entries are sorted to the top, ahead of letters and digits
+ if (domains[tld][0][0] == '!') {
+ // we have wildcards and exclusions
+ } else if (domains[tld][0][0] == '*') {
+ // we have wildcards
+ } else {
+ // simple list
+ }
+ }
+ // dump the grouped list to stdout as JSON indented by two spaces
+ console.log(JSON.stringify(domains, null, 2));
+ //console.log(domains.jp);
+ });
+});
+
+/*
+
+
+// https://github.com/oncletom/tld.js
+// generates a 430KB file, which is unacceptable for the web
+
+build a regex pattern from this -- http://publicsuffix.org/list/
+"!exclusion"
+"*" wildcard
+
+uk: [ '!bl',
+ '!british-library',
+ '!jet',
+ '!mod',
+ '!national-library-scotland',
+ '!nel',
+ '!nic',
+ '!nls',
+ '!parliament',
+ '*',
+ '*.nhs',
+ '*.police',
+ '*.sch',
+ 'blogspot.co' ]
+
+jp: [ '!city.kawasaki',
+ '!city.kitakyushu',
+ '!city.kobe',
+ '!city.nagoya',
+ '!city.sapporo',
+ '!city.sendai',
+ '!city.yokohama',
+ '*.kawasaki',
+ '*.kitakyushu',
+ '*.kobe',
+ '*.nagoya',
+ '*.sapporo',
+ '*.sendai',
+ '*.yokohama',
+ 'abashiri.hokkaido',
+ 'abeno.osaka',
+ 'abiko.chiba',
+ … ]
+
+*/