aboutsummaryrefslogtreecommitdiff
path: root/thirdparty/URI.js/utils/sld.js
blob: b0c95db0620b72f9939d88cc3940649a1774bebb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
var fs = require('fs');
var url = require('url');
var http = require('http');
var domains = {};

/*
    Problem with PublicSuffix:
    The list not only contains TLDs/SLDs, but also domains like "dyndns.org".
    While this may be useful for Cookie-Origin-Policy, these domains are possibly
    being handled by URI.js, considering URI("//dyndns.org").tld("com").
    The list does not distinguish "official" TLDs from such domains.
    (At least I have problems with treating "cc.ga.us" as a SLD)
*/

/**
 * Parses the text of a Public Suffix List file and groups its entries by
 * top-level domain.
 *
 * For every entry containing at least one dot, the last label is used as
 * the key in `target` and everything before it is appended to that key's
 * array (e.g. "co.uk" adds "co" to target.uk). Empty lines, "//" comments
 * and single-label entries (bare TLDs) are skipped.
 *
 * @param {string} text   complete contents of the suffix list
 * @param {Object} target map of TLD -> array of SLD strings, mutated in place
 */
function collectSuffixes(text, target) {
    text.replace(/\r/g, "").split("\n").forEach(function(line) {
        // skip empty lines, comments and TLDs
        if (!line || (line[0] === "/" && line[1] === "/") || line.indexOf('.') === -1) {
            return;
        }

        var parts = line.split('.');
        var tld = parts[parts.length - 1];
        var sld = parts.slice(0, -1).join('.');

        // own-property check so that a label such as "constructor" cannot
        // be mistaken for an already-initialised bucket
        if (!Object.prototype.hasOwnProperty.call(target, tld)) {
            target[tld] = [];
        }

        target[tld].push(sld);
    });
}

// Download the suffix list, group it by TLD and print the result as JSON.
http.get("http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1", function(res) {
    if (res.statusCode !== 200) {
        console.error("Unexpected HTTP status " + res.statusCode + " while fetching the suffix list");
        // drain the response so the socket is released
        res.resume();
        process.exitCode = 1;
        return;
    }

    var body = "";

    // Decode as UTF-8 at the stream level so multi-byte characters that are
    // split across two chunks are reassembled correctly.
    res.setEncoding('utf8');

    res.on('data', function(chunk) {
        // Only buffer here: a chunk may end in the middle of a line, so the
        // text must not be split into lines until the body is complete.
        body += chunk;
    }).on('end', function() {
        collectSuffixes(body, domains);

        Object.keys(domains).forEach(function(tld) {
            domains[tld].sort();

            // ! and * are sorted to the top
            if (domains[tld][0][0] === '!') {
                // we have wildcards and exclusions
            } else if (domains[tld][0][0] === '*') {
                // we have wildcards
            } else {
                // simple list
            }
        });

        console.log(JSON.stringify(domains, null, 2));
    });
}).on('error', function(err) {
    console.error("Failed to fetch the suffix list: " + err.message);
    process.exitCode = 1;
});

/*


// https://github.com/oncletom/tld.js
// generates a 430KB file, which is unacceptable for the web

build a regex pattern from this -- http://publicsuffix.org/list/
"!exclusion"
"*" wildcard
  
uk: [ '!bl',
      '!british-library',
      '!jet',
      '!mod',
      '!national-library-scotland',
      '!nel',
      '!nic',
      '!nls',
      '!parliament',
      '*',
      '*.nhs',
      '*.police',
      '*.sch',
      'blogspot.co' ]
    
jp: [ '!city.kawasaki',
      '!city.kitakyushu',
      '!city.kobe',
      '!city.nagoya',
      '!city.sapporo',
      '!city.sendai',
      '!city.yokohama',
      '*.kawasaki',
      '*.kitakyushu',
      '*.kobe',
      '*.nagoya',
      '*.sapporo',
      '*.sendai',
      '*.yokohama',
      'abashiri.hokkaido',
      'abeno.osaka',
      'abiko.chiba',
      … ]

*/