author     Florian Dold <florian.dold@gmail.com>  2016-10-10 03:47:49 +0200
committer  Florian Dold <florian.dold@gmail.com>  2016-10-10 03:47:49 +0200
commit     d5194154335d6cb30edca9b648083069faf9778c (patch)
tree       bcbefa300067dfd79fc5c770862005129406024d /utils
download   wallet-core-d5194154335d6cb30edca9b648083069faf9778c.tar.xz

Squashed 'thirdparty/URI.js/' content from commit b77c167

git-subtree-dir: thirdparty/URI.js
git-subtree-split: b77c167bc201575956ad409333ff032e504b8044
Diffstat (limited to 'utils')
 -rw-r--r--  utils/SLDs.php   37
 -rw-r--r--  utils/sld.js    101
 2 files changed, 138 insertions, 0 deletions
diff --git a/utils/SLDs.php b/utils/SLDs.php
new file mode 100644
index 000000000..3f451e764
--- /dev/null
+++ b/utils/SLDs.php
@@ -0,0 +1,37 @@
+<?php
+
+$map = array();
+
+// grab list of known SLDs from https://github.com/gavingmiller/second-level-domains
+// using curl since file_get_contents() won't do SSL...
+$url = 'https://raw.github.com/gavingmiller/second-level-domains/master/SLDs.csv';
+$curl = curl_init($url);
+curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
+curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
+$source = curl_exec($curl);
+
+
+// $source is CSV, but simple enough that it's not worth the hassle of str_getcsv()
+$source = preg_split("/\r\n|\r|\n/", $source);
+foreach ($source as $line) {
+ $t = explode(',', $line);
+ $tld = strtolower(substr($t[0], 1)); // skip the leading dot
+ $sld = strtolower(substr($t[1], 1, strrpos($t[1], '.') - 1)); // keep only the second-level label, e.g. "co" from ".co.uk"
+ if (!$tld || !$sld || strpos($sld, '.') !== false) {
+ continue;
+ }
+
+ $map[$tld][] = $sld;
+}
+
+// source seems to be tainted with duplicates (and false SLDs like "govt.uk")
+// for now we don't care about false (or missing) SLDs
+foreach ($map as $tld => &$slds) {
+ $slds = array_unique($slds);
+ sort($slds);
+ $slds = join('|', $slds);
+}
+
+echo json_encode($map);
\ No newline at end of file
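The script above prints a JSON object keyed by TLD, where each value is that TLD's known second-level labels joined with "|". As a minimal sketch of how such a map could be consumed, assuming a lookup of the label directly left of the TLD; the sample "uk" entry and the helper hasKnownSld() below are illustrative, not part of this commit or of the URI.js API:

// Hypothetical consumer of the JSON emitted by SLDs.php above.
// The map shape { "uk": "ac|co|...", ... } mirrors what the script prints;
// the sample values and the function name are assumptions for illustration.
var map = { "uk": "ac|co|gov|ltd|me|net|nhs|org|plc|police|sch" };

function hasKnownSld(hostname) {
  var labels = hostname.toLowerCase().split('.');
  if (labels.length < 3) {
    return false; // need at least "example.sld.tld"
  }
  var tld = labels[labels.length - 1];
  var sld = labels[labels.length - 2];
  var slds = map[tld];
  // the script joins SLDs with "|"; wrapping both sides in the delimiter
  // avoids substring false positives (e.g. "o" matching inside "co")
  return !!slds && ('|' + slds + '|').indexOf('|' + sld + '|') !== -1;
}

console.log(hasKnownSld('www.example.co.uk')); // true
console.log(hasKnownSld('www.example.uk'));    // false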
diff --git a/utils/sld.js b/utils/sld.js
new file mode 100644
index 000000000..b0c95db06
--- /dev/null
+++ b/utils/sld.js
@@ -0,0 +1,101 @@
+var fs = require('fs');
+var url = require('url');
+var http = require('http');
+var domains = {};
+
+/*
+ Problem with the Public Suffix List:
+ The list contains not only TLDs/SLDs, but also domains like "dyndns.org".
+ While this may be useful for the cookie origin policy, such domains would
+ possibly be treated as suffixes by URI.js, consider URI("//dyndns.org").tld("com").
+ The list does not distinguish "official" TLDs from such domains.
+ (At least I have problems with treating "cc.ga.us" as an SLD.)
+*/
+
+http.get("http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1", function(res) {
+ res.on('data', function(data) {
+ data.toString().replace(/\r/g, "").split("\n").forEach(function(line) {
+ // skip empty lines, comments and TLDs
+ if (!line || (line[0] === "/" && line[1] === "/") || line.indexOf('.') === -1) {
+ return;
+ }
+
+ var parts = line.split('.');
+ var sld = parts.slice(0, -1).join('.');
+ var tld = parts.slice(-1);
+
+ if (parts.length < 2) {
+ return;
+ }
+
+ if (!domains[tld]) {
+ domains[tld] = [];
+ }
+
+ domains[tld].push(sld);
+ });
+ }).on('end', function() {
+ //file.end();
+ for (var tld in domains) {
+ domains[tld].sort();
+
+ // ! and * are sorted to the top
+ if (domains[tld][0][0] == '!') {
+ // we have wildcards and exclusions
+ } else if (domains[tld][0][0] == '*') {
+ // we have wildcards
+ } else {
+ // simple list
+ }
+ }
+
+ console.log(JSON.stringify(domains, null, 2));
+ //console.log(domains.jp);
+ });
+});
+
+/*
+
+
+// https://github.com/oncletom/tld.js
+// generates a 430KB file, which is unacceptable for the web
+
+idea: build a regex pattern from this -- http://publicsuffix.org/list/
+"!" marks an exclusion
+"*" marks a wildcard
+
+uk: [ '!bl',
+ '!british-library',
+ '!jet',
+ '!mod',
+ '!national-library-scotland',
+ '!nel',
+ '!nic',
+ '!nls',
+ '!parliament',
+ '*',
+ '*.nhs',
+ '*.police',
+ '*.sch',
+ 'blogspot.co' ]
+
+jp: [ '!city.kawasaki',
+ '!city.kitakyushu',
+ '!city.kobe',
+ '!city.nagoya',
+ '!city.sapporo',
+ '!city.sendai',
+ '!city.yokohama',
+ '*.kawasaki',
+ '*.kitakyushu',
+ '*.kobe',
+ '*.nagoya',
+ '*.sapporo',
+ '*.sendai',
+ '*.yokohama',
+ 'abashiri.hokkaido',
+ 'abeno.osaka',
+ 'abiko.chiba',
+ … ]
+
+*/
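To make the "!" exclusion and "*" wildcard notation above concrete, here is a deliberately simplified sketch of how such per-TLD rule arrays could be applied to a candidate suffix. The tiny rules object and isPublicSuffix() are illustrative assumptions (the full Public Suffix algorithm matches rules of every length), not code from this commit:

// Small sample of the per-TLD arrays built by the script above; see the
// "uk"/"jp" excerpts in the comment. '!' entries are exceptions, '*' wildcards.
var rules = {
  uk: ['!parliament', '*', 'blogspot.co'],
  jp: ['!city.kawasaki', '*.kawasaki', 'abashiri.hokkaido']
};

// Is the given suffix (TLD included) a public suffix under these rules?
function isPublicSuffix(suffix) {
  var labels = suffix.toLowerCase().split('.');
  var tld = labels.pop();
  var rest = labels.join('.');   // e.g. "co" for "co.uk", "city.kawasaki" for jp
  var list = rules[tld] || [];

  if (labels.length === 0) {
    return true;                 // a bare TLD is always a public suffix
  }
  if (list.indexOf('!' + rest) !== -1) {
    return false;                // an exclusion rule always wins
  }
  if (list.indexOf(rest) !== -1) {
    return true;                 // explicit entry, e.g. "blogspot.co"
  }
  // wildcard: replace the leftmost label with '*' ("foo.kawasaki" -> "*.kawasaki",
  // a single label like "co" becomes the bare "*") and look the result up again
  var wildcard = ['*'].concat(labels.slice(1)).join('.');
  return list.indexOf(wildcard) !== -1;
}

console.log(isPublicSuffix('co.uk'));            // true  (matches '*')
console.log(isPublicSuffix('parliament.uk'));    // false (matches '!parliament')
console.log(isPublicSuffix('foo.kawasaki.jp'));  // true  (matches '*.kawasaki')
console.log(isPublicSuffix('city.kawasaki.jp')); // false (matches '!city.kawasaki')
console.log(isPublicSuffix('blogspot.co.uk'));   // true  (explicit entry)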