author     Florian Dold <florian.dold@gmail.com>  2016-10-10 03:47:49 +0200
committer  Florian Dold <florian.dold@gmail.com>  2016-10-10 03:47:49 +0200
commit     d5194154335d6cb30edca9b648083069faf9778c (patch)
tree       bcbefa300067dfd79fc5c770862005129406024d /utils
download   wallet-core-d5194154335d6cb30edca9b648083069faf9778c.tar.xz

Squashed 'thirdparty/URI.js/' content from commit b77c167

git-subtree-dir: thirdparty/URI.js
git-subtree-split: b77c167bc201575956ad409333ff032e504b8044
Diffstat (limited to 'utils')
 -rw-r--r--  utils/SLDs.php   37
 -rw-r--r--  utils/sld.js    101
 2 files changed, 138 insertions, 0 deletions
diff --git a/utils/SLDs.php b/utils/SLDs.php
new file mode 100644
index 000000000..3f451e764
--- /dev/null
+++ b/utils/SLDs.php
@@ -0,0 +1,37 @@
+<?php
+
+$map = array();
+
+// grab list of known SLDs from https://github.com/gavingmiller/second-level-domains
+// using curl since file_get_contents() won't do SSL...
+$url = 'https://raw.github.com/gavingmiller/second-level-domains/master/SLDs.csv';
+$curl = curl_init($url);
+curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
+curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
+$source = curl_exec($curl);
+
+
+// $source is CSV, but simple enough that it's not worth the hassle of str_getcsv()
+$source = preg_split("/\r\n|\r|\n/", $source);
+foreach ($source as $line) {
+ $t = explode(',', $line);
+ $tld = strtolower(substr($t[0], 1)); // skip the leading dot
+ $sld = strtolower(substr($t[1], 1, strrpos($t[1], '.') - 1)); // keep only the second-level label, e.g. "co" from ".co.uk"
+ if (!$tld || !$sld || strpos($sld, '.') !== false) {
+ continue;
+ }
+
+ $map[$tld][] = $sld;
+}
+
+// source seems to be tainted with duplicates (and false SLDs like "govt.uk")
+// for now we don't care about false (or missing) SLDs
+foreach ($map as $tld => &$slds) {
+ $slds = array_unique($slds);
+ sort($slds);
+ $slds = join('|', $slds);
+}
+
+echo json_encode($map);
\ No newline at end of file
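The script above prints a JSON object keyed by TLD, where each value is that TLD's known second-level labels joined with "|". As a minimal sketch of how such a map could be consumed, assuming a lookup of the label directly left of the TLD; the sample "uk" entry and the helper hasKnownSld() below are illustrative, not part of this commit or of the URI.js API:

// Hypothetical consumer of the JSON emitted by SLDs.php above.
// The map shape { "uk": "ac|co|...", ... } mirrors what the script prints;
// the sample values and the function name are assumptions for illustration.
var map = { "uk": "ac|co|gov|ltd|me|net|nhs|org|plc|police|sch" };

function hasKnownSld(hostname) {
  var labels = hostname.toLowerCase().split('.');
  if (labels.length < 3) {
    return false; // need at least "example.sld.tld"
  }
  var tld = labels[labels.length - 1];
  var sld = labels[labels.length - 2];
  var slds = map[tld];
  // the script joins SLDs with "|"; wrapping both sides in the delimiter
  // avoids substring false positives (e.g. "o" matching inside "co")
  return !!slds && ('|' + slds + '|').indexOf('|' + sld + '|') !== -1;
}

console.log(hasKnownSld('www.example.co.uk')); // true
console.log(hasKnownSld('www.example.uk'));    // false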
diff --git a/utils/sld.js b/utils/sld.js
new file mode 100644
index 000000000..b0c95db06
--- /dev/null
+++ b/utils/sld.js
@@ -0,0 +1,101 @@
+var fs = require('fs');
+var url = require('url');
+var http = require('http');
+var domains = {};
+
+/*
+ Problem with the Public Suffix List:
+ The list contains not only TLDs/SLDs, but also domains like "dyndns.org".
+ While this may be useful for the cookie origin policy, such domains would
+ possibly be treated as suffixes by URI.js, consider URI("//dyndns.org").tld("com").
+ The list does not distinguish "official" TLDs from such domains.
+ (At least I have problems with treating "cc.ga.us" as an SLD.)
+*/
+
+http.get("http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1", function(res) {
+ res.on('data', function(data) {
+ data.toString().replace(/\r/g, "").split("\n").forEach(function(line) {
+ // skip empty lines, comments and TLDs
+ if (!line || (line[0] === "/" && line[1] === "/") || line.indexOf('.') === -1) {
+ return;
+ }
+
+ var parts = line.split('.');
+ var sld = parts.slice(0, -1).join('.');
+ var tld = parts.slice(-1);
+
+ if (parts.length < 2) {
+ return;
+ }
+
+ if (!domains[tld]) {
+ domains[tld] = [];
+ }
+
+ domains[tld].push(sld);
+ });
+ }).on('end', function() {
+ //file.end();
+ for (var tld in domains) {
+ domains[tld].sort();
+
+ // ! and * are sorted to the top
+ if (domains[tld][0][0] == '!') {
+ // we have wildcards and exclusions
+ } else if (domains[tld][0][0] == '*') {
+ // we have wildcards
+ } else {
+ // simple list
+ }
+ }
+
+ console.log(JSON.stringify(domains, null, 2));
+ //console.log(domains.jp);
+ });
+});
+
+/*
+
+
+// https://github.com/oncletom/tld.js
+// generates a 430KB file, which is unacceptable for the web
+
+idea: build a regex pattern from this -- http://publicsuffix.org/list/
+"!" marks an exclusion
+"*" marks a wildcard
+
+uk: [ '!bl',
+ '!british-library',
+ '!jet',
+ '!mod',
+ '!national-library-scotland',
+ '!nel',
+ '!nic',
+ '!nls',
+ '!parliament',
+ '*',
+ '*.nhs',
+ '*.police',
+ '*.sch',
+ 'blogspot.co' ]
+
+jp: [ '!city.kawasaki',
+ '!city.kitakyushu',
+ '!city.kobe',
+ '!city.nagoya',
+ '!city.sapporo',
+ '!city.sendai',
+ '!city.yokohama',
+ '*.kawasaki',
+ '*.kitakyushu',
+ '*.kobe',
+ '*.nagoya',
+ '*.sapporo',
+ '*.sendai',
+ '*.yokohama',
+ 'abashiri.hokkaido',
+ 'abeno.osaka',
+ 'abiko.chiba',
+ … ]
+
+*/
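To make the "!" exclusion and "*" wildcard notation above concrete, here is a deliberately simplified sketch of how such per-TLD rule arrays could be applied to a candidate suffix. The tiny rules object and isPublicSuffix() are illustrative assumptions (the full Public Suffix algorithm matches rules of every length), not code from this commit:

// Small sample of the per-TLD arrays built by the script above; see the
// "uk"/"jp" excerpts in the comment. '!' entries are exceptions, '*' wildcards.
var rules = {
  uk: ['!parliament', '*', 'blogspot.co'],
  jp: ['!city.kawasaki', '*.kawasaki', 'abashiri.hokkaido']
};

// Is the given suffix (TLD included) a public suffix under these rules?
function isPublicSuffix(suffix) {
  var labels = suffix.toLowerCase().split('.');
  var tld = labels.pop();
  var rest = labels.join('.');   // e.g. "co" for "co.uk", "city.kawasaki" for jp
  var list = rules[tld] || [];

  if (labels.length === 0) {
    return true;                 // a bare TLD is always a public suffix
  }
  if (list.indexOf('!' + rest) !== -1) {
    return false;                // an exclusion rule always wins
  }
  if (list.indexOf(rest) !== -1) {
    return true;                 // explicit entry, e.g. "blogspot.co"
  }
  // wildcard: replace the leftmost label with '*' ("foo.kawasaki" -> "*.kawasaki",
  // a single label like "co" becomes the bare "*") and look the result up again
  var wildcard = ['*'].concat(labels.slice(1)).join('.');
  return list.indexOf(wildcard) !== -1;
}

console.log(isPublicSuffix('co.uk'));            // true  (matches '*')
console.log(isPublicSuffix('parliament.uk'));    // false (matches '!parliament')
console.log(isPublicSuffix('foo.kawasaki.jp'));  // true  (matches '*.kawasaki')
console.log(isPublicSuffix('city.kawasaki.jp')); // false (matches '!city.kawasaki')
console.log(isPublicSuffix('blogspot.co.uk'));   // true  (explicit entry)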