about | summary | refs | log | tree | commit | diff
path: root/contrib/seeds/makeseeds.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/seeds/makeseeds.py')
-rwxr-xr-x contrib/seeds/makeseeds.py 135
1 files changed, 70 insertions, 65 deletions
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py
index 2b377f6c01..37c6f5fd7c 100755
--- a/contrib/seeds/makeseeds.py
+++ b/contrib/seeds/makeseeds.py
@@ -6,22 +6,23 @@
# Generate seeds.txt from Pieter's DNS seeder
#
+import argparse
+import ipaddress
import re
import sys
-import dns.resolver
import collections
+from typing import List, Dict, Union
-NSEEDS=512
-
-MAX_SEEDS_PER_ASN=2
+from asmap import ASMap, net_to_prefix
-MIN_BLOCKS = 337600
+NSEEDS=512
-# These are hosts that have been observed to be behaving strangely (e.g.
-# aggressively connecting to every node).
-with open("suspicious_hosts.txt", mode="r", encoding="utf-8") as f:
- SUSPICIOUS_HOSTS = {s.strip() for s in f if s.strip()}
+MAX_SEEDS_PER_ASN = {
+ 'ipv4': 2,
+ 'ipv6': 10,
+}
+MIN_BLOCKS = 730000
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
@@ -40,10 +41,14 @@ PATTERN_AGENT = re.compile(
r"23.99"
r")")
-def parseline(line):
+def parseline(line: str) -> Union[dict, None]:
+ """ Parses a line from `seeds_main.txt` into a dictionary of details for that line.
+ Returns `None` if the line could not be parsed.
+ """
sline = line.split()
if len(sline) < 11:
- return None
+ # line too short to be valid, skip it.
+ return None
m = PATTERN_IPV4.match(sline[0])
sortkey = None
ip = None
@@ -107,98 +112,95 @@ def parseline(line):
'sortkey': sortkey,
}
-def dedup(ips):
- '''deduplicate by address,port'''
+def dedup(ips: List[Dict]) -> List[Dict]:
+ """ Remove duplicates from `ips` where multiple ips share address and port. """
d = {}
for ip in ips:
d[ip['ip'],ip['port']] = ip
return list(d.values())
-def filtermultiport(ips):
- '''Filter out hosts with more nodes per IP'''
+def filtermultiport(ips: List[Dict]) -> List[Dict]:
+ """ Filter out hosts with more than one node per IP. """
hist = collections.defaultdict(list)
for ip in ips:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
-def lookup_asn(net, ip):
- '''
- Look up the asn for an IP (4 or 6) address by querying cymru.com, or None
- if it could not be found.
- '''
- try:
- if net == 'ipv4':
- ipaddr = ip
- prefix = '.origin'
- else: # http://www.team-cymru.com/IP-ASN-mapping.html
- res = str() # 2001:4860:b002:23::68
- for nb in ip.split(':')[:4]: # pick the first 4 nibbles
- for c in nb.zfill(4): # right padded with '0'
- res += c + '.' # 2001 4860 b002 0023
- ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
- prefix = '.origin6'
-
- asn = int([x.to_text() for x in dns.resolver.resolve('.'.join(
- reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com',
- 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
- return asn
- except Exception as e:
- sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
- return None
-
# Based on Greg Maxwell's seed_filter.py
-def filterbyasn(ips, max_per_asn, max_per_net):
+def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
+ """ Prunes `ips` by
+ (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
+ (b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
+ """
# Sift out ips by type
ips_ipv46 = [ip for ip in ips if ip['net'] in ['ipv4', 'ipv6']]
ips_onion = [ip for ip in ips if ip['net'] == 'onion']
# Filter IPv46 by ASN, and limit to max_per_net per network
result = []
- net_count = collections.defaultdict(int)
- asn_count = collections.defaultdict(int)
- for ip in ips_ipv46:
+ net_count: Dict[str, int] = collections.defaultdict(int)
+ asn_count: Dict[int, int] = collections.defaultdict(int)
+
+ for i, ip in enumerate(ips_ipv46):
if net_count[ip['net']] == max_per_net:
+ # do not add this ip as we already have too many
+ # ips from this network
continue
- asn = lookup_asn(ip['net'], ip['ip'])
- if asn is None or asn_count[asn] == max_per_asn:
+ asn = asmap.lookup(net_to_prefix(ipaddress.ip_network(ip['ip'])))
+ if not asn or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
+ # do not add this ip as we already have too many
+ # ips from this ASN on this network
continue
- asn_count[asn] += 1
+ asn_count[ip['net'], asn] += 1
net_count[ip['net']] += 1
+ ip['asn'] = asn
result.append(ip)
# Add back Onions (up to max_per_net)
result.extend(ips_onion[0:max_per_net])
return result
-def ip_stats(ips):
- hist = collections.defaultdict(int)
+def ip_stats(ips: List[Dict]) -> str:
+ """ Format and return a string with the ipv4, ipv6 and onion counts of `ips`. """
+ hist: Dict[str, int] = collections.defaultdict(int)
for ip in ips:
if ip is not None:
hist[ip['net']] += 1
- return '%6d %6d %6d' % (hist['ipv4'], hist['ipv6'], hist['onion'])
+ return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
+
+def parse_args():
+ argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.')
+ argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True)
+ return argparser.parse_args()
def main():
+ args = parse_args()
+
+ print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True)
+ with open(args.asmap, 'rb') as f:
+ asmap = ASMap.from_binary(f.read())
+ print('Done.', file=sys.stderr)
+
+ print('Loading and parsing DNS seeds…', end='', file=sys.stderr, flush=True)
lines = sys.stdin.readlines()
ips = [parseline(line) for line in lines]
+ print('Done.', file=sys.stderr)
print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr)
- print('%s Initial' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Initial', file=sys.stderr)
# Skip entries with invalid address.
ips = [ip for ip in ips if ip is not None]
- print('%s Skip entries with invalid address' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Skip entries with invalid address', file=sys.stderr)
# Skip duplicates (in case multiple seeds files were concatenated)
ips = dedup(ips)
- print('%s After removing duplicates' % (ip_stats(ips)), file=sys.stderr)
- # Skip entries from suspicious hosts.
- ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS]
- print('%s Skip entries from suspicious hosts' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} After removing duplicates', file=sys.stderr)
# Enforce minimal number of blocks.
ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS]
- print('%s Enforce minimal number of blocks' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Enforce minimal number of blocks', file=sys.stderr)
# Require service bit 1.
ips = [ip for ip in ips if (ip['service'] & 1) == 1]
- print('%s Require service bit 1' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Require service bit 1', file=sys.stderr)
# Require at least 50% 30-day uptime for clearnet, 10% for onion.
req_uptime = {
'ipv4': 50,
@@ -206,25 +208,28 @@ def main():
'onion': 10,
}
ips = [ip for ip in ips if ip['uptime'] > req_uptime[ip['net']]]
- print('%s Require minimum uptime' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Require minimum uptime', file=sys.stderr)
# Require a known and recent user agent.
ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])]
- print('%s Require a known and recent user agent' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Require a known and recent user agent', file=sys.stderr)
# Sort by availability (and use last success as tie breaker)
ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True)
# Filter out hosts with multiple bitcoin ports, these are likely abusive
ips = filtermultiport(ips)
- print('%s Filter out hosts with multiple bitcoin ports' % (ip_stats(ips)), file=sys.stderr)
+ print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
# Look up ASNs and limit results, both per ASN and globally.
- ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
- print('%s Look up ASNs and limit results per ASN and per net' % (ip_stats(ips)), file=sys.stderr)
+ ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
+ print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
# Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips:
if ip['net'] == 'ipv6':
- print('[%s]:%i' % (ip['ip'], ip['port']))
+ print(f"[{ip['ip']}]:{ip['port']}", end="")
else:
- print('%s:%i' % (ip['ip'], ip['port']))
+ print(f"{ip['ip']}:{ip['port']}", end="")
+ if 'asn' in ip:
+ print(f" # AS{ip['asn']}", end="")
+ print()
if __name__ == '__main__':
main()