diff options
Diffstat (limited to 'contrib/seeds/makeseeds.py')
-rwxr-xr-x | contrib/seeds/makeseeds.py | 135 |
1 files changed, 70 insertions, 65 deletions
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py index 2b377f6c01..37c6f5fd7c 100755 --- a/contrib/seeds/makeseeds.py +++ b/contrib/seeds/makeseeds.py @@ -6,22 +6,23 @@ # Generate seeds.txt from Pieter's DNS seeder # +import argparse +import ipaddress import re import sys -import dns.resolver import collections +from typing import List, Dict, Union -NSEEDS=512 - -MAX_SEEDS_PER_ASN=2 +from asmap import ASMap, net_to_prefix -MIN_BLOCKS = 337600 +NSEEDS=512 -# These are hosts that have been observed to be behaving strangely (e.g. -# aggressively connecting to every node). -with open("suspicious_hosts.txt", mode="r", encoding="utf-8") as f: - SUSPICIOUS_HOSTS = {s.strip() for s in f if s.strip()} +MAX_SEEDS_PER_ASN = { + 'ipv4': 2, + 'ipv6': 10, +} +MIN_BLOCKS = 730000 PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$") PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$") @@ -40,10 +41,14 @@ PATTERN_AGENT = re.compile( r"23.99" r")") -def parseline(line): +def parseline(line: str) -> Union[dict, None]: + """ Parses a line from `seeds_main.txt` into a dictionary of details for that line. + or `None`, if the line could not be parsed. + """ sline = line.split() if len(sline) < 11: - return None + # line too short to be valid, skip it. + return None m = PATTERN_IPV4.match(sline[0]) sortkey = None ip = None @@ -107,98 +112,95 @@ def parseline(line): 'sortkey': sortkey, } -def dedup(ips): - '''deduplicate by address,port''' +def dedup(ips: List[Dict]) -> List[Dict]: + """ Remove duplicates from `ips` where multiple ips share address and port. """ d = {} for ip in ips: d[ip['ip'],ip['port']] = ip return list(d.values()) -def filtermultiport(ips): - '''Filter out hosts with more nodes per IP''' +def filtermultiport(ips: List[Dict]) -> List[Dict]: + """ Filter out hosts with more nodes per IP""" hist = collections.defaultdict(list) for ip in ips: hist[ip['sortkey']].append(ip) return [value[0] for (key,value) in list(hist.items()) if len(value)==1] -def lookup_asn(net, ip): - ''' - Look up the asn for an IP (4 or 6) address by querying cymru.com, or None - if it could not be found. - ''' - try: - if net == 'ipv4': - ipaddr = ip - prefix = '.origin' - else: # http://www.team-cymru.com/IP-ASN-mapping.html - res = str() # 2001:4860:b002:23::68 - for nb in ip.split(':')[:4]: # pick the first 4 nibbles - for c in nb.zfill(4): # right padded with '0' - res += c + '.' # 2001 4860 b002 0023 - ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3 - prefix = '.origin6' - - asn = int([x.to_text() for x in dns.resolver.resolve('.'.join( - reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com', - 'TXT').response.answer][0].split('\"')[1].split(' ')[0]) - return asn - except Exception as e: - sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n') - return None - # Based on Greg Maxwell's seed_filter.py -def filterbyasn(ips, max_per_asn, max_per_net): +def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: + """ Prunes `ips` by + (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and + (b) trimming ips to have at most `max_per_asn` ips from each asn in each net. + """ # Sift out ips by type ips_ipv46 = [ip for ip in ips if ip['net'] in ['ipv4', 'ipv6']] ips_onion = [ip for ip in ips if ip['net'] == 'onion'] # Filter IPv46 by ASN, and limit to max_per_net per network result = [] - net_count = collections.defaultdict(int) - asn_count = collections.defaultdict(int) - for ip in ips_ipv46: + net_count: Dict[str, int] = collections.defaultdict(int) + asn_count: Dict[int, int] = collections.defaultdict(int) + + for i, ip in enumerate(ips_ipv46): if net_count[ip['net']] == max_per_net: + # do not add this ip as we already too many + # ips from this network continue - asn = lookup_asn(ip['net'], ip['ip']) - if asn is None or asn_count[asn] == max_per_asn: + asn = asmap.lookup(net_to_prefix(ipaddress.ip_network(ip['ip']))) + if not asn or asn_count[ip['net'], asn] == max_per_asn[ip['net']]: + # do not add this ip as we already have too many + # ips from this ASN on this network continue - asn_count[asn] += 1 + asn_count[ip['net'], asn] += 1 net_count[ip['net']] += 1 + ip['asn'] = asn result.append(ip) # Add back Onions (up to max_per_net) result.extend(ips_onion[0:max_per_net]) return result -def ip_stats(ips): - hist = collections.defaultdict(int) +def ip_stats(ips: List[Dict]) -> str: + """ Format and return pretty string from `ips`. """ + hist: Dict[str, int] = collections.defaultdict(int) for ip in ips: if ip is not None: hist[ip['net']] += 1 - return '%6d %6d %6d' % (hist['ipv4'], hist['ipv6'], hist['onion']) + return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}" + +def parse_args(): + argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.') + argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True) + return argparser.parse_args() def main(): + args = parse_args() + + print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True) + with open(args.asmap, 'rb') as f: + asmap = ASMap.from_binary(f.read()) + print('Done.', file=sys.stderr) + + print('Loading and parsing DNS seeds…', end='', file=sys.stderr, flush=True) lines = sys.stdin.readlines() ips = [parseline(line) for line in lines] + print('Done.', file=sys.stderr) print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr) - print('%s Initial' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Initial', file=sys.stderr) # Skip entries with invalid address. ips = [ip for ip in ips if ip is not None] - print('%s Skip entries with invalid address' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Skip entries with invalid address', file=sys.stderr) # Skip duplicates (in case multiple seeds files were concatenated) ips = dedup(ips) - print('%s After removing duplicates' % (ip_stats(ips)), file=sys.stderr) - # Skip entries from suspicious hosts. - ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS] - print('%s Skip entries from suspicious hosts' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} After removing duplicates', file=sys.stderr) # Enforce minimal number of blocks. ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS] - print('%s Enforce minimal number of blocks' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Enforce minimal number of blocks', file=sys.stderr) # Require service bit 1. ips = [ip for ip in ips if (ip['service'] & 1) == 1] - print('%s Require service bit 1' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Require service bit 1', file=sys.stderr) # Require at least 50% 30-day uptime for clearnet, 10% for onion. req_uptime = { 'ipv4': 50, @@ -206,25 +208,28 @@ def main(): 'onion': 10, } ips = [ip for ip in ips if ip['uptime'] > req_uptime[ip['net']]] - print('%s Require minimum uptime' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Require minimum uptime', file=sys.stderr) # Require a known and recent user agent. ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])] - print('%s Require a known and recent user agent' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Require a known and recent user agent', file=sys.stderr) # Sort by availability (and use last success as tie breaker) ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True) # Filter out hosts with multiple bitcoin ports, these are likely abusive ips = filtermultiport(ips) - print('%s Filter out hosts with multiple bitcoin ports' % (ip_stats(ips)), file=sys.stderr) + print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr) # Look up ASNs and limit results, both per ASN and globally. - ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS) - print('%s Look up ASNs and limit results per ASN and per net' % (ip_stats(ips)), file=sys.stderr) + ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS) + print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr) # Sort the results by IP address (for deterministic output). ips.sort(key=lambda x: (x['net'], x['sortkey'])) for ip in ips: if ip['net'] == 'ipv6': - print('[%s]:%i' % (ip['ip'], ip['port'])) + print(f"[{ip['ip']}]:{ip['port']}", end="") else: - print('%s:%i' % (ip['ip'], ip['port'])) + print(f"{ip['ip']}:{ip['port']}", end="") + if 'asn' in ip: + print(f" # AS{ip['asn']}", end="") + print() if __name__ == '__main__': main() |