diff options
author | laanwj <126646+laanwj@users.noreply.github.com> | 2022-06-16 21:43:35 +0200 |
---|---|---|
committer | laanwj <126646+laanwj@users.noreply.github.com> | 2022-06-16 21:44:52 +0200 |
commit | 7f2c983e1cfdb58b6f84eabe5ff6a16f143f39aa (patch) | |
tree | b32995de59adb54e95099abfa0834d9bc353c78b /contrib/seeds/makeseeds.py | |
parent | b0c830634907076480528f1829e212eeb0e764e3 (diff) | |
parent | 667e316bcb300eec131727a7ce54dd038031e267 (diff) |
Merge bitcoin/bitcoin#24864: contrib: Use asmap for ASN lookup in makeseeds
667e316bcb300eec131727a7ce54dd038031e267 contrib: Update makeseeds to asmap-nextgen (laanwj)
ae00b9e02c819a69293fdcdab91f57b33e9275d9 contrib: add seeds progress indicator and remove asmap one in makeseeds script (Jon Atack)
b54180303de0df9be7023577f35648225cf95443 contrib: Use asmap for ASN lookup in makeseeds (laanwj)
Pull request description:
Add an argument `-a` to provide a asmap file to do the IP to ASN lookups.
This speeds up the script greatly, and makes the output deterministic. Also removes the dependency on `dns.lookup`.
I've annotated the output with ASxxxx comments to provide a way to verify the functionality.
For now I've added instructions in README.md to download and use the `demo.map` from the asmap repository. When we have some other mechanism for distributing asmap files we could switch to that.
This continues #24824. I've removed the fallbacks and extra complexity, as everyone will be using the same instructions anyway.
Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com>
Co-authored-by: russeree <reese.russell@ymail.com>
ACKs for top commit:
sipa:
ACK 667e316bcb300eec131727a7ce54dd038031e267
dunxen:
re-ACK 667e316
Tree-SHA512: c4cedfbd1dee6be7547aa92dd9e262c46f0ff8099e647559b2a40eab0cc9874e9a813706630dd5c880390d23f432e789fb3e7e8a09f376f567071e68f5904c65
Diffstat (limited to 'contrib/seeds/makeseeds.py')
-rwxr-xr-x | contrib/seeds/makeseeds.py | 69 |
1 files changed, 30 insertions, 39 deletions
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py index 78eb04a836..37c6f5fd7c 100755 --- a/contrib/seeds/makeseeds.py +++ b/contrib/seeds/makeseeds.py @@ -6,12 +6,15 @@ # Generate seeds.txt from Pieter's DNS seeder # +import argparse +import ipaddress import re import sys -import dns.resolver import collections from typing import List, Dict, Union +from asmap import ASMap, net_to_prefix + NSEEDS=512 MAX_SEEDS_PER_ASN = { @@ -45,7 +48,7 @@ def parseline(line: str) -> Union[dict, None]: sline = line.split() if len(sline) < 11: # line too short to be valid, skip it. - return None + return None m = PATTERN_IPV4.match(sline[0]) sortkey = None ip = None @@ -123,34 +126,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]: hist[ip['sortkey']].append(ip) return [value[0] for (key,value) in list(hist.items()) if len(value)==1] -def lookup_asn(net: str, ip: str) -> Union[int, None]: - """ Look up the asn for an `ip` address by querying cymru.com - on network `net` (e.g. ipv4 or ipv6). - - Returns in integer ASN or None if it could not be found. - """ - try: - if net == 'ipv4': - ipaddr = ip - prefix = '.origin' - else: # http://www.team-cymru.com/IP-ASN-mapping.html - res = str() # 2001:4860:b002:23::68 - for nb in ip.split(':')[:4]: # pick the first 4 nibbles - for c in nb.zfill(4): # right padded with '0' - res += c + '.' # 2001 4860 b002 0023 - ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3 - prefix = '.origin6' - - asn = int([x.to_text() for x in dns.resolver.resolve('.'.join( - reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com', - 'TXT').response.answer][0].split('\"')[1].split(' ')[0]) - return asn - except Exception as e: - sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n') - return None - # Based on Greg Maxwell's seed_filter.py -def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: +def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: """ Prunes `ips` by (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and (b) trimming ips to have at most `max_per_asn` ips from each asn in each net. @@ -165,21 +142,18 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di asn_count: Dict[int, int] = collections.defaultdict(int) for i, ip in enumerate(ips_ipv46): - if i % 10 == 0: - # give progress update - print(f"{i:6d}/{len(ips_ipv46)} [{100*i/len(ips_ipv46):04.1f}%]\r", file=sys.stderr, end='', flush=True) - if net_count[ip['net']] == max_per_net: # do not add this ip as we already too many # ips from this network continue - asn = lookup_asn(ip['net'], ip['ip']) - if asn is None or asn_count[asn] == max_per_asn[ip['net']]: + asn = asmap.lookup(net_to_prefix(ipaddress.ip_network(ip['ip']))) + if not asn or asn_count[ip['net'], asn] == max_per_asn[ip['net']]: # do not add this ip as we already have too many # ips from this ASN on this network continue - asn_count[asn] += 1 + asn_count[ip['net'], asn] += 1 net_count[ip['net']] += 1 + ip['asn'] = asn result.append(ip) # Add back Onions (up to max_per_net) @@ -195,9 +169,23 @@ def ip_stats(ips: List[Dict]) -> str: return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}" +def parse_args(): + argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.') + argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True) + return argparser.parse_args() + def main(): + args = parse_args() + + print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True) + with open(args.asmap, 'rb') as f: + asmap = ASMap.from_binary(f.read()) + print('Done.', file=sys.stderr) + + print('Loading and parsing DNS seeds…', end='', file=sys.stderr, flush=True) lines = sys.stdin.readlines() ips = [parseline(line) for line in lines] + print('Done.', file=sys.stderr) print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr) print(f'{ip_stats(ips):s} Initial', file=sys.stderr) @@ -230,15 +218,18 @@ def main(): ips = filtermultiport(ips) print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr) # Look up ASNs and limit results, both per ASN and globally. - ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS) + ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS) print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr) # Sort the results by IP address (for deterministic output). ips.sort(key=lambda x: (x['net'], x['sortkey'])) for ip in ips: if ip['net'] == 'ipv6': - print('[%s]:%i' % (ip['ip'], ip['port'])) + print(f"[{ip['ip']}]:{ip['port']}", end="") else: - print('%s:%i' % (ip['ip'], ip['port'])) + print(f"{ip['ip']}:{ip['port']}", end="") + if 'asn' in ip: + print(f" # AS{ip['asn']}", end="") + print() if __name__ == '__main__': main() |