diff options
author | laanwj <126646+laanwj@users.noreply.github.com> | 2022-04-15 17:14:40 +0200 |
---|---|---|
committer | laanwj <126646+laanwj@users.noreply.github.com> | 2022-05-31 11:57:49 +0200 |
commit | b54180303de0df9be7023577f35648225cf95443 (patch) | |
tree | fc4986f60f2b760edffa18d9865f12e4a0ffe021 /contrib/seeds | |
parent | bd6c5e410829764ea0a3c4251e42f86242e043a2 (diff) |
contrib: Use asmap for ASN lookup in makeseeds
Add an argument `-a` to provide an asmap file to do the IP to ASN
lookups.
This speeds up the script greatly, and makes the output deterministic.
Also removes the dependency on `dns.resolver` (the dnspython package).
I've annotated the output with ASxxxx comments to provide a way to
verify the functionality.
For now I've added instructions in README.md to download and use
`asmap-filled.dat` from bitcoin.sipa.be. When we have some other mechanism
for distributing asmap files we could switch to that.
This continues #24824. I've removed all the fallbacks and extra
complexity, as everyone will be using the same instructions anyway.
Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com>
Co-authored-by: James O'Beirne <james.obeirne@pm.me>
Co-authored-by: russeree <reese.russell@ymail.com>
Diffstat (limited to 'contrib/seeds')
-rw-r--r-- | contrib/seeds/.gitignore | 1 | ||||
-rw-r--r-- | contrib/seeds/README.md | 18 | ||||
-rw-r--r-- | contrib/seeds/asmap.py | 90 | ||||
-rwxr-xr-x | contrib/seeds/makeseeds.py | 59 |
4 files changed, 120 insertions, 48 deletions
diff --git a/contrib/seeds/.gitignore b/contrib/seeds/.gitignore index e4a39d6093..d9a2451f70 100644 --- a/contrib/seeds/.gitignore +++ b/contrib/seeds/.gitignore @@ -1 +1,2 @@ seeds_main.txt +asmap-filled.dat diff --git a/contrib/seeds/README.md b/contrib/seeds/README.md index c53446bfb0..b2ea7522ac 100644 --- a/contrib/seeds/README.md +++ b/contrib/seeds/README.md @@ -8,21 +8,11 @@ and remove old versions as necessary (at a minimum when GetDesirableServiceFlags changes its default return value, as those are the services which seeds are added to addrman with). -The seeds compiled into the release are created from sipa's DNS seed data, like this: +The seeds compiled into the release are created from sipa's DNS seed and AS map +data. Run the following commands from the `/contrib/seeds` directory: curl https://bitcoin.sipa.be/seeds.txt.gz | gzip -dc > seeds_main.txt - python3 makeseeds.py < seeds_main.txt > nodes_main.txt + curl https://bitcoin.sipa.be/asmap-filled.dat > asmap-filled.dat + python3 makeseeds.py -a asmap-filled.dat < seeds_main.txt > nodes_main.txt cat nodes_main_manual.txt >> nodes_main.txt python3 generate-seeds.py . > ../../src/chainparamsseeds.h - -## Dependencies - -Ubuntu, Debian: - - sudo apt-get install python3-dnspython - -and/or for other operating systems: - - pip install dnspython - -See https://dnspython.readthedocs.io/en/latest/installation.html for more information. diff --git a/contrib/seeds/asmap.py b/contrib/seeds/asmap.py new file mode 100644 index 0000000000..e7e05a1d10 --- /dev/null +++ b/contrib/seeds/asmap.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# Copyright (c) 2013-2020 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+import ipaddress + +# Convert a byte array to a bit array +def DecodeBytes(byts): + return [(byt >> i) & 1 for byt in byts for i in range(8)] + +def DecodeBits(stream, bitpos, minval, bit_sizes): + val = minval + for pos in range(len(bit_sizes)): + bit_size = bit_sizes[pos] + if pos + 1 < len(bit_sizes): + bit = stream[bitpos] + bitpos += 1 + else: + bit = 0 + if bit: + val += (1 << bit_size) + else: + for b in range(bit_size): + bit = stream[bitpos] + bitpos += 1 + val += bit << (bit_size - 1 - b) + return (val, bitpos) + assert(False) + +def DecodeType(stream, bitpos): + return DecodeBits(stream, bitpos, 0, [0, 0, 1]) + +def DecodeASN(stream, bitpos): + return DecodeBits(stream, bitpos, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]) + +def DecodeMatch(stream, bitpos): + return DecodeBits(stream, bitpos, 2, [1, 2, 3, 4, 5, 6, 7, 8]) + +def DecodeJump(stream, bitpos): + return DecodeBits(stream, bitpos, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]) + +def Interpret(asmap, num, bits): + pos = 0 + default = None + while True: + assert(len(asmap) >= pos + 1) + (opcode, pos) = DecodeType(asmap, pos) + if opcode == 0: + (asn, pos) = DecodeASN(asmap, pos) + return asn + elif opcode == 1: + (jump, pos) = DecodeJump(asmap, pos) + if (num >> (bits - 1)) & 1: + pos += jump + bits -= 1 + elif opcode == 2: + (match, pos) = DecodeMatch(asmap, pos) + matchlen = match.bit_length() - 1 + for bit in range(matchlen): + if ((num >> (bits - 1)) & 1) != ((match >> (matchlen - 1 - bit)) & 1): + return default + bits -= 1 + elif opcode == 3: + (default, pos) = DecodeASN(asmap, pos) + else: + assert(False) + + + +def decode_ip(ip: str) -> int: + addr = ipaddress.ip_address(ip) + if isinstance(addr, ipaddress.IPv4Address): + return int.from_bytes(addr.packed, 'big') + 0xffff00000000 + elif isinstance(addr, ipaddress.IPv6Address): + return int.from_bytes(addr.packed, 'big') + +class ASMap: + def __init__(self, filename): + ''' + 
Instantiate an ASMap from a file. + ''' + with open(filename, "rb") as f: + self.asmap = DecodeBytes(f.read()) + + def lookup_asn(self, ip): + ''' + Look up the ASN for an IP, returns an ASN id as integer or None if not + known. + ''' + return Interpret(self.asmap, decode_ip(ip), 128) diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py index 78eb04a836..23b40cf028 100755 --- a/contrib/seeds/makeseeds.py +++ b/contrib/seeds/makeseeds.py @@ -6,12 +6,14 @@ # Generate seeds.txt from Pieter's DNS seeder # +import argparse import re import sys -import dns.resolver import collections from typing import List, Dict, Union +from asmap import ASMap + NSEEDS=512 MAX_SEEDS_PER_ASN = { @@ -123,34 +125,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]: hist[ip['sortkey']].append(ip) return [value[0] for (key,value) in list(hist.items()) if len(value)==1] -def lookup_asn(net: str, ip: str) -> Union[int, None]: - """ Look up the asn for an `ip` address by querying cymru.com - on network `net` (e.g. ipv4 or ipv6). - - Returns in integer ASN or None if it could not be found. - """ - try: - if net == 'ipv4': - ipaddr = ip - prefix = '.origin' - else: # http://www.team-cymru.com/IP-ASN-mapping.html - res = str() # 2001:4860:b002:23::68 - for nb in ip.split(':')[:4]: # pick the first 4 nibbles - for c in nb.zfill(4): # right padded with '0' - res += c + '.' 
# 2001 4860 b002 0023 - ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3 - prefix = '.origin6' - - asn = int([x.to_text() for x in dns.resolver.resolve('.'.join( - reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com', - 'TXT').response.answer][0].split('\"')[1].split(' ')[0]) - return asn - except Exception as e: - sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n') - return None - # Based on Greg Maxwell's seed_filter.py -def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: +def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: """ Prunes `ips` by (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and (b) trimming ips to have at most `max_per_asn` ips from each asn in each net. @@ -173,13 +149,14 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di # do not add this ip as we already too many # ips from this network continue - asn = lookup_asn(ip['net'], ip['ip']) - if asn is None or asn_count[asn] == max_per_asn[ip['net']]: + asn = asmap.lookup_asn(ip['ip']) + if asn is None or asn_count[ip['net'], asn] == max_per_asn[ip['net']]: # do not add this ip as we already have too many # ips from this ASN on this network continue - asn_count[asn] += 1 + asn_count[ip['net'], asn] += 1 net_count[ip['net']] += 1 + ip['asn'] = asn result.append(ip) # Add back Onions (up to max_per_net) @@ -195,7 +172,18 @@ def ip_stats(ips: List[Dict]) -> str: return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}" +def parse_args(): + argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.') + argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True) + return argparser.parse_args() + def main(): + args = parse_args() + + print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True) + asmap = 
ASMap(args.asmap) + print('Done.', file=sys.stderr) + lines = sys.stdin.readlines() ips = [parseline(line) for line in lines] @@ -230,15 +218,18 @@ def main(): ips = filtermultiport(ips) print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr) # Look up ASNs and limit results, both per ASN and globally. - ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS) + ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS) print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr) # Sort the results by IP address (for deterministic output). ips.sort(key=lambda x: (x['net'], x['sortkey'])) for ip in ips: if ip['net'] == 'ipv6': - print('[%s]:%i' % (ip['ip'], ip['port'])) + print(f"[{ip['ip']}]:{ip['port']}", end="") else: - print('%s:%i' % (ip['ip'], ip['port'])) + print(f"{ip['ip']}:{ip['port']}", end="") + if 'asn' in ip: + print(f" # AS{ip['asn']}", end="") + print() if __name__ == '__main__': main() |