aboutsummaryrefslogtreecommitdiff
path: root/contrib/seeds
diff options
context:
space:
mode:
authorlaanwj <126646+laanwj@users.noreply.github.com>2022-04-15 17:14:40 +0200
committerlaanwj <126646+laanwj@users.noreply.github.com>2022-05-31 11:57:49 +0200
commitb54180303de0df9be7023577f35648225cf95443 (patch)
treefc4986f60f2b760edffa18d9865f12e4a0ffe021 /contrib/seeds
parentbd6c5e410829764ea0a3c4251e42f86242e043a2 (diff)
contrib: Use asmap for ASN lookup in makeseeds
Add an argument `-a` to provide an asmap file to do the IP-to-ASN lookups. This speeds up the script greatly, and makes the output deterministic. Also removes the dependency on `dns.lookup`. I've annotated the output with ASxxxx comments to provide a way to verify the functionality. For now I've added instructions in README.md to download and use the `demo.map` from the asmap repository. When we have some other mechanism for distributing asmap files we could switch to that. This continues #24824. I've removed all the fallbacks and extra complexity, as everyone will be using the same instructions anyway. Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com> Co-authored-by: James O'Beirne <james.obeirne@pm.me> Co-authored-by: russeree <reese.russell@ymail.com>
Diffstat (limited to 'contrib/seeds')
-rw-r--r--contrib/seeds/.gitignore1
-rw-r--r--contrib/seeds/README.md18
-rw-r--r--contrib/seeds/asmap.py90
-rwxr-xr-xcontrib/seeds/makeseeds.py59
4 files changed, 120 insertions, 48 deletions
diff --git a/contrib/seeds/.gitignore b/contrib/seeds/.gitignore
index e4a39d6093..d9a2451f70 100644
--- a/contrib/seeds/.gitignore
+++ b/contrib/seeds/.gitignore
@@ -1 +1,2 @@
seeds_main.txt
+asmap-filled.dat
diff --git a/contrib/seeds/README.md b/contrib/seeds/README.md
index c53446bfb0..b2ea7522ac 100644
--- a/contrib/seeds/README.md
+++ b/contrib/seeds/README.md
@@ -8,21 +8,11 @@ and remove old versions as necessary (at a minimum when GetDesirableServiceFlags
changes its default return value, as those are the services which seeds are added
to addrman with).
-The seeds compiled into the release are created from sipa's DNS seed data, like this:
+The seeds compiled into the release are created from sipa's DNS seed and AS map
+data. Run the following commands from the `/contrib/seeds` directory:
curl https://bitcoin.sipa.be/seeds.txt.gz | gzip -dc > seeds_main.txt
- python3 makeseeds.py < seeds_main.txt > nodes_main.txt
+ curl https://bitcoin.sipa.be/asmap-filled.dat > asmap-filled.dat
+ python3 makeseeds.py -a asmap-filled.dat < seeds_main.txt > nodes_main.txt
cat nodes_main_manual.txt >> nodes_main.txt
python3 generate-seeds.py . > ../../src/chainparamsseeds.h
-
-## Dependencies
-
-Ubuntu, Debian:
-
- sudo apt-get install python3-dnspython
-
-and/or for other operating systems:
-
- pip install dnspython
-
-See https://dnspython.readthedocs.io/en/latest/installation.html for more information.
diff --git a/contrib/seeds/asmap.py b/contrib/seeds/asmap.py
new file mode 100644
index 0000000000..e7e05a1d10
--- /dev/null
+++ b/contrib/seeds/asmap.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+# Copyright (c) 2013-2020 The Bitcoin Core developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+import ipaddress
+
def DecodeBytes(byts):
    """Expand a sequence of byte values into a flat list of bits.

    Each byte contributes eight entries, least-significant bit first, so
    ``b'\\x01'`` becomes ``[1, 0, 0, 0, 0, 0, 0, 0]``.
    """
    bits = []
    for byt in byts:
        # Shift 0 first: bit order within a byte is LSB -> MSB.
        bits.extend((byt >> shift) & 1 for shift in range(8))
    return bits
+
def DecodeBits(stream, bitpos, minval, bit_sizes):
    """Decode one variable-length integer from a bit list.

    The value is encoded as a "size class" followed by a mantissa. Every
    class except the last is introduced by one selector bit:

    * selector ``1``: the value lies beyond this class; ``2**bit_size`` is
      added to the running value and decoding moves on to the next class.
    * selector ``0``: the value lies in this class; the next ``bit_size``
      bits are read most-significant-first and added to the running value.

    The last class has no selector bit (it is implied to be ``0``).

    stream    -- indexable sequence of 0/1 integers
    bitpos    -- index in ``stream`` at which decoding starts
    minval    -- value represented by an all-zero encoding
    bit_sizes -- mantissa width of each size class, in order

    Returns a ``(value, new_bitpos)`` tuple.

    Raises ValueError if ``bit_sizes`` is empty, and IndexError if
    ``stream`` ends before the value is complete.
    """
    val = minval
    last = len(bit_sizes) - 1
    for pos, bit_size in enumerate(bit_sizes):
        if pos < last:
            # Selector bit: does the value continue into a later class?
            bit = stream[bitpos]
            bitpos += 1
        else:
            # The final class is selected implicitly.
            bit = 0
        if bit:
            val += 1 << bit_size
            continue
        # Mantissa: bit_size bits, most significant first.
        for b in range(bit_size):
            val += stream[bitpos] << (bit_size - 1 - b)
            bitpos += 1
        return (val, bitpos)
    # Only reachable when bit_sizes is empty; raise explicitly so the
    # failure is not lost when assertions are disabled (python -O).
    raise ValueError("bit_sizes must contain at least one size class")
+
def DecodeType(stream, bitpos):
    """Decode an instruction opcode (0..3) starting at ``bitpos``.

    Returns ``(opcode, new_bitpos)``.
    """
    opcode_bit_sizes = [0, 0, 1]
    return DecodeBits(stream, bitpos, 0, opcode_bit_sizes)
+
def DecodeASN(stream, bitpos):
    """Decode an AS number (minimum value 1) starting at ``bitpos``.

    Returns ``(asn, new_bitpos)``.
    """
    # Size classes with mantissa widths 15 through 24 bits.
    asn_bit_sizes = list(range(15, 25))
    return DecodeBits(stream, bitpos, 1, asn_bit_sizes)
+
def DecodeMatch(stream, bitpos):
    """Decode a match pattern (minimum value 2) starting at ``bitpos``.

    Returns ``(match, new_bitpos)``.
    """
    # Size classes with mantissa widths 1 through 8 bits.
    match_bit_sizes = list(range(1, 9))
    return DecodeBits(stream, bitpos, 2, match_bit_sizes)
+
def DecodeJump(stream, bitpos):
    """Decode a jump offset (minimum value 17) starting at ``bitpos``.

    Returns ``(jump, new_bitpos)``.
    """
    # Size classes with mantissa widths 5 through 30 bits.
    jump_bit_sizes = list(range(5, 31))
    return DecodeBits(stream, bitpos, 17, jump_bit_sizes)
+
def Interpret(asmap, num, bits):
    """Execute the asmap program against an address and return its ASN.

    asmap -- the program as a list of bits (see DecodeBytes)
    num   -- the address as an integer
    bits  -- number of address bits in ``num``; they are consumed from the
             most significant (bit ``bits - 1``) downwards

    Instructions, by opcode:

    * 0: decode an ASN and return it.
    * 1: decode a jump offset, consume one address bit and skip ``jump``
         program bits if that address bit is 1.
    * 2: decode a match pattern; its bits below the leading 1 are compared
         against the next address bits. On the first mismatch the current
         default is returned.
    * 3: decode an ASN and remember it as the current default.

    Returns the ASN as an integer, or the current default (``None`` if no
    default was set) when a match instruction fails.

    Raises ValueError if the program ends without returning or needs more
    address bits than ``bits`` provides. These checks are explicit raises
    rather than asserts so they remain active under ``python -O``.
    """
    pos = 0
    default = None
    while True:
        if pos >= len(asmap):
            raise ValueError("asmap program ended without returning an ASN")
        (opcode, pos) = DecodeType(asmap, pos)
        if opcode == 0:
            (asn, pos) = DecodeASN(asmap, pos)
            return asn
        elif opcode == 1:
            (jump, pos) = DecodeJump(asmap, pos)
            if bits <= 0:
                raise ValueError("asmap program consumed more address bits than available")
            if (num >> (bits - 1)) & 1:
                pos += jump
            bits -= 1
        elif opcode == 2:
            (match, pos) = DecodeMatch(asmap, pos)
            # The leading 1 bit of `match` only marks the pattern length.
            matchlen = match.bit_length() - 1
            for bit in range(matchlen):
                if bits <= 0:
                    raise ValueError("asmap program consumed more address bits than available")
                if ((num >> (bits - 1)) & 1) != ((match >> (matchlen - 1 - bit)) & 1):
                    return default
                bits -= 1
        elif opcode == 3:
            (default, pos) = DecodeASN(asmap, pos)
        else:
            raise ValueError(f"unknown asmap opcode {opcode}")
+
+
+
def decode_ip(ip: str) -> int:
    """Convert a textual IP address to the 128-bit integer used for lookups.

    IPv6 addresses map to their integer value. IPv4 addresses are offset by
    ``0xffff00000000``, which places them in the IPv4-mapped IPv6 range
    (``::ffff:a.b.c.d``).

    Raises ValueError (from ``ipaddress.ip_address``) if ``ip`` is not a
    valid IPv4 or IPv6 address.
    """
    addr = ipaddress.ip_address(ip)
    value = int.from_bytes(addr.packed, 'big')
    if isinstance(addr, ipaddress.IPv4Address):
        value += 0xffff00000000
    # ip_address() only yields IPv4Address or IPv6Address, so every path
    # returns an int as the annotation promises.
    return value
+
class ASMap:
    """An IP-to-ASN lookup table backed by an asmap file."""

    def __init__(self, filename):
        '''
        Read the binary asmap file `filename` and keep its contents as a
        list of bits in `self.asmap`.
        '''
        with open(filename, "rb") as f:
            raw = f.read()
        self.asmap = DecodeBytes(raw)

    def lookup_asn(self, ip):
        '''
        Return the ASN for the textual address `ip` as an integer, or None
        if the map has no entry for it.
        '''
        # Addresses are always looked up as 128-bit integers.
        num = decode_ip(ip)
        return Interpret(self.asmap, num, 128)
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py
index 78eb04a836..23b40cf028 100755
--- a/contrib/seeds/makeseeds.py
+++ b/contrib/seeds/makeseeds.py
@@ -6,12 +6,14 @@
# Generate seeds.txt from Pieter's DNS seeder
#
+import argparse
import re
import sys
-import dns.resolver
import collections
from typing import List, Dict, Union
+from asmap import ASMap
+
NSEEDS=512
MAX_SEEDS_PER_ASN = {
@@ -123,34 +125,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
-def lookup_asn(net: str, ip: str) -> Union[int, None]:
- """ Look up the asn for an `ip` address by querying cymru.com
- on network `net` (e.g. ipv4 or ipv6).
-
- Returns in integer ASN or None if it could not be found.
- """
- try:
- if net == 'ipv4':
- ipaddr = ip
- prefix = '.origin'
- else: # http://www.team-cymru.com/IP-ASN-mapping.html
- res = str() # 2001:4860:b002:23::68
- for nb in ip.split(':')[:4]: # pick the first 4 nibbles
- for c in nb.zfill(4): # right padded with '0'
- res += c + '.' # 2001 4860 b002 0023
- ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
- prefix = '.origin6'
-
- asn = int([x.to_text() for x in dns.resolver.resolve('.'.join(
- reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com',
- 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
- return asn
- except Exception as e:
- sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
- return None
-
# Based on Greg Maxwell's seed_filter.py
-def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
+def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
""" Prunes `ips` by
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
@@ -173,13 +149,14 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di
# do not add this ip as we already too many
# ips from this network
continue
- asn = lookup_asn(ip['net'], ip['ip'])
- if asn is None or asn_count[asn] == max_per_asn[ip['net']]:
+ asn = asmap.lookup_asn(ip['ip'])
+ if asn is None or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
# do not add this ip as we already have too many
# ips from this ASN on this network
continue
- asn_count[asn] += 1
+ asn_count[ip['net'], asn] += 1
net_count[ip['net']] += 1
+ ip['asn'] = asn
result.append(ip)
# Add back Onions (up to max_per_net)
@@ -195,7 +172,18 @@ def ip_stats(ips: List[Dict]) -> str:
return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
def parse_args():
    """Parse the command line; the asmap file option (-a/--asmap) is mandatory."""
    parser = argparse.ArgumentParser(
        description='Generate a list of bitcoin node seed ip addresses.',
    )
    parser.add_argument(
        "-a", "--asmap",
        required=True,
        help='the location of the asmap asn database file (required)',
    )
    return parser.parse_args()
+
def main():
+ args = parse_args()
+
+ print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True)
+ asmap = ASMap(args.asmap)
+ print('Done.', file=sys.stderr)
+
lines = sys.stdin.readlines()
ips = [parseline(line) for line in lines]
@@ -230,15 +218,18 @@ def main():
ips = filtermultiport(ips)
print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
# Look up ASNs and limit results, both per ASN and globally.
- ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
+ ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
# Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips:
if ip['net'] == 'ipv6':
- print('[%s]:%i' % (ip['ip'], ip['port']))
+ print(f"[{ip['ip']}]:{ip['port']}", end="")
else:
- print('%s:%i' % (ip['ip'], ip['port']))
+ print(f"{ip['ip']}:{ip['port']}", end="")
+ if 'asn' in ip:
+ print(f" # AS{ip['asn']}", end="")
+ print()
if __name__ == '__main__':
main()