about | summary | refs | log | tree | commit | diff
path: root/contrib/seeds/makeseeds.py
diff options
context:
space:
mode:
author: laanwj <126646+laanwj@users.noreply.github.com> 2022-06-16 21:43:35 +0200
committer: laanwj <126646+laanwj@users.noreply.github.com> 2022-06-16 21:44:52 +0200
commit: 7f2c983e1cfdb58b6f84eabe5ff6a16f143f39aa (patch)
tree: b32995de59adb54e95099abfa0834d9bc353c78b /contrib/seeds/makeseeds.py
parent: b0c830634907076480528f1829e212eeb0e764e3 (diff)
parent: 667e316bcb300eec131727a7ce54dd038031e267 (diff)
Merge bitcoin/bitcoin#24864: contrib: Use asmap for ASN lookup in makeseeds
667e316bcb300eec131727a7ce54dd038031e267 contrib: Update makeseeds to asmap-nextgen (laanwj) ae00b9e02c819a69293fdcdab91f57b33e9275d9 contrib: add seeds progress indicator and remove asmap one in makeseeds script (Jon Atack) b54180303de0df9be7023577f35648225cf95443 contrib: Use asmap for ASN lookup in makeseeds (laanwj) Pull request description: Add an argument `-a` to provide an asmap file to do the IP to ASN lookups. This speeds up the script greatly, and makes the output deterministic. Also removes the dependency on `dns.lookup`. I've annotated the output with ASxxxx comments to provide a way to verify the functionality. For now I've added instructions in README.md to download and use the `demo.map` from the asmap repository. When we have some other mechanism for distributing asmap files we could switch to that. This continues #24824. I've removed the fallbacks and extra complexity, as everyone will be using the same instructions anyway. Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com> Co-authored-by: russeree <reese.russell@ymail.com> ACKs for top commit: sipa: ACK 667e316bcb300eec131727a7ce54dd038031e267 dunxen: re-ACK 667e316 Tree-SHA512: c4cedfbd1dee6be7547aa92dd9e262c46f0ff8099e647559b2a40eab0cc9874e9a813706630dd5c880390d23f432e789fb3e7e8a09f376f567071e68f5904c65
Diffstat (limited to 'contrib/seeds/makeseeds.py')
-rwxr-xr-x contrib/seeds/makeseeds.py 69
1 files changed, 30 insertions, 39 deletions
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py
index 78eb04a836..37c6f5fd7c 100755
--- a/contrib/seeds/makeseeds.py
+++ b/contrib/seeds/makeseeds.py
@@ -6,12 +6,15 @@
# Generate seeds.txt from Pieter's DNS seeder
#
+import argparse
+import ipaddress
import re
import sys
-import dns.resolver
import collections
from typing import List, Dict, Union
+from asmap import ASMap, net_to_prefix
+
NSEEDS=512
MAX_SEEDS_PER_ASN = {
@@ -45,7 +48,7 @@ def parseline(line: str) -> Union[dict, None]:
sline = line.split()
if len(sline) < 11:
# line too short to be valid, skip it.
- return None
+ return None
m = PATTERN_IPV4.match(sline[0])
sortkey = None
ip = None
@@ -123,34 +126,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
-def lookup_asn(net: str, ip: str) -> Union[int, None]:
- """ Look up the asn for an `ip` address by querying cymru.com
- on network `net` (e.g. ipv4 or ipv6).
-
- Returns in integer ASN or None if it could not be found.
- """
- try:
- if net == 'ipv4':
- ipaddr = ip
- prefix = '.origin'
- else: # http://www.team-cymru.com/IP-ASN-mapping.html
- res = str() # 2001:4860:b002:23::68
- for nb in ip.split(':')[:4]: # pick the first 4 nibbles
- for c in nb.zfill(4): # right padded with '0'
- res += c + '.' # 2001 4860 b002 0023
- ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
- prefix = '.origin6'
-
- asn = int([x.to_text() for x in dns.resolver.resolve('.'.join(
- reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com',
- 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
- return asn
- except Exception as e:
- sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
- return None
-
# Based on Greg Maxwell's seed_filter.py
-def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
+def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
""" Prunes `ips` by
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
@@ -165,21 +142,18 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di
asn_count: Dict[int, int] = collections.defaultdict(int)
for i, ip in enumerate(ips_ipv46):
- if i % 10 == 0:
- # give progress update
- print(f"{i:6d}/{len(ips_ipv46)} [{100*i/len(ips_ipv46):04.1f}%]\r", file=sys.stderr, end='', flush=True)
-
if net_count[ip['net']] == max_per_net:
# do not add this ip as we already have too many
# ips from this network
continue
- asn = lookup_asn(ip['net'], ip['ip'])
- if asn is None or asn_count[asn] == max_per_asn[ip['net']]:
+ asn = asmap.lookup(net_to_prefix(ipaddress.ip_network(ip['ip'])))
+ if not asn or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
# do not add this ip as we already have too many
# ips from this ASN on this network
continue
- asn_count[asn] += 1
+ asn_count[ip['net'], asn] += 1
net_count[ip['net']] += 1
+ ip['asn'] = asn
result.append(ip)
# Add back Onions (up to max_per_net)
@@ -195,9 +169,23 @@ def ip_stats(ips: List[Dict]) -> str:
return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
+def parse_args():
+ argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.')
+ argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True)
+ return argparser.parse_args()
+
def main():
+ args = parse_args()
+
+ print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True)
+ with open(args.asmap, 'rb') as f:
+ asmap = ASMap.from_binary(f.read())
+ print('Done.', file=sys.stderr)
+
+ print('Loading and parsing DNS seeds…', end='', file=sys.stderr, flush=True)
lines = sys.stdin.readlines()
ips = [parseline(line) for line in lines]
+ print('Done.', file=sys.stderr)
print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr)
print(f'{ip_stats(ips):s} Initial', file=sys.stderr)
@@ -230,15 +218,18 @@ def main():
ips = filtermultiport(ips)
print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
# Look up ASNs and limit results, both per ASN and globally.
- ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
+ ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
# Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips:
if ip['net'] == 'ipv6':
- print('[%s]:%i' % (ip['ip'], ip['port']))
+ print(f"[{ip['ip']}]:{ip['port']}", end="")
else:
- print('%s:%i' % (ip['ip'], ip['port']))
+ print(f"{ip['ip']}:{ip['port']}", end="")
+ if 'asn' in ip:
+ print(f" # AS{ip['asn']}", end="")
+ print()
if __name__ == '__main__':
main()