aboutsummaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
authorlaanwj <126646+laanwj@users.noreply.github.com>2022-06-16 21:43:35 +0200
committerlaanwj <126646+laanwj@users.noreply.github.com>2022-06-16 21:44:52 +0200
commit7f2c983e1cfdb58b6f84eabe5ff6a16f143f39aa (patch)
treeb32995de59adb54e95099abfa0834d9bc353c78b /contrib
parentb0c830634907076480528f1829e212eeb0e764e3 (diff)
parent667e316bcb300eec131727a7ce54dd038031e267 (diff)
downloadbitcoin-7f2c983e1cfdb58b6f84eabe5ff6a16f143f39aa.tar.xz
Merge bitcoin/bitcoin#24864: contrib: Use asmap for ASN lookup in makeseeds
667e316bcb300eec131727a7ce54dd038031e267 contrib: Update makeseeds to asmap-nextgen (laanwj) ae00b9e02c819a69293fdcdab91f57b33e9275d9 contrib: add seeds progress indicator and remove asmap one in makeseeds script (Jon Atack) b54180303de0df9be7023577f35648225cf95443 contrib: Use asmap for ASN lookup in makeseeds (laanwj) Pull request description: Add an argument `-a` to provide a asmap file to do the IP to ASN lookups. This speeds up the script greatly, and makes the output deterministic. Also removes the dependency on `dns.lookup`. I've annotated the output with ASxxxx comments to provide a way to verify the functionality. For now I've added instructions in README.md to download and use the `demo.map` from the asmap repository. When we have some other mechanism for distributing asmap files we could switch to that. This continues #24824. I've removed the fallbacks and extra complexity, as everyone will be using the same instructions anyway. Co-authored-by: Pieter Wuille <pieter.wuille@gmail.com> Co-authored-by: russeree <reese.russell@ymail.com> ACKs for top commit: sipa: ACK 667e316bcb300eec131727a7ce54dd038031e267 dunxen: re-ACK 667e316 Tree-SHA512: c4cedfbd1dee6be7547aa92dd9e262c46f0ff8099e647559b2a40eab0cc9874e9a813706630dd5c880390d23f432e789fb3e7e8a09f376f567071e68f5904c65
Diffstat (limited to 'contrib')
-rw-r--r--contrib/seeds/.gitignore1
-rw-r--r--contrib/seeds/README.md18
-rw-r--r--contrib/seeds/asmap.py815
-rwxr-xr-xcontrib/seeds/makeseeds.py69
4 files changed, 850 insertions, 53 deletions
diff --git a/contrib/seeds/.gitignore b/contrib/seeds/.gitignore
index e4a39d6093..d9a2451f70 100644
--- a/contrib/seeds/.gitignore
+++ b/contrib/seeds/.gitignore
@@ -1 +1,2 @@
seeds_main.txt
+asmap-filled.dat
diff --git a/contrib/seeds/README.md b/contrib/seeds/README.md
index c53446bfb0..b2ea7522ac 100644
--- a/contrib/seeds/README.md
+++ b/contrib/seeds/README.md
@@ -8,21 +8,11 @@ and remove old versions as necessary (at a minimum when GetDesirableServiceFlags
changes its default return value, as those are the services which seeds are added
to addrman with).
-The seeds compiled into the release are created from sipa's DNS seed data, like this:
+The seeds compiled into the release are created from sipa's DNS seed and AS map
+data. Run the following commands from the `/contrib/seeds` directory:
curl https://bitcoin.sipa.be/seeds.txt.gz | gzip -dc > seeds_main.txt
- python3 makeseeds.py < seeds_main.txt > nodes_main.txt
+ curl https://bitcoin.sipa.be/asmap-filled.dat > asmap-filled.dat
+ python3 makeseeds.py -a asmap-filled.dat < seeds_main.txt > nodes_main.txt
cat nodes_main_manual.txt >> nodes_main.txt
python3 generate-seeds.py . > ../../src/chainparamsseeds.h
-
-## Dependencies
-
-Ubuntu, Debian:
-
- sudo apt-get install python3-dnspython
-
-and/or for other operating systems:
-
- pip install dnspython
-
-See https://dnspython.readthedocs.io/en/latest/installation.html for more information.
diff --git a/contrib/seeds/asmap.py b/contrib/seeds/asmap.py
new file mode 100644
index 0000000000..7a605d0b9e
--- /dev/null
+++ b/contrib/seeds/asmap.py
@@ -0,0 +1,815 @@
+# Copyright (c) 2022 Pieter Wuille
+# Distributed under the MIT software license, see the accompanying
+# file LICENSE or http://www.opensource.org/licenses/mit-license.php.
+
+"""
+This module provides the ASNEntry and ASMap classes.
+"""
+
+import copy
+import ipaddress
+import random
+import unittest
+from enum import Enum
+from functools import total_ordering
+from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union, overload
+
+def net_to_prefix(net: Union[ipaddress.IPv4Network,ipaddress.IPv6Network]) -> List[bool]:
+ """
+ Convert an IPv4 or IPv6 network to a prefix represented as a list of bits.
+
+ IPv4 ranges are remapped to their IPv4-mapped IPv6 range (::ffff:0:0/96).
+ """
+ num_bits = net.prefixlen
+ netrange = int.from_bytes(net.network_address.packed, 'big')
+
+ # Map an IPv4 prefix into IPv6 space.
+ if isinstance(net, ipaddress.IPv4Network):
+ num_bits += 96
+ netrange += 0xffff00000000
+
+ # Strip unused bottom bits.
+ assert (netrange & ((1 << (128 - num_bits)) - 1)) == 0
+ return [((netrange >> (127 - i)) & 1) != 0 for i in range(num_bits)]
+
+def prefix_to_net(prefix: List[bool]) -> Union[ipaddress.IPv4Network,ipaddress.IPv6Network]:
+ """The reverse operation of net_to_prefix."""
+ # Convert to number
+ netrange = sum(b << (127 - i) for i, b in enumerate(prefix))
+ num_bits = len(prefix)
+ assert num_bits <= 128
+
+ # Return IPv4 range if in ::ffff:0:0/96
+ if num_bits >= 96 and (netrange >> 32) == 0xffff:
+ return ipaddress.IPv4Network((netrange & 0xffffffff, num_bits - 96), True)
+
+ # Return IPv6 range otherwise.
+ return ipaddress.IPv6Network((netrange, num_bits), True)
+
+# Shortcut for (prefix, ASN) entries.
+ASNEntry = Tuple[List[bool], int]
+
+# Shortcut for (prefix, old ASN, new ASN) entries.
+ASNDiff = Tuple[List[bool], int, int]
+
+class _VarLenCoder:
+ """
+ A class representing a custom variable-length binary encoder/decoder for
+ integers. Each object represents a different coder, with different parameters
+ minval and clsbits.
+
+ The encoding is easiest to describe using an example. Let's say minval=100 and
+ clsbits=[4,2,2,3]. In that case:
+ - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)].
+ - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)].
+ - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)].
+ - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)].
+
+ In general, every number is encoded as:
+ - First, k "1"-bits, where k is the class the number falls in (there is one class
+ per element of clsbits).
+ - Then, a "0"-bit, unless k is the highest class, in which case there is nothing.
+ - Lastly, clsbits[k] bits encoding in big endian the position in its class that
+ number falls into.
+ - Every class k consists of 2^clsbits[k] consecutive integers. k=0 starts at minval,
+ other classes start one past the last element of the class before it.
+ """
+
+ def __init__(self, minval: int, clsbits: List[int]):
+ """Construct a new _VarLenCoder."""
+ self._minval = minval
+ self._clsbits = clsbits
+ self._maxval = minval + sum(1 << b for b in clsbits) - 1
+
+ def can_encode(self, val: int) -> bool:
+ """Check whether value val is in the range this coder supports."""
+ return self._minval <= val <= self._maxval
+
+ def encode(self, val: int, ret: List[int]) -> None:
+ """Append encoding of val onto integer list ret."""
+
+ assert self._minval <= val <= self._maxval
+ val -= self._minval
+ bits = 0
+ for k, bits in enumerate(self._clsbits):
+ if val >> bits:
+ # If the value will not fit in class k, subtract its range from v,
+ # emit a "1" bit and continue with the next class.
+ val -= 1 << bits
+ ret.append(1)
+ else:
+ if k + 1 < len(self._clsbits):
+ # Unless we're in the last class, emit a "0" bit.
+ ret.append(0)
+ break
+ # And then encode v (now the position within the class) in big endian.
+ ret.extend((val >> (bits - 1 - b)) & 1 for b in range(bits))
+
+ def encode_size(self, val: int) -> int:
+ """Compute how many bits are needed to encode val."""
+ assert self._minval <= val <= self._maxval
+ val -= self._minval
+ ret = 0
+ bits = 0
+ for k, bits in enumerate(self._clsbits):
+ if val >> bits:
+ val -= 1 << bits
+ ret += 1
+ else:
+ ret += k + 1 < len(self._clsbits)
+ break
+ return ret + bits
+
+ def decode(self, stream, bitpos) -> Tuple[int,int]:
+ """Decode a number starting at bitpos in stream, returning value and new bitpos."""
+ val = self._minval
+ bits = 0
+ for k, bits in enumerate(self._clsbits):
+ bit = 0
+ if k + 1 < len(self._clsbits):
+ bit = stream[bitpos]
+ bitpos += 1
+ if not bit:
+ break
+ val += 1 << bits
+ for i in range(bits):
+ bit = stream[bitpos]
+ bitpos += 1
+ val += bit << (bits - 1 - i)
+ return val, bitpos
+
+# Variable-length encoders used in the binary asmap format.
+_CODER_INS = _VarLenCoder(0, [0, 0, 1])
+_CODER_ASN = _VarLenCoder(1, list(range(15, 25)))
+_CODER_MATCH = _VarLenCoder(2, list(range(1, 9)))
+_CODER_JUMP = _VarLenCoder(17, list(range(5, 31)))
+
+class _Instruction(Enum):
+ """One instruction in the binary asmap format."""
+ # A return instruction, encoded as [0], returns a constant ASN. It is followed by
+ # an integer using the ASN encoding.
+ RETURN = 0
+ # A jump instruction, encoded as [1,0] inspects the next unused bit in the input
+ # and either continues execution (if 0), or skips a specified number of bits (if 1).
+ # It is followed by an integer, and then two subprograms. The integer uses jump encoding
+ # and corresponds to the length of the first subprogram (so it can be skipped).
+ JUMP = 1
+ # A match instruction, encoded as [1,1,0] inspects 1 or more of the next unused bits
+ # in the input with its argument. If they all match, execution continues. If they do
+ # not, failure is returned. If a default instruction has been executed before, instead
+ # of failure the default instruction's argument is returned. It is followed by an
+ # integer in match encoding, and a subprogram. That value is at least 2 bits and at
+ # most 9 bits. An n-bit value signifies matching (n-1) bits in the input with the lower
+ # (n-1) bits in the match value.
+ MATCH = 2
+ # A default instruction, encoded as [1,1,1] sets the default variable to its argument,
+ # and continues execution. It is followed by an integer in ASN encoding, and a subprogram.
+ DEFAULT = 3
+ # Not an actual instruction, but a way to encode the empty program that fails. In the
+ # encoder, it is used more generally to represent the failure case inside MATCH instructions,
+ # which may (if used inside the context of a DEFAULT instruction) actually correspond to
+ # a succesful return. In this usage, they're always converted to an actual MATCH or RETURN
+ # before the top level is reached (see make_default below).
+ END = 4
+
+class _BinNode:
+ """A class representing a (node of) the parsed binary asmap format."""
+
+ @overload
+ def __init__(self, ins: _Instruction): ...
+ @overload
+ def __init__(self, ins: _Instruction, arg1: int): ...
+ @overload
+ def __init__(self, ins: _Instruction, arg1: "_BinNode", arg2: "_BinNode"): ...
+ @overload
+ def __init__(self, ins: _Instruction, arg1: int, arg2: "_BinNode"): ...
+
+ def __init__(self, ins: _Instruction, arg1=None, arg2=None):
+ """
+ Construct a new asmap node. Possibilities are:
+ - _BinNode(_Instruction.RETURN, asn)
+ - _BinNode(_Instruction.JUMP, node_0, node_1)
+ - _BinNode(_Instruction.MATCH, val, node)
+ - _BinNode(_Instruction.DEFAULT, asn, node)
+ - _BinNode(_Instruction.END)
+ """
+ self.ins = ins
+ self.arg1 = arg1
+ self.arg2 = arg2
+ if ins == _Instruction.RETURN:
+ assert isinstance(arg1, int)
+ assert arg2 is None
+ self.size = _CODER_INS.encode_size(ins.value) + _CODER_ASN.encode_size(arg1)
+ elif ins == _Instruction.JUMP:
+ assert isinstance(arg1, _BinNode)
+ assert isinstance(arg2, _BinNode)
+ self.size = (_CODER_INS.encode_size(ins.value) + _CODER_JUMP.encode_size(arg1.size) +
+ arg1.size + arg2.size)
+ elif ins == _Instruction.DEFAULT:
+ assert isinstance(arg1, int)
+ assert isinstance(arg2, _BinNode)
+ self.size = _CODER_INS.encode_size(ins.value) + _CODER_ASN.encode_size(arg1) + arg2.size
+ elif ins == _Instruction.MATCH:
+ assert isinstance(arg1, int)
+ assert isinstance(arg2, _BinNode)
+ self.size = (_CODER_INS.encode_size(ins.value) + _CODER_MATCH.encode_size(arg1)
+ + arg2.size)
+ elif ins == _Instruction.END:
+ assert arg1 is None
+ assert arg2 is None
+ self.size = 0
+ else:
+ assert False
+
+ @staticmethod
+ def make_end() -> "_BinNode":
+ """Constructor for a _BinNode with just an END instruction."""
+ return _BinNode(_Instruction.END)
+
+ @staticmethod
+ def make_leaf(val: int) -> "_BinNode":
+ """Constructor for a _BinNode of just a RETURN instruction."""
+ assert val is not None and val > 0
+ return _BinNode(_Instruction.RETURN, val)
+
+ @staticmethod
+ def make_branch(node0: "_BinNode", node1: "_BinNode") -> "_BinNode":
+ """
+ Construct a _BinNode corresponding to running either the node0 or node1 subprogram,
+ based on the next input bit. It exploits shortcuts that are possible in the encoding,
+ and uses either a JUMP, MATCH, or END instruction.
+ """
+ if node0.ins == _Instruction.END and node1.ins == _Instruction.END:
+ return node0
+ if node0.ins == _Instruction.END:
+ if node1.ins == _Instruction.MATCH and node1.arg1 <= 0xFF:
+ return _BinNode(node1.ins, node1.arg1 + (1 << node1.arg1.bit_length()), node1.arg2)
+ return _BinNode(_Instruction.MATCH, 3, node1)
+ if node1.ins == _Instruction.END:
+ if node0.ins == _Instruction.MATCH and node0.arg1 <= 0xFF:
+ return _BinNode(node0.ins, node0.arg1 + (1 << (node0.arg1.bit_length() - 1)),
+ node0.arg2)
+ return _BinNode(_Instruction.MATCH, 2, node0)
+ return _BinNode(_Instruction.JUMP, node0, node1)
+
+ @staticmethod
+ def make_default(val: int, sub: "_BinNode") -> "_BinNode":
+ """
+ Construct a _BinNode that corresponds to the specified subprogram, with the specified
+ default value. It exploits shortcuts that are possible in the encoding, and will use
+ either a DEFAULT or a RETURN instruction."""
+ assert val is not None and val > 0
+ if sub.ins == _Instruction.END:
+ return _BinNode(_Instruction.RETURN, val)
+ if sub.ins in (_Instruction.RETURN, _Instruction.DEFAULT):
+ return sub
+ return _BinNode(_Instruction.DEFAULT, val, sub)
+
+@total_ordering
+class ASMap:
+ """
+ A class whose objects represent a mapping from subnets to ASNs.
+
+ Internally the mapping is stored as a binary trie, but can be converted
+ from/to a list of ASNEntry objects, and from/to the binary asmap file format.
+
+ In the trie representation, nodes are represented as bare lists for efficiency
+ and ease of manipulation:
+ - [0] means an unassigned subnet (no ASN mapping for it is present)
+ - [int] means a subnet mapped entirely to the specified ASN.
+ - [node,node] means a subnet whose lower half and upper half have different
+ - mappings, represented by new trie nodes.
+ """
+
+ def update(self, prefix: List[bool], asn: int) -> None:
+ """Update this ASMap object to map prefix to the specified asn."""
+ assert asn == 0 or _CODER_ASN.can_encode(asn)
+
+ def recurse(node: List, offset: int) -> None:
+ if offset == len(prefix):
+ # Reached the end of prefix; overwrite this node.
+ node.clear()
+ node.append(asn)
+ return
+ if len(node) == 1:
+ # Need to descend into a leaf node; split it up.
+ oldasn = node[0]
+ node.clear()
+ node.append([oldasn])
+ node.append([oldasn])
+ # Descend into the node.
+ recurse(node[prefix[offset]], offset + 1)
+ # If the result is two identical leaf children, merge them.
+ if len(node[0]) == 1 and len(node[1]) == 1 and node[0] == node[1]:
+ oldasn = node[0][0]
+ node.clear()
+ node.append(oldasn)
+ recurse(self._trie, 0)
+
+ def update_multi(self, entries: List[Tuple[List[bool], int]]) -> None:
+ """Apply multiple update operations, where longer prefixes take precedence."""
+ entries.sort(key=lambda entry: len(entry[0]))
+ for prefix, asn in entries:
+ self.update(prefix, asn)
+
+ def _set_trie(self, trie) -> None:
+ """Set trie directly. Internal use only."""
+ def recurse(node: List) -> None:
+ if len(node) < 2:
+ return
+ recurse(node[0])
+ recurse(node[1])
+ if len(node[0]) == 2:
+ return
+ if node[0] == node[1]:
+ if len(node[0]) == 0:
+ node.clear()
+ else:
+ asn = node[0][0]
+ node.clear()
+ node.append(asn)
+ recurse(trie)
+ self._trie = trie
+
+ def __init__(self, entries: Optional[Iterable[ASNEntry]] = None) -> None:
+ """Construct an ASMap object from an optional list of entries."""
+ self._trie = [0]
+ if entries is not None:
+ def entry_key(entry):
+ """Sort function that places shorter prefixes first."""
+ prefix, asn = entry
+ return len(prefix), prefix, asn
+ for prefix, asn in sorted(entries, key=entry_key):
+ self.update(prefix, asn)
+
+ def lookup(self, prefix: List[bool]) -> Optional[int]:
+ """Look up a prefix. Returns ASN, or 0 if unassigned, or None if indeterminate."""
+ node = self._trie
+ for bit in prefix:
+ if len(node) == 1:
+ break
+ node = node[bit]
+ if len(node) == 1:
+ return node[0]
+ return None
+
+ def _to_entries_flat(self, fill: bool = False) -> List[ASNEntry]:
+ """Convert an ASMap object to a list of non-overlapping (prefix, asn) objects."""
+ prefix : List[bool] = []
+
+ def recurse(node: List) -> List[ASNEntry]:
+ ret = []
+ if len(node) == 1:
+ if node[0] > 0:
+ ret = [(list(prefix), node[0])]
+ elif len(node) == 2:
+ prefix.append(False)
+ ret = recurse(node[0])
+ prefix[-1] = True
+ ret += recurse(node[1])
+ prefix.pop()
+ if fill and len(ret) > 1:
+ asns = set(x[1] for x in ret)
+ if len(asns) == 1:
+ ret = [(list(prefix), list(asns)[0])]
+ return ret
+ return recurse(self._trie)
+
+ def _to_entries_minimal(self, fill: bool = False) -> List[ASNEntry]:
+ """Convert a trie to a minimal list of ASNEntry objects, exploiting overlap."""
+ prefix : List[bool] = []
+
+ def recurse(node: List) -> (Tuple[Dict[Optional[int], List[ASNEntry]], bool]):
+ if len(node) == 1 and node[0] == 0:
+ return {None if fill else 0: []}, True
+ if len(node) == 1:
+ return {node[0]: [], None: [(list(prefix), node[0])]}, False
+ ret: Dict[Optional[int], List[ASNEntry]] = {}
+ prefix.append(False)
+ left, lhole = recurse(node[0])
+ prefix[-1] = True
+ right, rhole = recurse(node[1])
+ prefix.pop()
+ hole = not fill and (lhole or rhole)
+ def candidate(ctx: Optional[int], res0: Optional[List[ASNEntry]],
+ res1: Optional[List[ASNEntry]]):
+ if res0 is not None and res1 is not None:
+ if ctx not in ret or len(res0) + len(res1) < len(ret[ctx]):
+ ret[ctx] = res0 + res1
+ for ctx in set(left) | set(right):
+ candidate(ctx, left.get(ctx), right.get(ctx))
+ candidate(ctx, left.get(None), right.get(ctx))
+ candidate(ctx, left.get(ctx), right.get(None))
+ if not hole:
+ for ctx in list(ret):
+ if ctx is not None:
+ candidate(None, [(list(prefix), ctx)], ret[ctx])
+ if None in ret:
+ ret = {ctx:entries for ctx, entries in ret.items()
+ if ctx is None or len(entries) < len(ret[None])}
+ if hole:
+ ret = {ctx:entries for ctx, entries in ret.items() if ctx is None or ctx == 0}
+ return ret, hole
+ res, _ = recurse(self._trie)
+ return res[0] if 0 in res else res[None]
+
+ def __str__(self) -> str:
+ """Convert this ASMap object to a string containing Python code constructing it."""
+ return f"ASMap({self._trie})"
+
+ def to_entries(self, overlapping: bool = True, fill: bool = False) -> List[ASNEntry]:
+ """
+ Convert the mappings in this ASMap object to a list of ASNEntry objects.
+
+ Arguments:
+ overlapping: Permit the subnets in the resulting ASNEntry to overlap.
+ Setting this can result in a shorter list.
+ fill: Permit the resulting ASNEntry objects to cover subnets that
+ are unassigned in this ASMap object. Setting this can
+ result in a shorter list.
+ """
+ if overlapping:
+ return self._to_entries_minimal(fill)
+ return self._to_entries_flat(fill)
+
+ @staticmethod
+ def from_random(num_leaves: int = 10, max_asn: int = 6,
+ unassigned_prob: float = 0.5) -> "ASMap":
+ """
+ Construct a random ASMap object, with specified:
+ - Number of leaves in its trie (at least 1)
+ - Maximum ASN value (at least 1)
+ - Probability for leaf nodes to be unassigned
+
+ The number of leaves in the resulting object may be less than what is
+ requested. This method is mostly intended for testing.
+ """
+ assert num_leaves >= 1
+ assert max_asn >= 1 or unassigned_prob == 1
+ assert _CODER_ASN.can_encode(max_asn)
+ assert 0.0 <= unassigned_prob <= 1.0
+ trie: List = []
+ leaves = [trie]
+ ret = ASMap()
+ for i in range(1, num_leaves):
+ idx = random.randrange(i)
+ leaf = leaves[idx]
+ lastleaf = leaves.pop()
+ if idx + 1 < i:
+ leaves[idx] = lastleaf
+ leaf.append([])
+ leaf.append([])
+ leaves.append(leaf[0])
+ leaves.append(leaf[1])
+ for leaf in leaves:
+ if random.random() >= unassigned_prob:
+ leaf.append(random.randrange(1, max_asn + 1))
+ else:
+ leaf.append(0)
+ #pylint: disable=protected-access
+ ret._set_trie(trie)
+ return ret
+
+ def _to_binnode(self, fill: bool = False) -> _BinNode:
+ """Convert a trie to a _BinNode object."""
+ def recurse(node: List) -> Tuple[Dict[Optional[int], _BinNode], bool]:
+ if len(node) == 1 and node[0] == 0:
+ return {(None if fill else 0): _BinNode.make_end()}, True
+ if len(node) == 1:
+ return {None: _BinNode.make_leaf(node[0]), node[0]: _BinNode.make_end()}, False
+ ret: Dict[Optional[int], _BinNode] = {}
+ left, lhole = recurse(node[0])
+ right, rhole = recurse(node[1])
+ hole = (lhole or rhole) and not fill
+
+ def candidate(ctx: Optional[int], arg1, arg2, func: Callable):
+ if arg1 is not None and arg2 is not None:
+ cand = func(arg1, arg2)
+ if ctx not in ret or cand.size < ret[ctx].size:
+ ret[ctx] = cand
+
+ for ctx in set(left) | set(right):
+ candidate(ctx, left.get(ctx), right.get(ctx), _BinNode.make_branch)
+ candidate(ctx, left.get(None), right.get(ctx), _BinNode.make_branch)
+ candidate(ctx, left.get(ctx), right.get(None), _BinNode.make_branch)
+ if not hole:
+ for ctx in set(ret) - set([None]):
+ candidate(None, ctx, ret[ctx], _BinNode.make_default)
+ if None in ret:
+ ret = {ctx:enc for ctx, enc in ret.items()
+ if ctx is None or enc.size < ret[None].size}
+ if hole:
+ ret = {ctx:enc for ctx, enc in ret.items() if ctx is None or ctx == 0}
+ return ret, hole
+ res, _ = recurse(self._trie)
+ return res[0] if 0 in res else res[None]
+
+ @staticmethod
+ def _from_binnode(binnode: _BinNode) -> "ASMap":
+ """Construct an ASMap object from a _BinNode. Internal use only."""
+ def recurse(node: _BinNode, default: int) -> List:
+ if node.ins == _Instruction.RETURN:
+ return [node.arg1]
+ if node.ins == _Instruction.JUMP:
+ return [recurse(node.arg1, default), recurse(node.arg2, default)]
+ if node.ins == _Instruction.MATCH:
+ val = node.arg1
+ sub = recurse(node.arg2, default)
+ while val >= 2:
+ bit = val & 1
+ val >>= 1
+ if bit:
+ sub = [[default], sub]
+ else:
+ sub = [sub, [default]]
+ return sub
+ assert node.ins == _Instruction.DEFAULT
+ return recurse(node.arg2, node.arg1)
+ ret = ASMap()
+ if binnode.ins != _Instruction.END:
+ #pylint: disable=protected-access
+ ret._set_trie(recurse(binnode, 0))
+ return ret
+
+ def to_binary(self, fill: bool = False) -> bytes:
+ """
+ Convert this ASMap object to binary.
+
+ Argument:
+ fill: permit the resulting binary encoder to contain mappers for
+ unassigned subnets in this ASMap object. Doing so may
+ reduce the size of the encoding.
+ Returns:
+ A bytes object with the encoding of this ASMap object.
+ """
+ bits: List[int] = []
+
+ def recurse(node: _BinNode) -> None:
+ _CODER_INS.encode(node.ins.value, bits)
+ if node.ins == _Instruction.RETURN:
+ _CODER_ASN.encode(node.arg1, bits)
+ elif node.ins == _Instruction.JUMP:
+ _CODER_JUMP.encode(node.arg1.size, bits)
+ recurse(node.arg1)
+ recurse(node.arg2)
+ elif node.ins == _Instruction.DEFAULT:
+ _CODER_ASN.encode(node.arg1, bits)
+ recurse(node.arg2)
+ else:
+ assert node.ins == _Instruction.MATCH
+ _CODER_MATCH.encode(node.arg1, bits)
+ recurse(node.arg2)
+
+ binnode = self._to_binnode(fill)
+ if binnode.ins != _Instruction.END:
+ recurse(binnode)
+
+ val = 0
+ nbits = 0
+ ret = []
+ for bit in bits:
+ val += (bit << nbits)
+ nbits += 1
+ if nbits == 8:
+ ret.append(val)
+ val = 0
+ nbits = 0
+ if nbits:
+ ret.append(val)
+ return bytes(ret)
+
+ @staticmethod
+ def from_binary(bindata: bytes) -> Optional["ASMap"]:
+ """Decode an ASMap object from the provided binary encoding."""
+
+ bits: List[int] = []
+ for byte in bindata:
+ bits.extend((byte >> i) & 1 for i in range(8))
+
+ def recurse(bitpos: int) -> Tuple[_BinNode, int]:
+ insval, bitpos = _CODER_INS.decode(bits, bitpos)
+ ins = _Instruction(insval)
+ if ins == _Instruction.RETURN:
+ asn, bitpos = _CODER_ASN.decode(bits, bitpos)
+ return _BinNode(ins, asn), bitpos
+ if ins == _Instruction.JUMP:
+ jump, bitpos = _CODER_JUMP.decode(bits, bitpos)
+ left, bitpos1 = recurse(bitpos)
+ if bitpos1 != bitpos + jump:
+ raise ValueError("Inconsistent jump")
+ right, bitpos = recurse(bitpos1)
+ return _BinNode(ins, left, right), bitpos
+ if ins == _Instruction.MATCH:
+ match, bitpos = _CODER_MATCH.decode(bits, bitpos)
+ sub, bitpos = recurse(bitpos)
+ return _BinNode(ins, match, sub), bitpos
+ assert ins == _Instruction.DEFAULT
+ asn, bitpos = _CODER_ASN.decode(bits, bitpos)
+ sub, bitpos = recurse(bitpos)
+ return _BinNode(ins, asn, sub), bitpos
+
+ if len(bits) == 0:
+ binnode = _BinNode(_Instruction.END)
+ else:
+ try:
+ binnode, bitpos = recurse(0)
+ except (ValueError, IndexError):
+ return None
+ if bitpos < len(bits) - 7:
+ return None
+ if not all(bit == 0 for bit in bits[bitpos:]):
+ return None
+
+ return ASMap._from_binnode(binnode)
+
+ def __lt__(self, other: "ASMap") -> bool:
+ return self._trie < other._trie
+
+ def __eq__(self, other: object) -> bool:
+ if isinstance(other, ASMap):
+ return self._trie == other._trie
+ return False
+
+ def extends(self, req: "ASMap") -> bool:
+ """Determine whether this matches req for all subranges where req is assigned."""
+ def recurse(actual: List, require: List) -> bool:
+ if len(require) == 1 and require[0] == 0:
+ return True
+ if len(require) == 1:
+ if len(actual) == 1:
+ return bool(require[0] == actual[0])
+ return recurse(actual[0], require) and recurse(actual[1], require)
+ if len(actual) == 2:
+ return recurse(actual[0], require[0]) and recurse(actual[1], require[1])
+ return recurse(actual, require[0]) and recurse(actual, require[1])
+ assert isinstance(req, ASMap)
+ #pylint: disable=protected-access
+ return recurse(self._trie, req._trie)
+
+ def diff(self, other: "ASMap") -> List[ASNDiff]:
+ """Compute the diff from self to other."""
+ prefix: List[bool] = []
+ ret: List[ASNDiff] = []
+
+ def recurse(old_node: List, new_node: List):
+ if len(old_node) == 1 and len(new_node) == 1:
+ if old_node[0] != new_node[0]:
+ ret.append((list(prefix), old_node[0], new_node[0]))
+ else:
+ old_left: List = old_node if len(old_node) == 1 else old_node[0]
+ old_right: List = old_node if len(old_node) == 1 else old_node[1]
+ new_left: List = new_node if len(new_node) == 1 else new_node[0]
+ new_right: List = new_node if len(new_node) == 1 else new_node[1]
+ prefix.append(False)
+ recurse(old_left, new_left)
+ prefix[-1] = True
+ recurse(old_right, new_right)
+ prefix.pop()
+ assert isinstance(other, ASMap)
+ #pylint: disable=protected-access
+ recurse(self._trie, other._trie)
+ return ret
+
+ def __copy__(self) -> "ASMap":
+ """Construct a copy of this ASMap object. Its state will not be shared."""
+ ret = ASMap()
+ #pylint: disable=protected-access
+ ret._set_trie(copy.deepcopy(self._trie))
+ return ret
+
+ def __deepcopy__(self, _) -> "ASMap":
+ # ASMap objects do not allow sharing of the _trie member, so we don't need the memoization.
+ return self.__copy__()
+
+
+class TestASMap(unittest.TestCase):
+ """Unit tests for this module."""
+
+ def test_ipv6_prefix_roundtrips(self) -> None:
+ """Test that random IPv6 network ranges roundtrip through prefix encoding."""
+ for _ in range(20):
+ net_bits = random.getrandbits(128)
+ for prefix_len in range(0, 129):
+ masked_bits = (net_bits >> (128 - prefix_len)) << (128 - prefix_len)
+ net = ipaddress.IPv6Network((masked_bits.to_bytes(16, 'big'), prefix_len))
+ prefix = net_to_prefix(net)
+ self.assertTrue(len(prefix) <= 128)
+ net2 = prefix_to_net(prefix)
+ self.assertEqual(net, net2)
+
+ def test_ipv4_prefix_roundtrips(self) -> None:
+ """Test that random IPv4 network ranges roundtrip through prefix encoding."""
+ for _ in range(100):
+ net_bits = random.getrandbits(32)
+ for prefix_len in range(0, 33):
+ masked_bits = (net_bits >> (32 - prefix_len)) << (32 - prefix_len)
+ net = ipaddress.IPv4Network((masked_bits.to_bytes(4, 'big'), prefix_len))
+ prefix = net_to_prefix(net)
+ self.assertTrue(32 <= len(prefix) <= 128)
+ net2 = prefix_to_net(prefix)
+ self.assertEqual(net, net2)
+
+ def test_asmap_roundtrips(self) -> None:
+ """Test case that verifies random ASMap objects roundtrip to/from entries/binary."""
+ # Iterate over the number of leaves the random test ASMap objects have.
+ for leaves in range(1, 20):
+ # Iterate over the number of bits in the AS numbers used.
+ for asnbits in range(0, 24):
+ # Iterate over the probability that leaves are unassigned.
+ for pct in range(101):
+ # Construct a random ASMap object according to the above parameters.
+ asmap = ASMap.from_random(num_leaves=leaves, max_asn=1 + (1 << asnbits),
+ unassigned_prob=0.01 * pct)
+ # Run tests for to_entries and construction from those entries, both
+ # for overlapping and non-overlapping ones.
+ for overlapping in [False, True]:
+ entries = asmap.to_entries(overlapping=overlapping, fill=False)
+ random.shuffle(entries)
+ asmap2 = ASMap(entries)
+ assert asmap2 is not None
+ self.assertEqual(asmap2, asmap)
+ entries = asmap.to_entries(overlapping=overlapping, fill=True)
+ random.shuffle(entries)
+ asmap2 = ASMap(entries)
+ assert asmap2 is not None
+ self.assertTrue(asmap2.extends(asmap))
+
+ # Run tests for to_binary and construction from binary.
+ enc = asmap.to_binary(fill=False)
+ asmap3 = ASMap.from_binary(enc)
+ assert asmap3 is not None
+ self.assertEqual(asmap3, asmap)
+ enc = asmap.to_binary(fill=True)
+ asmap3 = ASMap.from_binary(enc)
+ assert asmap3 is not None
+ self.assertTrue(asmap3.extends(asmap))
+
+ def test_patching(self) -> None:
+ """Test behavior of update, lookup, extends, and diff."""
+ #pylint: disable=too-many-locals,too-many-nested-blocks
+ # Iterate over the number of leaves the random test ASMap objects have.
+ for leaves in range(1, 20):
+ # Iterate over the number of bits in the AS numbers used.
+ for asnbits in range(0, 10):
+ # Iterate over the probability that leaves are unassigned.
+ for pct in range(0, 101):
+ # Construct a random ASMap object according to the above parameters.
+ asmap = ASMap.from_random(num_leaves=leaves, max_asn=1 + (1 << asnbits),
+ unassigned_prob=0.01 * pct)
+ # Make a copy of that asmap object to which patches will be applied.
+ # It starts off being equal to asmap.
+ patched = copy.copy(asmap)
+ # Keep a list of patches performed.
+ patches: List[ASNEntry] = []
+ # Initially there cannot be any difference.
+ self.assertEqual(asmap.diff(patched), [])
+ # Make 5 patches, each building on top of the previous ones.
+ for _ in range(0, 5):
+ # Construct a random path and new ASN to assign it to, apply it to patched,
+ # and remember it in patches.
+ pathlen = random.randrange(5)
+ path = [random.getrandbits(1) != 0 for _ in range(pathlen)]
+ newasn = random.randrange(1 + (1 << asnbits))
+ patched.update(path, newasn)
+ patches = [(path, newasn)] + patches
+
+ # Compute the diff, and whether asmap extends patched, and the other way
+ # around.
+ diff = asmap.diff(patched)
+ self.assertEqual(asmap == patched, len(diff) == 0)
+ extends = asmap.extends(patched)
+ back_extends = patched.extends(asmap)
+ # Determine whether those extends results are consistent with the diff
+ # result.
+ self.assertEqual(extends, all(d[2] == 0 for d in diff))
+ self.assertEqual(back_extends, all(d[1] == 0 for d in diff))
+ # For every diff found:
+ for path, old_asn, new_asn in diff:
+ # Verify asmap and patched actually differ there.
+ self.assertTrue(old_asn != new_asn)
+ self.assertEqual(asmap.lookup(path), old_asn)
+ self.assertEqual(patched.lookup(path), new_asn)
+ for _ in range(2):
+ # Extend the path far enough that it's smaller than any mapped
+ # range, and check the lookup holds there too.
+ spec_path = list(path)
+ while len(spec_path) < 32:
+ spec_path.append(random.getrandbits(1) != 0)
+ self.assertEqual(asmap.lookup(spec_path), old_asn)
+ self.assertEqual(patched.lookup(spec_path), new_asn)
+ # Search through the list of performed patches to find the last one
+ # applying to the extended path (note that patches is in reverse
+ # order, so the first match should work).
+ found = False
+ for patch_path, patch_asn in patches:
+ if spec_path[:len(patch_path)] == patch_path:
+ # When found, it must match whatever the result was patched
+ # to.
+ self.assertEqual(new_asn, patch_asn)
+ found = True
+ break
+ # And such a patch must exist.
+ self.assertTrue(found)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/contrib/seeds/makeseeds.py b/contrib/seeds/makeseeds.py
index 78eb04a836..37c6f5fd7c 100755
--- a/contrib/seeds/makeseeds.py
+++ b/contrib/seeds/makeseeds.py
@@ -6,12 +6,15 @@
# Generate seeds.txt from Pieter's DNS seeder
#
+import argparse
+import ipaddress
import re
import sys
-import dns.resolver
import collections
from typing import List, Dict, Union
+from asmap import ASMap, net_to_prefix
+
NSEEDS=512
MAX_SEEDS_PER_ASN = {
@@ -45,7 +48,7 @@ def parseline(line: str) -> Union[dict, None]:
sline = line.split()
if len(sline) < 11:
# line too short to be valid, skip it.
- return None
+ return None
m = PATTERN_IPV4.match(sline[0])
sortkey = None
ip = None
@@ -123,34 +126,8 @@ def filtermultiport(ips: List[Dict]) -> List[Dict]:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in list(hist.items()) if len(value)==1]
-def lookup_asn(net: str, ip: str) -> Union[int, None]:
- """ Look up the asn for an `ip` address by querying cymru.com
- on network `net` (e.g. ipv4 or ipv6).
-
- Returns in integer ASN or None if it could not be found.
- """
- try:
- if net == 'ipv4':
- ipaddr = ip
- prefix = '.origin'
- else: # http://www.team-cymru.com/IP-ASN-mapping.html
- res = str() # 2001:4860:b002:23::68
- for nb in ip.split(':')[:4]: # pick the first 4 nibbles
- for c in nb.zfill(4): # right padded with '0'
- res += c + '.' # 2001 4860 b002 0023
- ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
- prefix = '.origin6'
-
- asn = int([x.to_text() for x in dns.resolver.resolve('.'.join(
- reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com',
- 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
- return asn
- except Exception as e:
- sys.stderr.write(f'ERR: Could not resolve ASN for "{ip}": {e}\n')
- return None
-
# Based on Greg Maxwell's seed_filter.py
-def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
+def filterbyasn(asmap: ASMap, ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]:
""" Prunes `ips` by
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net.
@@ -165,21 +142,18 @@ def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Di
asn_count: Dict[int, int] = collections.defaultdict(int)
for i, ip in enumerate(ips_ipv46):
- if i % 10 == 0:
- # give progress update
- print(f"{i:6d}/{len(ips_ipv46)} [{100*i/len(ips_ipv46):04.1f}%]\r", file=sys.stderr, end='', flush=True)
-
if net_count[ip['net']] == max_per_net:
# do not add this ip as we already too many
# ips from this network
continue
- asn = lookup_asn(ip['net'], ip['ip'])
- if asn is None or asn_count[asn] == max_per_asn[ip['net']]:
+ asn = asmap.lookup(net_to_prefix(ipaddress.ip_network(ip['ip'])))
+ if not asn or asn_count[ip['net'], asn] == max_per_asn[ip['net']]:
# do not add this ip as we already have too many
# ips from this ASN on this network
continue
- asn_count[asn] += 1
+ asn_count[ip['net'], asn] += 1
net_count[ip['net']] += 1
+ ip['asn'] = asn
result.append(ip)
# Add back Onions (up to max_per_net)
@@ -195,9 +169,23 @@ def ip_stats(ips: List[Dict]) -> str:
return f"{hist['ipv4']:6d} {hist['ipv6']:6d} {hist['onion']:6d}"
+def parse_args():
+ argparser = argparse.ArgumentParser(description='Generate a list of bitcoin node seed ip addresses.')
+ argparser.add_argument("-a","--asmap", help='the location of the asmap asn database file (required)', required=True)
+ return argparser.parse_args()
+
def main():
+ args = parse_args()
+
+ print(f'Loading asmap database "{args.asmap}"…', end='', file=sys.stderr, flush=True)
+ with open(args.asmap, 'rb') as f:
+ asmap = ASMap.from_binary(f.read())
+ print('Done.', file=sys.stderr)
+
+ print('Loading and parsing DNS seeds…', end='', file=sys.stderr, flush=True)
lines = sys.stdin.readlines()
ips = [parseline(line) for line in lines]
+ print('Done.', file=sys.stderr)
print('\x1b[7m IPv4 IPv6 Onion Pass \x1b[0m', file=sys.stderr)
print(f'{ip_stats(ips):s} Initial', file=sys.stderr)
@@ -230,15 +218,18 @@ def main():
ips = filtermultiport(ips)
print(f'{ip_stats(ips):s} Filter out hosts with multiple bitcoin ports', file=sys.stderr)
# Look up ASNs and limit results, both per ASN and globally.
- ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
+ ips = filterbyasn(asmap, ips, MAX_SEEDS_PER_ASN, NSEEDS)
print(f'{ip_stats(ips):s} Look up ASNs and limit results per ASN and per net', file=sys.stderr)
# Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips:
if ip['net'] == 'ipv6':
- print('[%s]:%i' % (ip['ip'], ip['port']))
+ print(f"[{ip['ip']}]:{ip['port']}", end="")
else:
- print('%s:%i' % (ip['ip'], ip['port']))
+ print(f"{ip['ip']}:{ip['port']}", end="")
+ if 'asn' in ip:
+ print(f" # AS{ip['asn']}", end="")
+ print()
if __name__ == '__main__':
main()