From 58180b5fd467ff15e7170145d77c84f13ccafe6e Mon Sep 17 00:00:00 2001
From: James O'Beirne <james.obeirne@gmail.com>
Date: Fri, 19 Oct 2018 12:28:47 -0400
Subject: tests: add utility to easily profile node performance with perf

Introduces `TestNode.profile_with_perf()` context manager which
samples node execution to produce profiling data.

Also introduces a test framework flag, `--perf`, which will run
perf on all nodes for the duration of a given test.
---
 test/functional/test_framework/test_framework.py | 16 +++-
 test/functional/test_framework/test_node.py      | 98 +++++++++++++++++++++++-
 2 files changed, 111 insertions(+), 3 deletions(-)

(limited to 'test/functional/test_framework')

diff --git a/test/functional/test_framework/test_framework.py b/test/functional/test_framework/test_framework.py
index 352fa32b5b..240d9ce87c 100755
--- a/test/functional/test_framework/test_framework.py
+++ b/test/functional/test_framework/test_framework.py
@@ -128,6 +128,8 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
                             help="Attach a python debugger if test fails")
         parser.add_argument("--usecli", dest="usecli", default=False, action="store_true",
                             help="use bitcoin-cli instead of RPC for all commands")
+        parser.add_argument("--perf", dest="perf", default=False, action="store_true",
+                            help="profile running nodes with perf for the duration of the test")
         self.add_options(parser)
         self.options = parser.parse_args()
 
@@ -201,11 +203,20 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
                 node.cleanup_on_exit = False
             self.log.info("Note: bitcoinds were not stopped and may still be running")
 
-        if not self.options.nocleanup and not self.options.noshutdown and success != TestStatus.FAILED:
+        should_clean_up = (
+            not self.options.nocleanup and
+            not self.options.noshutdown and
+            success != TestStatus.FAILED and
+            not self.options.perf
+        )
+        if should_clean_up:
             self.log.info("Cleaning up {} on exit".format(self.options.tmpdir))
             cleanup_tree_on_exit = True
+        elif self.options.perf:
+            self.log.warning("Not cleaning up dir {} due to perf data".format(self.options.tmpdir))
+            cleanup_tree_on_exit = False
         else:
-            self.log.warning("Not cleaning up dir %s" % self.options.tmpdir)
+            self.log.warning("Not cleaning up dir {}".format(self.options.tmpdir))
             cleanup_tree_on_exit = False
 
         if success == TestStatus.PASSED:
@@ -309,6 +320,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
                 extra_conf=extra_confs[i],
                 extra_args=extra_args[i],
                 use_cli=self.options.usecli,
+                start_perf=self.options.perf,
             ))
 
     def start_node(self, i, *args, **kwargs):
diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py
index 031a8824b1..14d37e7220 100755
--- a/test/functional/test_framework/test_node.py
+++ b/test/functional/test_framework/test_node.py
@@ -18,6 +18,8 @@ import tempfile
 import time
 import urllib.parse
 import collections
+import shlex
+import sys
 
 from .authproxy import JSONRPCException
 from .util import (
@@ -59,7 +61,13 @@ class TestNode():
     To make things easier for the test writer, any unrecognised messages will
     be dispatched to the RPC connection."""
 
-    def __init__(self, i, datadir, *, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False):
+    def __init__(self, i, datadir, *, rpchost, timewait, bitcoind, bitcoin_cli, mocktime, coverage_dir, extra_conf=None, extra_args=None, use_cli=False, start_perf=False):
+        """
+        Kwargs:
+            start_perf (bool): If True, begin profiling the node with `perf` as soon as
+                the node starts.
+        """
+
         self.index = i
         self.datadir = datadir
         self.stdout_dir = os.path.join(self.datadir, "stdout")
@@ -87,6 +95,7 @@ class TestNode():
 
         self.cli = TestNodeCLI(bitcoin_cli, self.datadir)
         self.use_cli = use_cli
+        self.start_perf = start_perf
 
         self.running = False
         self.process = None
@@ -95,6 +104,8 @@ class TestNode():
         self.url = None
         self.log = logging.getLogger('TestFramework.node%d' % i)
         self.cleanup_on_exit = True # Whether to kill the node when this object goes away
+        # Track running perf subprocesses here, keyed by their profile name.
+        self.perf_subprocesses = {}
 
         self.p2ps = []
 
@@ -186,6 +197,9 @@ class TestNode():
         self.running = True
         self.log.debug("bitcoind started, waiting for RPC to come up")
 
+        if self.start_perf:
+            self._start_perf()
+
     def wait_for_rpc_connection(self):
         """Sets up an RPC connection to the bitcoind process. Returns False if unable to connect."""
         # Poll at a rate of four times per second
@@ -238,6 +252,10 @@ class TestNode():
         except http.client.CannotSendRequest:
             self.log.exception("Unable to stop node.")
 
+        # If there are any running perf processes, stop them.
+        for profile_name in tuple(self.perf_subprocesses.keys()):
+            self._stop_perf(profile_name)
+
         # Check that stderr is as expected
         self.stderr.seek(0)
         stderr = self.stderr.read().decode('utf-8').strip()
@@ -317,6 +335,84 @@ class TestNode():
                     increase_allowed * 100, before_memory_usage, after_memory_usage,
                     perc_increase_memory_usage * 100))
 
+    @contextlib.contextmanager
+    def profile_with_perf(self, profile_name):
+        """
+        Context manager that allows easy profiling of node activity using `perf`.
+
+        See `test/functional/README.md` for details on perf usage.
+
+        Args:
+            profile_name (str): Prefix of the profile data filename generated by perf.
+        """
+        subp = self._start_perf(profile_name)
+        try:
+            yield
+        finally:
+            # Stop perf even if the profiled block raises, so no sampler is leaked.
+            if subp:
+                self._stop_perf(profile_name)
+
+    def _start_perf(self, profile_name=None):
+        """Start a perf process to profile this node.
+
+        Returns the subprocess running perf, or None if perf can't be used here."""
+
+        def test_success(cmd):
+            return subprocess.call(
+                # shell=True required for pipe use below
+                cmd, shell=True,
+                stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) == 0
+
+        if not sys.platform.startswith('linux'):
+            self.log.warning("Can't profile with perf; only available on Linux platforms")
+            return None
+
+        if not test_success('which perf'):
+            self.log.warning("Can't profile with perf; must install perf-tools")
+            return None
+
+        if not test_success('readelf -S {} | grep .debug_str'.format(shlex.quote(self.binary))):
+            self.log.warning(
+                "perf output won't be very useful without debug symbols compiled into bitcoind")
+
+        # Reserve a unique path; close our handle right away, perf reopens it by name.
+        with tempfile.NamedTemporaryFile(
+            dir=self.datadir,
+            prefix="{}.perf.data.".format(profile_name or 'test'),
+            delete=False,
+        ) as output_file:
+            output_path = output_file.name
+
+        cmd = [
+            'perf', 'record',
+            '-g',                     # Record the callgraph.
+            '--call-graph', 'dwarf',  # Compatibility for gcc's -fomit-frame-pointer.
+            '-F', '101',              # Sampling frequency in Hz.
+            '-p', str(self.process.pid),
+            '-o', output_path,
+        ]
+        subp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        self.perf_subprocesses[profile_name] = subp
+        return subp
+
+    def _stop_perf(self, profile_name):
+        """Stop (and pop) a perf subprocess, then log how to view its output."""
+        subp = self.perf_subprocesses.pop(profile_name)
+        output_path = subp.args[subp.args.index('-o') + 1]
+
+        subp.terminate()
+        # communicate() drains and closes both pipes; wait() can deadlock with PIPE.
+        stderr = subp.communicate(timeout=10)[1].decode()
+
+        if 'Consider tweaking /proc/sys/kernel/perf_event_paranoid' in stderr:
+            self.log.warning(
+                "perf couldn't collect data! Try "
+                "'sudo sysctl -w kernel.perf_event_paranoid=-1'")
+        else:
+            report_cmd = "perf report -i {}".format(output_path)
+            self.log.info("See perf output by running '{}'".format(report_cmd))
+
     def assert_start_raises_init_error(self, extra_args=None, expected_msg=None, match=ErrorMatch.FULL_TEXT, *args, **kwargs):
         """Attempt to start the node and expect it to raise an error.
 
-- 
cgit v1.2.3