In commit “test: Kill process if stop-RPC is not available” (4ab7879ec6b71804cc3a51bcf58fa6318fa103b0)
This change doesn’t seem safe. The stop_node method can be called by arbitrary test code at arbitrary times, including when self.rpc_connected
is False, and I don’t think it should be suppressing errors unless the caller has explicitly requested that errors should not be passed on.
Would suggest narrowing the error suppression behavior, so errors and bugs will be less likely to go unnoticed because of this change:
0--- a/test/functional/feature_framework_stop_node.py
1+++ b/test/functional/feature_framework_stop_node.py
2@@ -42,7 +42,7 @@ class FeatureFrameworkStopNode(BitcoinTestFramework):
3 assert self.nodes[0].running, \
4 "The process should still be flagged as running before calling stop_node()"
5 self.log.debug("Explicitly stopping the node to verify it completes cleanly during the test")
6- self.nodes[0].stop_node(expected_stderr=invalid_arg_error)
7+ self.nodes[0].stop_node(expected_stderr=invalid_arg_error, force=True)
8
9 if __name__ == '__main__':
10 FeatureFrameworkStopNode(__file__).main()
11--- a/test/functional/test_framework/test_framework.py
12+++ b/test/functional/test_framework/test_framework.py
13@@ -315,7 +315,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
14 if not self.options.noshutdown:
15 self.log.info("Stopping nodes")
16 if self.nodes:
17- self.stop_nodes()
18+ self.stop_nodes(force=self.success != TestStatus.PASSED)
19 else:
20 for node in self.nodes:
21 node.cleanup_on_exit = False
22@@ -569,7 +569,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
23 node.wait_for_rpc_connection()
24 except Exception:
25 # If one node failed to start, stop the others
26- self.stop_nodes()
27+ self.stop_nodes(force=True)
28 raise
29
30 if self.options.coveragedir is not None:
31@@ -580,11 +580,11 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
32 """Stop a bitcoind test node"""
33 self.nodes[i].stop_node(expected_stderr, wait=wait)
34
35- def stop_nodes(self, wait=0):
36+ def stop_nodes(self, wait=0, force=False):
37 """Stop multiple bitcoind test nodes"""
38 for node in self.nodes:
39 # Issue RPC to stop nodes
40- node.stop_node(wait=wait, wait_until_stopped=False)
41+ node.stop_node(wait=wait, wait_until_stopped=False, force=force)
42
43 for node in self.nodes:
44 # Wait for nodes to stop
45--- a/test/functional/test_framework/test_node.py
46+++ b/test/functional/test_framework/test_node.py
47@@ -393,12 +393,30 @@ class TestNode():
48 def version_is_at_least(self, ver):
49 return self.version is None or self.version >= ver
50
51- def stop_node(self, expected_stderr='', *, wait=0, wait_until_stopped=True):
52- """Stop the node."""
53+ def stop_node(self, expected_stderr='', *, wait=0, wait_until_stopped=True, force=False):
54+ """Stop the node.
55+
56+ Args:
57+ force (bool): Stop the node forcefully if necessary, and change
58+ errors that would otherwise be exceptions into logged warnings.
59+
60+ Notes:
61+ - The force argument should only be set to true when a test has
62+ already failed, and the caller is already throwing an exception of
63+ its own, and wants its own exception to take precedence over errors
64+ from this function. Since the error that happened was presumably
65+ unknown and unexpected, the node may have already crashed or may
66+ need to be shut down forcefully.
67+ """
68 if not self.running:
69 return
70 self.log.debug("Stopping node")
71- if self.rpc_connected:
72+
73+ # If self.running is true but self.rpc_connected is not true, it means
74+ # that the process has been started, but wait_for_rpc_connection() was
75+ # not able to make a successful connection. In this case, if force is
76+ # true, avoid trying to make another RPC call and just stop the process.
77+ if not force or self.rpc_connected:
78 try:
79 # Do not use wait argument when testing older nodes, e.g. in wallet_backwards_compatibility.py
80 if self.version_is_at_least(180000):
Note: The diff above is still calling the stop RPC instead of killing the node when self.rpc_connected
is true. But it might actually be more desirable to change if not force or self.rpc_connected:
there to if not force:
and prefer to kill node processes instead of stopping them after test failures. Would not suggest doing this here, since it would change other behavior beyond the scope of the PR, but I could imagine cases where changing this might reduce noise and help debugging errors in CI.