Fix hanging test runner when child process dies

When fixing the test summary after a test run doesn't finish properly I
introduced a bug where child process which died would leave the whole
run hanging. This patch fixed the bug while still having the correct
test summary.

Change-Id: I206b1a7dab4032d24cbc50667b8dd0bdcebb67a6
Signed-off-by: juraj.linkes <juraj.linkes@pantheon.tech>
diff --git a/test/run_tests.py b/test/run_tests.py
index d3a885f..76b6a62 100644
--- a/test/run_tests.py
+++ b/test/run_tests.py
@@ -52,14 +52,14 @@
 
 
 def test_runner_wrapper(suite, keep_alive_pipe, result_pipe, stdouterr_queue,
-                        logger):
+                        partial_result_queue, logger):
     sys.stdout = stdouterr_queue
     sys.stderr = stdouterr_queue
     VppTestCase.logger = logger
-    unittest.installHandler()
     result = VppTestRunner(keep_alive_pipe=keep_alive_pipe,
                            descriptions=descriptions,
                            verbosity=verbose,
+                           results_pipe=partial_result_queue,
                            failfast=failfast).run(suite)
     result_pipe.send(result)
     result_pipe.close()
@@ -71,13 +71,15 @@
         self.keep_alive_parent_end, self.keep_alive_child_end = Pipe(
             duplex=False)
         self.result_parent_end, self.result_child_end = Pipe(duplex=False)
+        self.partial_result_parent_end, self.partial_result_child_end = Pipe(
+            duplex=False)
         self.testcase_suite = testcase_suite
         self.stdouterr_queue = manager.Queue()
         self.logger = get_parallel_logger(self.stdouterr_queue)
         self.child = Process(target=test_runner_wrapper,
                              args=(testcase_suite, self.keep_alive_child_end,
                                    self.result_child_end, self.stdouterr_queue,
-                                   self.logger)
+                                   self.partial_result_child_end, self.logger)
                              )
         self.child.start()
         self.pid = self.child.pid
@@ -88,14 +90,15 @@
         self.last_heard = time.time()
         self.core_detected_at = None
         self.failed_tests = []
-        self.fail = False
-        self.fail_addressed = False
+        self.partial_result = None
 
     def close_pipes(self):
         self.keep_alive_child_end.close()
         self.result_child_end.close()
+        self.partial_result_child_end.close()
         self.keep_alive_parent_end.close()
         self.result_parent_end.close()
+        self.partial_result_parent_end.close()
 
 
 def stdouterr_reader_wrapper(unread_testcases, finished_unread_testcases,
@@ -151,7 +154,8 @@
         for wrapped_testcase_suite in wrapped_testcase_suites:
             readable = select.select(
                 [wrapped_testcase_suite.keep_alive_parent_end.fileno(),
-                 wrapped_testcase_suite.result_parent_end.fileno()],
+                 wrapped_testcase_suite.result_parent_end.fileno(),
+                 wrapped_testcase_suite.partial_result_parent_end.fileno()],
                 [], [], 1)[0]
             if wrapped_testcase_suite.result_parent_end.fileno() in readable:
                 results.append(
@@ -160,6 +164,13 @@
                 finished_testcase_suites.add(wrapped_testcase_suite)
                 continue
 
+            if wrapped_testcase_suite.partial_result_parent_end.fileno() \
+                    in readable:
+                while wrapped_testcase_suite.partial_result_parent_end.poll():
+                    wrapped_testcase_suite.partial_result = \
+                        wrapped_testcase_suite.partial_result_parent_end.recv()
+                    wrapped_testcase_suite.last_heard = time.time()
+
             if wrapped_testcase_suite.keep_alive_parent_end.fileno() \
                     in readable:
                 while wrapped_testcase_suite.keep_alive_parent_end.poll():
@@ -170,48 +181,44 @@
                         wrapped_testcase_suite.keep_alive_parent_end.recv()
                 wrapped_testcase_suite.last_heard = time.time()
 
-            if not wrapped_testcase_suite.fail:
-                if wrapped_testcase_suite.last_heard + \
-                        test_timeout < time.time() and \
-                        not os.path.isfile(
-                                    "%s/_core_handled" %
-                                    wrapped_testcase_suite.last_test_temp_dir):
-                    wrapped_testcase_suite.fail = True
-                    wrapped_testcase_suite.logger.critical(
-                        "Timeout while waiting for child test "
-                        "runner process (last test running was "
-                        "`%s' in `%s')!" %
-                        (wrapped_testcase_suite.last_test,
-                         wrapped_testcase_suite.last_test_temp_dir))
-                elif not wrapped_testcase_suite.child.is_alive():
-                    wrapped_testcase_suite.fail = True
-                    wrapped_testcase_suite.logger.critical(
-                        "Child python process unexpectedly died "
-                        "(last test running was `%s' in `%s')!" %
-                        (wrapped_testcase_suite.last_test,
-                         wrapped_testcase_suite.last_test_temp_dir))
-                elif wrapped_testcase_suite.last_test_temp_dir and \
-                        wrapped_testcase_suite.last_test_vpp_binary:
-                    core_path = "%s/core" % \
-                                wrapped_testcase_suite.last_test_temp_dir
-                    if os.path.isfile(core_path):
-                        if wrapped_testcase_suite.core_detected_at is None:
-                            wrapped_testcase_suite.core_detected_at = \
-                                time.time()
-                        elif wrapped_testcase_suite.core_detected_at + \
-                                core_timeout < time.time():
-                            if not os.path.isfile(
-                                            "%s/_core_handled" %
-                                            wrapped_testcase_suite.
-                                            last_test_temp_dir):
-                                wrapped_testcase_suite.logger.critical(
-                                    "Child python process unresponsive and "
-                                    "core-file exists in test temporary "
-                                    "directory!")
-                                wrapped_testcase_suite.fail = True
+            fail = False
+            if wrapped_testcase_suite.last_heard + test_timeout < time.time() \
+                    and not os.path.isfile(
+                                "%s/_core_handled" %
+                                wrapped_testcase_suite.last_test_temp_dir):
+                fail = True
+                wrapped_testcase_suite.logger.critical(
+                    "Timeout while waiting for child test "
+                    "runner process (last test running was "
+                    "`%s' in `%s')!" %
+                    (wrapped_testcase_suite.last_test,
+                     wrapped_testcase_suite.last_test_temp_dir))
+            elif not wrapped_testcase_suite.child.is_alive():
+                fail = True
+                wrapped_testcase_suite.logger.critical(
+                    "Child python process unexpectedly died "
+                    "(last test running was `%s' in `%s')!" %
+                    (wrapped_testcase_suite.last_test,
+                     wrapped_testcase_suite.last_test_temp_dir))
+            elif wrapped_testcase_suite.last_test_temp_dir and \
+                    wrapped_testcase_suite.last_test_vpp_binary:
+                core_path = "%s/core" % \
+                            wrapped_testcase_suite.last_test_temp_dir
+                if os.path.isfile(core_path):
+                    if wrapped_testcase_suite.core_detected_at is None:
+                        wrapped_testcase_suite.core_detected_at = time.time()
+                    elif wrapped_testcase_suite.core_detected_at + \
+                            core_timeout < time.time():
+                        if not os.path.isfile(
+                                        "%s/_core_handled" %
+                                        wrapped_testcase_suite.
+                                        last_test_temp_dir):
+                            wrapped_testcase_suite.logger.critical(
+                                "Child python process unresponsive and core-"
+                                "file exists in test temporary directory!")
+                            fail = True
 
-            if wrapped_testcase_suite.fail and not \
-                    wrapped_testcase_suite.fail_addressed:
+            if fail:
                 failed_dir = os.getenv('VPP_TEST_FAILED_DIR')
                 lttd = os.path.basename(
                     wrapped_testcase_suite.last_test_temp_dir)
@@ -255,7 +262,7 @@
                             spawn_gdb(
                                 wrapped_testcase_suite.last_test_vpp_binary,
                                 core_path, wrapped_testcase_suite.logger)
-                os.kill(wrapped_testcase_suite.child.pid, signal.SIGINT)
+                wrapped_testcase_suite.child.terminate()
                 try:
                     # terminating the child process tends to leave orphan
                     # VPP process around
@@ -263,7 +270,9 @@
                 except OSError:
                     # already dead
                     pass
-                wrapped_testcase_suite.fail_addressed = True
+                results.append((wrapped_testcase_suite.testcase_suite,
+                                wrapped_testcase_suite.partial_result))
+                finished_testcase_suites.add(wrapped_testcase_suite)
 
         for finished_testcase in finished_testcase_suites:
             finished_testcase.child.join()