make test: early core detection, code cleanup

Change-Id: I503d69f902f1c8d3107fb3f2b9f31b0083ebb747
Signed-off-by: Klement Sekera <ksekera@cisco.com>
diff --git a/test/framework.py b/test/framework.py
index 78aebf6..423feaf 100644
--- a/test/framework.py
+++ b/test/framework.py
@@ -8,7 +8,6 @@
 import unittest
 import tempfile
 import time
-import resource
 import faulthandler
 import random
 import copy
@@ -23,9 +22,9 @@
 from vpp_sub_interface import VppSubInterface
 from vpp_lo_interface import VppLoInterface
 from vpp_papi_provider import VppPapiProvider
-from log import *
+from log import RED, GREEN, YELLOW, double_line_delim, single_line_delim, \
+    getLogger, colorize
 from vpp_object import VppObjectRegistry
-from vpp_punt_socket import vpp_uds_socket_name
 if os.name == 'posix' and sys.version_info[0] < 3:
     # using subprocess32 is recommended by python official documentation
     # @ https://docs.python.org/2/library/subprocess.html
@@ -263,8 +262,7 @@
                            coredump_size, "}", "api-trace", "{", "on", "}",
                            "api-segment", "{", "prefix", cls.shm_prefix, "}",
                            "plugins", "{", "plugin", "dpdk_plugin.so", "{",
-                           "disable", "}", "}",
-                           "punt", "{", "socket", cls.punt_socket_path, "}"]
+                           "disable", "}", "}", ]
         if plugin_path is not None:
             cls.vpp_cmdline.extend(["plugin_path", plugin_path])
         cls.logger.info("vpp_cmdline: %s" % cls.vpp_cmdline)
@@ -337,7 +335,6 @@
         cls.file_handler.setLevel(DEBUG)
         cls.logger.addHandler(cls.file_handler)
         cls.shm_prefix = cls.tempdir.split("/")[-1]
-        cls.punt_socket_path = '%s/%s' % (cls.tempdir, vpp_uds_socket_name)
         os.chdir(cls.tempdir)
         cls.logger.info("Temporary dir is %s, shm prefix is %s",
                         cls.tempdir, cls.shm_prefix)
@@ -391,7 +388,7 @@
                 cls.quit()
             except:
                 pass
-            raise t, v, tb
+            raise (t, v, tb)
 
     @classmethod
     def quit(cls):
@@ -1086,7 +1083,7 @@
         for t in tests:
             if isinstance(t, unittest.suite.TestSuite):
                 # this is a bunch of tests, recursively filter...
-                x = filter_tests(t, filter_cb)
+                x = VppTestRunner.filter_tests(t, filter_cb)
                 if x.countTestCases() > 0:
                     result.addTest(x)
             elif isinstance(t, unittest.TestCase):
diff --git a/test/hook.py b/test/hook.py
index 0ef1377..44b432e 100644
--- a/test/hook.py
+++ b/test/hook.py
@@ -62,6 +62,8 @@
 
     def on_crash(self, core_path):
         if self.testcase.debug_core:
+            # notify parent process that we're handling a core file
+            open('%s/_core_handled' % self.testcase.tempdir, 'a').close()
             spawn_gdb(self.testcase.vpp_bin, core_path, self.logger)
         else:
             self.logger.critical("Core file present, debug with: gdb %s %s" %
diff --git a/test/run_tests.py b/test/run_tests.py
index a3d5336..419b8c6 100644
--- a/test/run_tests.py
+++ b/test/run_tests.py
@@ -13,6 +13,12 @@
 from log import global_logger
 from discover_tests import discover_tests
 
+# timeout which controls how long the child has to finish after seeing
+# a core dump in test temporary directory. If this is exceeded, parent assumes
+# that child process is stuck (e.g. waiting for shm mutex, which will never
+# get unlocked) and kill the child
+core_timeout = 3
+
 
 def test_runner_wrapper(suite, keep_alive_pipe, result_pipe, failed_pipe):
     result = not VppTestRunner(
@@ -62,6 +68,8 @@
     result = None
     failed = set()
     last_heard = time.time()
+    core_detected_at = None
+    debug_core = os.getenv("DEBUG", "").lower() == "core"
     while True:
         readable = select.select([keep_alive_parent_end.fileno(),
                                   result_parent_end.fileno(),
@@ -82,7 +90,8 @@
                 failed.add(failed_test.__name__)
             last_heard = time.time()
         fail = False
-        if last_heard + test_timeout < time.time():
+        if last_heard + test_timeout < time.time() and \
+                not os.path.isfile("%s/_core_handled" % last_test_temp_dir):
             fail = True
             global_logger.critical("Timeout while waiting for child test "
                                    "runner process (last test running was "
@@ -93,6 +102,19 @@
             global_logger.critical("Child process unexpectedly died (last "
                                    "test running was `%s' in `%s')!" %
                                    (last_test, last_test_temp_dir))
+        elif last_test_temp_dir and last_test_vpp_binary:
+            core_path = "%s/core" % last_test_temp_dir
+            if os.path.isfile(core_path):
+                if core_detected_at is None:
+                    core_detected_at = time.time()
+                elif core_detected_at + core_timeout < time.time():
+                    if not os.path.isfile(
+                            "%s/_core_handled" % last_test_temp_dir):
+                        global_logger.critical(
+                            "Child unresponsive and core-file exists in test "
+                            "temporary directory!")
+                        fail = True
+
         if fail:
             failed_dir = os.getenv('VPP_TEST_FAILED_DIR')
             lttd = last_test_temp_dir.split("/")[-1]
@@ -113,7 +135,7 @@
                 if os.path.isfile(core_path):
                     global_logger.error("Core-file exists in test temporary "
                                         "directory: %s!" % core_path)
-                    if d and d.lower() == "core":
+                    if debug_core:
                         spawn_gdb(last_test_vpp_binary, core_path,
                                   global_logger)
             child.terminate()