hs-test: memory leak testing

add infra for memory leak testing

Type: test

Change-Id: I882e8dbb360597cdb82ad52682725f7d39b2df24
Signed-off-by: Matus Fabian <matfabia@cisco.com>
diff --git a/extras/hs-test/Makefile b/extras/hs-test/Makefile
index 033d16b..83a42c8 100644
--- a/extras/hs-test/Makefile
+++ b/extras/hs-test/Makefile
@@ -62,6 +62,7 @@
 	@echo "Make targets:"
 	@echo " test                     - run tests"
 	@echo " test-debug               - run tests (vpp debug image)"
+	@echo " test-leak                - run memory leak tests (vpp debug image)"
 	@echo " build                    - build test infra"
 	@echo " build-cov                - coverage build of VPP and Docker images"
 	@echo " build-debug              - build test infra (vpp debug image)"
@@ -143,6 +144,10 @@
 	@$(MAKE) -C ../.. test-cov-post HS_TEST=1
 	@bash ./script/compress.sh
 
+.PHONY: test-leak
+test-leak: .deps.ok .build_debug.ok  # leak tests run hs_test.sh with the debug build and --leak_check=true
+	@bash ./hs_test.sh --test=$(TEST) --debug_build=true --leak_check=true --vppsrc=$(VPPSRC)
+
 .PHONY: build-go
 build-go:
 	go build ./tools/http_server
diff --git a/extras/hs-test/README.rst b/extras/hs-test/README.rst
index 7841211..8a49ac6 100644
--- a/extras/hs-test/README.rst
+++ b/extras/hs-test/README.rst
@@ -307,8 +307,52 @@
 
 If a test consists of more VPP instances then this is done for each of them.
 
+**Memory leak testing**
 
-**Eternal dependencies**
+It is possible to use VPP memory traces to diagnose if and where memory leaks happen by comparing two traces taken at different points in time.
+You can do this with a test like the following:
+
+::
+
+    func MemLeakTest(s *NoTopoSuite) {
+    	s.SkipUnlessLeakCheck()  // test is excluded from usual test run
+    	vpp := s.GetContainerByName("vpp").VppInstance
+    	/* do your configuration here */
+    	vpp.Disconnect()  // no goVPP less noise
+    	vpp.EnableMemoryTrace()  // enable memory traces
+    	traces1, err := vpp.GetMemoryTrace()  // get first sample
+    	s.AssertNil(err, fmt.Sprint(err))
+    	vpp.Vppctl("test mem-leak")  // execute some action
+    	traces2, err := vpp.GetMemoryTrace()  // get second sample
+    	s.AssertNil(err, fmt.Sprint(err))
+    	vpp.MemLeakCheck(traces1, traces2)  // compare samples and generate report
+    }
+
+To get your memory leak report, run the following command:
+
+::
+
+    $ make test-leak TEST=MemLeakTest
+    ...
+    NoTopoSuiteSolo mem_leak_test.go/MemLeakTest [SOLO]
+    /home/matus/vpp/extras/hs-test/infra/suite_no_topo.go:113
+
+      Report Entries >>
+
+      SUMMARY: 112 byte(s) leaked in 1 allocation(s)
+       - /home/matus/vpp/extras/hs-test/infra/vppinstance.go:624 @ 07/19/24 15:53:33.539
+
+        leak of 112 byte(s) in 1 allocation(s) from:
+            #0 clib_mem_heap_alloc_aligned + 0x31
+            #1 _vec_alloc_internal + 0x113
+            #2 _vec_validate + 0x81
+            #3 leak_memory_fn + 0x4f
+            #4 0x7fc167815ac3
+            #5 0x7fc1678a7850
+      << Report Entries
+    ------------------------------
+
+**External dependencies**
 
 * Linux tools ``ip``, ``brctl``
 * Standalone programs ``wget``, ``iperf3`` - since these are downloaded when Docker image is made,
diff --git a/extras/hs-test/hs_test.sh b/extras/hs-test/hs_test.sh
index 803b8f7..acad7eb 100644
--- a/extras/hs-test/hs_test.sh
+++ b/extras/hs-test/hs_test.sh
@@ -7,8 +7,10 @@
 persist_set=0
 unconfigure_set=0
 debug_set=0
+leak_check_set=0
 debug_build=
 ginkgo_args=
+tc_name=
 
 for i in "$@"
 do
@@ -74,6 +76,13 @@
             args="$args -cpu0"
         fi
         ;;
+    --leak_check=*)
+        leak_check="${i#*=}"
+        if [ "$leak_check" = "true" ]; then
+            args="$args -leak_check"
+            leak_check_set=1
+        fi
+        ;;
 esac
 done
 
@@ -97,6 +106,16 @@
     exit 1
 fi
 
+if [ $leak_check_set -eq 1 ]; then
+  if [ $single_test -eq 0 ]; then
+    echo "a single test has to be specified when leak_check is set"
+    exit 1
+  fi
+  ginkgo_args="--focus $tc_name"  # leak-check mode focuses ginkgo on the one requested test
+  sudo -E go run github.com/onsi/ginkgo/v2/ginkgo $ginkgo_args -- $args
+  exit $?  # propagate the ginkgo status; a fixed "exit 0" would hide a failed leak test from make
+fi
+
 mkdir -p summary
 # shellcheck disable=SC2086
 sudo -E go run github.com/onsi/ginkgo/v2/ginkgo --no-color --trace --json-report=summary/report.json $ginkgo_args -- $args
diff --git a/extras/hs-test/infra/container.go b/extras/hs-test/infra/container.go
index 5093398..44f141a 100644
--- a/extras/hs-test/infra/container.go
+++ b/extras/hs-test/infra/container.go
@@ -4,6 +4,7 @@
 	"bytes"
 	"context"
 	"fmt"
+	"github.com/docker/go-units"
 	"os"
 	"os/exec"
 	"slices"
@@ -15,7 +16,6 @@
 	containerTypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/image"
 	"github.com/docker/docker/pkg/stdcopy"
-	"github.com/docker/go-units"
 	"github.com/edwarnicke/exechelper"
 	. "github.com/onsi/ginkgo/v2"
 )
@@ -382,6 +382,11 @@
 	return nil
 }
 
+func (c *Container) GetFile(sourceFileName, targetFileName string) error { // copies a file out of the container
+	containerPath := c.Name + ":" + sourceFileName // "docker cp" source given as <container>:<path>
+	return exec.Command("docker", "cp", containerPath, targetFileName).Run()
+}
+
 /*
  * Executes in detached mode so that the started application can continue to run
  * without blocking execution of test
diff --git a/extras/hs-test/infra/hst_suite.go b/extras/hs-test/infra/hst_suite.go
index 975e01d..2cf241a 100644
--- a/extras/hs-test/infra/hst_suite.go
+++ b/extras/hs-test/infra/hst_suite.go
@@ -35,6 +35,7 @@
 var VppSourceFileDir = flag.String("vppsrc", "", "vpp source file directory")
 var IsDebugBuild = flag.Bool("debug_build", false, "some paths are different with debug build")
 var UseCpu0 = flag.Bool("cpu0", false, "use cpu0")
+var IsLeakCheck = flag.Bool("leak_check", false, "run leak-check tests")
 var NumaAwareCpuAlloc bool
 var SuiteTimeout time.Duration
 
@@ -285,6 +286,11 @@
 	}
 }
 
+func (s *HstSuite) SkipUnlessLeakCheck() { // skips the calling test unless the -leak_check flag is set
+	if !*IsLeakCheck {
+		s.Skip("leak-check tests excluded")
+	}
+}
 func (s *HstSuite) ResetContainers() {
 	for _, container := range s.StartedContainers {
 		container.stop()
diff --git a/extras/hs-test/infra/vppinstance.go b/extras/hs-test/infra/vppinstance.go
index d4f5700..dfb236b 100644
--- a/extras/hs-test/infra/vppinstance.go
+++ b/extras/hs-test/infra/vppinstance.go
@@ -2,6 +2,7 @@
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"go.fd.io/govpp/binapi/ethernet_types"
 	"io"
@@ -97,6 +98,13 @@
 	SkipCores          int
 }
 
+type VppMemTrace struct { // one entry decoded from the JSON memory-trace dump
+	Count     int      `json:"count"`     // reported by MemLeakCheck as "allocation(s)"
+	Size      int      `json:"bytes"`     // reported by MemLeakCheck as "byte(s)"
+	Sample    string   `json:"sample"`    // compared between two dumps to pair up entries
+	Traceback []string `json:"traceback"` // stack frames, printed as #0, #1, ... in the report
+}
+
 func (vpp *VppInstance) getSuite() *HstSuite {
 	return vpp.Container.Suite
 }
@@ -535,3 +543,83 @@
 
 	return c.Close().ToString()
 }
+
+// EnableMemoryTrace runs the "memory-trace on main-heap" CLI in VPP and logs its output
+func (vpp *VppInstance) EnableMemoryTrace() {
+	vpp.getSuite().Log(vpp.Vppctl("memory-trace on main-heap"))
+}
+
+// GetMemoryTrace tells VPP to save its memory trace, copies the dump out of the container
+// and decodes it. A returned error wraps the cause and names the step that failed.
+func (vpp *VppInstance) GetMemoryTrace() ([]VppMemTrace, error) {
+	const traceFile = "/tmp/trace.json" // VPP is expected to save "trace.json" here; the host copy reuses the path
+	vpp.getSuite().Log(vpp.Vppctl("save memory-trace trace.json"))
+	if err := vpp.Container.GetFile(traceFile, traceFile); err != nil {
+		return nil, fmt.Errorf("copying %s from container: %w", traceFile, err)
+	}
+	fileBytes, err := os.ReadFile(traceFile)
+	if err != nil {
+		return nil, fmt.Errorf("reading memory trace: %w", err)
+	}
+	var trace []VppMemTrace
+	if err := json.Unmarshal(fileBytes, &trace); err != nil {
+		return nil, fmt.Errorf("decoding memory trace %s: %w", traceFile, err)
+	}
+	return trace, nil
+}
+
+// memTracesSuppressCli returns the samples whose traceback has no frame
+// mentioning "unix_cli", i.e. the samples that are not CLI related
+func memTracesSuppressCli(traces []VppMemTrace) []VppMemTrace {
+	var kept []VppMemTrace // stays nil when every sample is filtered out
+nextSample:
+	for _, sample := range traces {
+		for _, frame := range sample.Traceback {
+			if strings.Contains(frame, "unix_cli") {
+				// one CLI frame is enough to discard the whole sample
+				continue nextSample
+			}
+		}
+		// no CLI frame found, keep the sample
+		kept = append(kept, sample)
+	}
+	return kept
+}
+
+// MemLeakCheck compares memory traces at different point in time, analyzes if memory leaks happen and produces report
+func (vpp *VppInstance) MemLeakCheck(first, second []VppMemTrace) {
+	totalBytes := 0
+	totalCounts := 0
+	trace1 := memTracesSuppressCli(first)
+	trace2 := memTracesSuppressCli(second)
+	report := ""
+	for i := 0; i < len(trace2); i++ {
+		match := false
+		for j := 0; j < len(trace1); j++ {
+			if trace1[j].Sample == trace2[i].Sample {
+				if trace2[i].Size > trace1[j].Size {
+					deltaBytes := trace2[i].Size - trace1[j].Size
+					deltaCounts := trace2[i].Count - trace1[j].Count
+					report += fmt.Sprintf("grow %d byte(s) in %d allocation(s) from:\n", deltaBytes, deltaCounts)
+					for j := 0; j < len(trace2[i].Traceback); j++ {
+						report += fmt.Sprintf("\t#%d %s\n", j, trace2[i].Traceback[j])
+					}
+					totalBytes += deltaBytes
+					totalCounts += deltaCounts
+				}
+				match = true
+				break
+			}
+		}
+		if !match {
+			report += fmt.Sprintf("\nleak of %d byte(s) in %d allocation(s) from:\n", trace2[i].Size, trace2[i].Count)
+			for j := 0; j < len(trace2[i].Traceback); j++ {
+				report += fmt.Sprintf("\t#%d %s\n", j, trace2[i].Traceback[j])
+			}
+			totalBytes += trace2[i].Size
+			totalCounts += trace2[i].Count
+		}
+	}
+	summary := fmt.Sprintf("\nSUMMARY: %d byte(s) leaked in %d allocation(s)\n", totalBytes, totalCounts)
+	AddReportEntry(summary, report)
+}
diff --git a/extras/hs-test/mem_leak_test.go b/extras/hs-test/mem_leak_test.go
new file mode 100644
index 0000000..76966ae
--- /dev/null
+++ b/extras/hs-test/mem_leak_test.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+	. "fd.io/hs-test/infra"
+	"fmt"
+)
+
+func init() { // registers MemLeakTest through RegisterNoTopoSoloTests
+	RegisterNoTopoSoloTests(MemLeakTest)
+}
+
+func MemLeakTest(s *NoTopoSuite) {
+	s.SkipUnlessLeakCheck() // skipped unless the -leak_check flag is set
+	vpp := s.GetContainerByName("vpp").VppInstance
+	/* no goVPP connection, so less noise in the traces */
+	vpp.Disconnect()
+	vpp.EnableMemoryTrace()
+	traces1, err := vpp.GetMemoryTrace() // first sample, taken before the action
+	s.AssertNil(err, fmt.Sprint(err))
+	vpp.Vppctl("test mem-leak") // the action under test
+	traces2, err := vpp.GetMemoryTrace() // second sample, taken after the action
+	s.AssertNil(err, fmt.Sprint(err))
+	vpp.MemLeakCheck(traces1, traces2) // compare both samples and add the report entry
+}
diff --git a/src/plugins/unittest/util_test.c b/src/plugins/unittest/util_test.c
index 53384e5..5b7e30b 100644
--- a/src/plugins/unittest/util_test.c
+++ b/src/plugins/unittest/util_test.c
@@ -101,6 +101,36 @@
   .function = test_hash_command_fn,
 };
 
+static void *
+leak_memory_fn (void *args)
+{
+  u8 *p = 0;
+  vec_validate (p, 100); /* allocate a vector that is never freed */
+  p = 0; /* drop the only reference to it; the leak is the point of this test */
+  return 0;
+}
+
+static clib_error_t *
+test_mem_leak_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			  vlib_cli_command_t *cmd)
+{
+  /* do memory leak from thread, so no 'unix_cli' in traceback */
+  pthread_t thread;
+  if (pthread_create (&thread, NULL, leak_memory_fn, 0))
+    return clib_error_return (0, "pthread_create failed");
+  /* join: the leak has then happened before the CLI returns, and the thread is reclaimed */
+  if (pthread_join (thread, NULL))
+    return clib_error_return (0, "pthread_join failed");
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (test_mem_leak_command, static) = {
+  .path = "test mem-leak", /* CLI used by the hs-test MemLeakTest */
+  .short_help = "leak some memory",
+  .function = test_mem_leak_command_fn, /* starts a thread running leak_memory_fn */
+};
+
 /*
  * fd.io coding-style-patch-verification: ON
  *