hs-test: improved suite teardown and replaced PIDs with PPIDs

- Fixed an issue where containers wouldn't stop and get removed when
  a test run is interrupted
- Replaced PIDs with Ginkgo process indexes + PPIDs
- Fixed CPU allocation for envoy and nginx-ldp containers
- All container logs should now get saved properly

Type: test

Change-Id: I4c737c1d326390494c0dda1ec6d3fc1f04f51663
Signed-off-by: Adrian Villin <avillin@cisco.com>
diff --git a/extras/hs-test/container.go b/extras/hs-test/container.go
index 080eb73..97b71b5 100644
--- a/extras/hs-test/container.go
+++ b/extras/hs-test/container.go
@@ -78,7 +78,7 @@
 	}
 
 	if _, ok := yamlInput["volumes"]; ok {
-		workingVolumeDir := logDir + CurrentSpecReport().LeafNodeText + volumeDir
+		workingVolumeDir := logDir + suite.getCurrentTestName() + volumeDir
 		workDirReplacer := strings.NewReplacer("$HST_DIR", workDir)
 		volDirReplacer := strings.NewReplacer("$HST_VOLUME_DIR", workingVolumeDir)
 		for _, volu := range yamlInput["volumes"].([]interface{}) {
@@ -132,7 +132,9 @@
 }
 
 func (c *Container) getContainerArguments() string {
-	args := "--ulimit nofile=90000:90000 --cap-add=all --privileged --network host --rm"
+	args := "--ulimit nofile=90000:90000 --cap-add=all --privileged --network host"
+	c.allocateCpus()
+	args += fmt.Sprintf(" --cpuset-cpus=\"%d-%d\"", c.allocatedCpus[0], c.allocatedCpus[len(c.allocatedCpus)-1])
 	args += c.getVolumesAsCliOption()
 	args += c.getEnvVarsAsCliOption()
 	if *vppSourceFileDir != "" {
@@ -180,11 +182,9 @@
 
 	cmd := "docker run "
 	if c.runDetached {
-		cmd += " -dt"
+		cmd += " -d"
 	}
 
-	c.allocateCpus()
-	cmd += fmt.Sprintf(" --cpuset-cpus=\"%d-%d\"", c.allocatedCpus[0], c.allocatedCpus[len(c.allocatedCpus)-1])
 	cmd += " " + c.getContainerArguments()
 
 	c.suite.log(cmd)
@@ -260,7 +260,7 @@
 }
 
 func (c *Container) createFile(destFileName string, content string) error {
-	f, err := os.CreateTemp("/tmp", "hst-config"+c.suite.pid)
+	f, err := os.CreateTemp("/tmp", "hst-config"+c.suite.ppid)
 	if err != nil {
 		return err
 	}
@@ -302,7 +302,7 @@
 
 func (c *Container) getLogDirPath() string {
 	testId := c.suite.getTestId()
-	testName := CurrentSpecReport().LeafNodeText
+	testName := c.suite.getCurrentTestName()
 	logDirPath := logDir + testName + "/" + testId + "/"
 
 	cmd := exec.Command("mkdir", "-p", logDirPath)
@@ -314,17 +314,13 @@
 }
 
 func (c *Container) saveLogs() {
-	cmd := exec.Command("docker", "inspect", "--format='{{.State.Status}}'", c.name)
-	if output, _ := cmd.CombinedOutput(); !strings.Contains(string(output), "running") {
-		return
-	}
-
 	testLogFilePath := c.getLogDirPath() + "container-" + c.name + ".log"
 
-	cmd = exec.Command("docker", "logs", "--details", "-t", c.name)
+	cmd := exec.Command("docker", "logs", "--details", "-t", c.name)
+	c.suite.log(cmd)
 	output, err := cmd.CombinedOutput()
 	if err != nil {
-		Fail("fetching logs error: " + fmt.Sprint(err))
+		c.suite.log(err)
 	}
 
 	f, err := os.Create(testLogFilePath)
@@ -356,6 +352,7 @@
 	}
 	c.vppInstance = nil
 	c.saveLogs()
+	c.suite.log("docker stop " + c.name + " -t 0")
 	return exechelper.Run("docker stop " + c.name + " -t 0")
 }
 
diff --git a/extras/hs-test/hst_suite.go b/extras/hs-test/hst_suite.go
index 62d1a80..e68c9c7 100644
--- a/extras/hs-test/hst_suite.go
+++ b/extras/hs-test/hst_suite.go
@@ -41,7 +41,8 @@
 	cpuAllocator      *CpuAllocatorT
 	cpuContexts       []*CpuContext
 	cpuPerVpp         int
-	pid               string
+	ppid              string
+	processIndex      string
 	logger            *log.Logger
 	logFile           *os.File
 }
@@ -54,7 +55,10 @@
 		Fail(message, callerSkip...)
 	})
 	var err error
-	s.pid = fmt.Sprint(os.Getpid())
+	s.ppid = fmt.Sprint(os.Getppid())
+	// remove last number so we have space to prepend a process index (interfaces have a char limit)
+	s.ppid = s.ppid[:len(s.ppid)-1]
+	s.processIndex = fmt.Sprint(GinkgoParallelProcess())
 	s.cpuAllocator, err = CpuAllocator()
 	if err != nil {
 		Fail("failed to init cpu allocator: " + fmt.Sprint(err))
@@ -197,7 +201,7 @@
 }
 
 func (s *HstSuite) createLogger() {
-	suiteName := CurrentSpecReport().ContainerHierarchyTexts[0]
+	suiteName := s.getCurrentSuiteName()
 	var err error
 	s.logFile, err = os.Create("summary/" + suiteName + ".log")
 	if err != nil {
@@ -243,8 +247,9 @@
 }
 
 func (s *HstSuite) resetContainers() {
-	for _, container := range s.containers {
+	for _, container := range s.startedContainers {
 		container.stop()
+		exechelper.Run("docker rm " + container.name)
 	}
 }
 
@@ -257,15 +262,15 @@
 }
 
 func (s *HstSuite) getNetNamespaceByName(name string) string {
-	return name + s.pid
+	return s.processIndex + name + s.ppid
 }
 
 func (s *HstSuite) getInterfaceByName(name string) *NetInterface {
-	return s.netInterfaces[name+s.pid]
+	return s.netInterfaces[s.processIndex+name+s.ppid]
 }
 
 func (s *HstSuite) getContainerByName(name string) *Container {
-	return s.containers[name+s.pid]
+	return s.containers[s.processIndex+name+s.ppid]
 }
 
 /*
@@ -273,7 +278,7 @@
  * are not able to modify the original container and affect other tests by doing that
  */
 func (s *HstSuite) getTransientContainerByName(name string) *Container {
-	containerCopy := *s.containers[name+s.pid]
+	containerCopy := *s.containers[s.processIndex+name+s.ppid]
 	return &containerCopy
 }
 
@@ -291,7 +296,7 @@
 	for _, elem := range yamlTopo.Volumes {
 		volumeMap := elem["volume"].(VolumeConfig)
 		hostDir := volumeMap["host-dir"].(string)
-		workingVolumeDir := logDir + CurrentSpecReport().LeafNodeText + volumeDir
+		workingVolumeDir := logDir + s.getCurrentTestName() + volumeDir
 		volDirReplacer := strings.NewReplacer("$HST_VOLUME_DIR", workingVolumeDir)
 		hostDir = volDirReplacer.Replace(hostDir)
 		s.volumes = append(s.volumes, hostDir)
@@ -301,7 +306,7 @@
 	for _, elem := range yamlTopo.Containers {
 		newContainer, err := newContainer(s, elem)
 		newContainer.suite = s
-		newContainer.name += newContainer.suite.pid
+		newContainer.name = newContainer.suite.processIndex + newContainer.name + newContainer.suite.ppid
 		if err != nil {
 			Fail("container config error: " + fmt.Sprint(err))
 		}
@@ -325,26 +330,26 @@
 
 	for _, elem := range yamlTopo.Devices {
 		if _, ok := elem["name"]; ok {
-			elem["name"] = elem["name"].(string) + s.pid
+			elem["name"] = s.processIndex + elem["name"].(string) + s.ppid
 		}
 
 		if peer, ok := elem["peer"].(NetDevConfig); ok {
 			if peer["name"].(string) != "" {
-				peer["name"] = peer["name"].(string) + s.pid
+				peer["name"] = s.processIndex + peer["name"].(string) + s.ppid
 			}
 			if _, ok := peer["netns"]; ok {
-				peer["netns"] = peer["netns"].(string) + s.pid
+				peer["netns"] = s.processIndex + peer["netns"].(string) + s.ppid
 			}
 		}
 
 		if _, ok := elem["netns"]; ok {
-			elem["netns"] = elem["netns"].(string) + s.pid
+			elem["netns"] = s.processIndex + elem["netns"].(string) + s.ppid
 		}
 
 		if _, ok := elem["interfaces"]; ok {
 			interfaceCount := len(elem["interfaces"].([]interface{}))
 			for i := 0; i < interfaceCount; i++ {
-				elem["interfaces"].([]interface{})[i] = elem["interfaces"].([]interface{})[i].(string) + s.pid
+				elem["interfaces"].([]interface{})[i] = s.processIndex + elem["interfaces"].([]interface{})[i].(string) + s.ppid
 			}
 		}
 
@@ -402,7 +407,7 @@
 }
 
 func (s *HstSuite) getTestId() string {
-	testName := CurrentSpecReport().LeafNodeText
+	testName := s.getCurrentTestName()
 
 	if s.testIds == nil {
 		s.testIds = map[string]string{}
@@ -415,17 +420,25 @@
 	return s.testIds[testName]
 }
 
-// Returns last 4 digits of PID
-func (s *HstSuite) getPortFromPid() string {
-	port := s.pid
-	for len(port) < 4 {
+func (s *HstSuite) getCurrentTestName() string {
+	return strings.Split(CurrentSpecReport().LeafNodeText, "/")[1]
+}
+
+func (s *HstSuite) getCurrentSuiteName() string {
+	return CurrentSpecReport().ContainerHierarchyTexts[0]
+}
+
+// Returns last 3 digits of PID + Ginkgo process index as the 4th digit
+func (s *HstSuite) getPortFromPpid() string {
+	port := s.ppid
+	for len(port) < 3 {
 		port += "0"
 	}
-	return port[len(port)-4:]
+	return port[len(port)-3:] + s.processIndex
 }
 
 func (s *HstSuite) startServerApp(running chan error, done chan struct{}, env []string) {
-	cmd := exec.Command("iperf3", "-4", "-s", "-p", s.getPortFromPid())
+	cmd := exec.Command("iperf3", "-4", "-s", "-p", s.getPortFromPpid())
 	if env != nil {
 		cmd.Env = env
 	}
@@ -449,7 +462,7 @@
 	nTries := 0
 
 	for {
-		cmd := exec.Command("iperf3", "-c", ipAddress, "-u", "-l", "1460", "-b", "10g", "-p", s.getPortFromPid())
+		cmd := exec.Command("iperf3", "-c", ipAddress, "-u", "-l", "1460", "-b", "10g", "-p", s.getPortFromPpid())
 		if env != nil {
 			cmd.Env = env
 		}
@@ -471,7 +484,7 @@
 }
 
 func (s *HstSuite) startHttpServer(running chan struct{}, done chan struct{}, addressPort, netNs string) {
-	cmd := newCommand([]string{"./http_server", addressPort, s.pid}, netNs)
+	cmd := newCommand([]string{"./http_server", addressPort, s.ppid, s.processIndex}, netNs)
 	err := cmd.Start()
 	s.log(cmd)
 	if err != nil {
diff --git a/extras/hs-test/http_test.go b/extras/hs-test/http_test.go
index 236ed7d..baf3e5e 100644
--- a/extras/hs-test/http_test.go
+++ b/extras/hs-test/http_test.go
@@ -255,7 +255,6 @@
 
 	if ab_or_wrk == "ab" {
 		abCont := s.getContainerByName("ab")
-		abCont.runDetached = true
 		args := fmt.Sprintf("-n %d -c %d", nRequests, nClients)
 		if mode == "rps" {
 			args += " -k"
@@ -273,7 +272,6 @@
 		s.assertNil(err, "err: '%s', output: '%s'", err, o)
 	} else {
 		wrkCont := s.getContainerByName("wrk")
-		wrkCont.runDetached = true
 		args := fmt.Sprintf("-c %d -t 2 -d 30 http://%s:80/64B.json", nClients,
 			serverAddress)
 		wrkCont.extraRunningArgs = args
diff --git a/extras/hs-test/proxy_test.go b/extras/hs-test/proxy_test.go
index ac5f94c..c57b927 100644
--- a/extras/hs-test/proxy_test.go
+++ b/extras/hs-test/proxy_test.go
@@ -13,9 +13,9 @@
 }
 
 func testProxyHttpTcp(s *NsSuite, proto string) error {
-	var outputFile string = "test" + s.pid + ".data"
-	var srcFilePid string = "httpTestFile" + s.pid
-	const srcFileNoPid = "httpTestFile"
+	var outputFile string = s.processIndex + "test" + s.ppid + ".data"
+	var srcFilePpid string = s.processIndex + "httpTestFile" + s.ppid
+	const srcFileNoPpid = "httpTestFile"
 	const fileSize string = "10M"
 	stopServer := make(chan struct{}, 1)
 	serverRunning := make(chan struct{}, 1)
@@ -23,9 +23,9 @@
 	clientNetns := s.getNetNamespaceByName("cln")
 
 	// create test file
-	err := exechelper.Run(fmt.Sprintf("ip netns exec %s truncate -s %s %s", serverNetns, fileSize, srcFilePid))
+	err := exechelper.Run(fmt.Sprintf("ip netns exec %s truncate -s %s %s", serverNetns, fileSize, srcFilePpid))
 	s.assertNil(err, "failed to run truncate command: "+fmt.Sprint(err))
-	defer func() { os.Remove(srcFilePid) }()
+	defer func() { os.Remove(srcFilePpid) }()
 
 	s.log("test file created...")
 
@@ -48,7 +48,7 @@
 	if proto == "tls" {
 		c += " --secure-protocol=TLSv1_3 --no-check-certificate https://"
 	}
-	c += fmt.Sprintf("%s:555/%s", clientVeth.ip4AddressString(), srcFileNoPid)
+	c += fmt.Sprintf("%s:555/%s", clientVeth.ip4AddressString(), srcFileNoPpid)
 	s.log(c)
 	_, err = exechelper.CombinedOutput(c)
 
@@ -57,7 +57,7 @@
 	s.assertNil(err, "failed to run wget: '%s', cmd: %s", err, c)
 	stopServer <- struct{}{}
 
-	s.assertNil(assertFileSize(outputFile, srcFilePid))
+	s.assertNil(assertFileSize(outputFile, srcFilePpid))
 	return nil
 }
 
diff --git a/extras/hs-test/tools/http_server/http_server.go b/extras/hs-test/tools/http_server/http_server.go
index d2ab385..a65aa75 100644
--- a/extras/hs-test/tools/http_server/http_server.go
+++ b/extras/hs-test/tools/http_server/http_server.go
@@ -8,13 +8,13 @@
 )
 
 func main() {
-	if len(os.Args) < 3 {
+	if len(os.Args) < 4 {
 		fmt.Println("arg expected")
 		os.Exit(1)
 	}
 
 	http.HandleFunc("/httpTestFile", func(w http.ResponseWriter, r *http.Request) {
-		file, _ := os.Open("httpTestFile" + os.Args[2])
+		file, _ := os.Open(os.Args[3] + "httpTestFile" + os.Args[2])
 		defer file.Close()
 		io.Copy(w, file)
 	})