Adrian Villin | 4677d92 | 2024-06-14 09:32:39 +0200 | [diff] [blame] | 1 | package hst |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 2 | |
| 3 | import ( |
| 4 | "bufio" |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 5 | "errors" |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 6 | "fmt" |
Adrian Villin | 0df582e | 2024-05-22 09:26:47 -0400 | [diff] [blame] | 7 | . "github.com/onsi/ginkgo/v2" |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 8 | "os" |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 9 | "os/exec" |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 10 | "strconv" |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 11 | "strings" |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 12 | ) |
| 13 | |
// CgroupPath is the directory prefix used by readCpus to build the path of
// the cgroup cpuset file it reads when neither CI nor NUMA-aware allocation
// is in effect.
var CgroupPath = "/sys/fs/cgroup/"
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 15 | |
| 16 | type CpuContext struct { |
| 17 | cpuAllocator *CpuAllocatorT |
| 18 | cpus []int |
| 19 | } |
| 20 | |
// CpuAllocatorT hands out groups of CPUs from a list gathered by readCpus.
type CpuAllocatorT struct {
	// cpus is the list of CPU ids that may be allocated.
	cpus []int
	// runningInCi is set by CpuAllocator when the BUILD_NUMBER environment
	// variable is not empty.
	runningInCi bool
	// buildNumber is the last digit of BUILD_NUMBER; Allocate uses it to
	// select the CPU window when runningInCi is true.
	buildNumber int
	// maxContainerCount is the number of container slots in each CPU
	// window; CpuAllocator sets it to 4.
	maxContainerCount int
}
| 27 | |
// iterateAndAppend appends every integer from start to end (both inclusive)
// to slice and returns the extended slice. Nothing is appended when start is
// greater than end.
func iterateAndAppend(start int, end int, slice []int) []int {
	next := start
	for next <= end {
		slice = append(slice, next)
		next++
	}
	return slice
}
| 34 | |
| 35 | var cpuAllocator *CpuAllocatorT = nil |
| 36 | |
Adrian Villin | b9464cd | 2024-05-27 09:52:59 -0400 | [diff] [blame] | 37 | func (c *CpuAllocatorT) Allocate(containerCount int, nCpus int) (*CpuContext, error) { |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 38 | var cpuCtx CpuContext |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 39 | // indexes, not actual cores |
| 40 | var minCpu, maxCpu int |
Adrian Villin | b9464cd | 2024-05-27 09:52:59 -0400 | [diff] [blame] | 41 | |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 42 | if c.runningInCi { |
| 43 | minCpu = ((c.buildNumber) * c.maxContainerCount * nCpus) |
| 44 | maxCpu = ((c.buildNumber + 1) * c.maxContainerCount * nCpus) - 1 |
| 45 | } else { |
| 46 | minCpu = ((GinkgoParallelProcess() - 1) * c.maxContainerCount * nCpus) |
| 47 | maxCpu = (GinkgoParallelProcess() * c.maxContainerCount * nCpus) - 1 |
| 48 | } |
Adrian Villin | b9464cd | 2024-05-27 09:52:59 -0400 | [diff] [blame] | 49 | |
| 50 | if len(c.cpus)-1 < maxCpu { |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 51 | err := fmt.Errorf("could not allocate %d CPUs; available count: %d; attempted to allocate cores with index %d-%d; max index: %d;\n"+ |
| 52 | "available cores: %v", nCpus*containerCount, len(c.cpus), minCpu, maxCpu, len(c.cpus)-1, c.cpus) |
Adrian Villin | 0df582e | 2024-05-22 09:26:47 -0400 | [diff] [blame] | 53 | return nil, err |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 54 | } |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 55 | |
Adrian Villin | b9464cd | 2024-05-27 09:52:59 -0400 | [diff] [blame] | 56 | if containerCount == 1 { |
| 57 | cpuCtx.cpus = c.cpus[minCpu : minCpu+nCpus] |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 58 | } else if containerCount > 1 && containerCount <= c.maxContainerCount { |
Adrian Villin | b9464cd | 2024-05-27 09:52:59 -0400 | [diff] [blame] | 59 | cpuCtx.cpus = c.cpus[minCpu+(nCpus*(containerCount-1)) : minCpu+(nCpus*containerCount)] |
Adrian Villin | 0df582e | 2024-05-22 09:26:47 -0400 | [diff] [blame] | 60 | } else { |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 61 | return nil, fmt.Errorf("too many containers; CPU allocation for >%d containers is not implemented", c.maxContainerCount) |
Adrian Villin | 0df582e | 2024-05-22 09:26:47 -0400 | [diff] [blame] | 62 | } |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 63 | cpuCtx.cpuAllocator = c |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 64 | return &cpuCtx, nil |
| 65 | } |
| 66 | |
Adrian Villin | cee15aa | 2024-03-14 11:42:55 -0400 | [diff] [blame] | 67 | func (c *CpuAllocatorT) readCpus() error { |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 68 | var first, second, third, fourth int |
| 69 | var file *os.File |
| 70 | var err error |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 71 | |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 72 | if c.runningInCi { |
| 73 | // non-debug build runs on node0, debug on node1 |
| 74 | if *IsDebugBuild { |
| 75 | file, err = os.Open("/sys/devices/system/node/node1/cpulist") |
| 76 | } else { |
| 77 | file, err = os.Open("/sys/devices/system/node/node0/cpulist") |
| 78 | } |
| 79 | if err != nil { |
| 80 | return err |
| 81 | } |
| 82 | defer file.Close() |
| 83 | |
| 84 | sc := bufio.NewScanner(file) |
| 85 | sc.Scan() |
| 86 | line := sc.Text() |
| 87 | _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &first, &second, &third, &fourth) |
| 88 | if err != nil { |
| 89 | return err |
| 90 | } |
| 91 | |
| 92 | c.cpus = iterateAndAppend(first, second, c.cpus) |
| 93 | c.cpus = iterateAndAppend(third, fourth, c.cpus) |
| 94 | } else if NumaAwareCpuAlloc { |
| 95 | var fifth, sixth int |
| 96 | var tmpCpus []int |
| 97 | |
| 98 | file, err := os.Open("/sys/devices/system/node/online") |
| 99 | if err != nil { |
| 100 | return err |
| 101 | } |
| 102 | defer file.Close() |
| 103 | |
| 104 | sc := bufio.NewScanner(file) |
| 105 | sc.Scan() |
| 106 | line := sc.Text() |
| 107 | // get numa node range |
| 108 | _, err = fmt.Sscanf(line, "%d-%d", &first, &second) |
| 109 | if err != nil { |
| 110 | return err |
| 111 | } |
| 112 | |
| 113 | for i := first; i <= second; i++ { |
| 114 | file, err := os.Open("/sys/devices/system/node/node" + fmt.Sprint(i) + "/cpulist") |
| 115 | if err != nil { |
| 116 | return err |
| 117 | } |
| 118 | defer file.Close() |
| 119 | |
| 120 | // get numa node cores |
| 121 | sc := bufio.NewScanner(file) |
| 122 | sc.Scan() |
| 123 | line := sc.Text() |
| 124 | _, err = fmt.Sscanf(line, "%d-%d,%d-%d", &third, &fourth, &fifth, &sixth) |
| 125 | if err != nil { |
| 126 | return err |
| 127 | } |
| 128 | |
| 129 | // get numa node cores from first range |
| 130 | tmpCpus = iterateAndAppend(third, fourth, tmpCpus) |
| 131 | |
| 132 | // discard cpu 0 |
| 133 | if tmpCpus[0] == 0 && !*UseCpu0{ |
| 134 | tmpCpus = tmpCpus[1:] |
| 135 | } |
| 136 | |
| 137 | // get numa node cores from second range |
| 138 | tmpCpus = iterateAndAppend(fifth, sixth, tmpCpus) |
| 139 | |
| 140 | // make c.cpus divisible by maxContainerCount * nCpus, so we don't have to check which numa will be used |
| 141 | // and we can use offsets |
| 142 | count_to_remove := len(tmpCpus) % (c.maxContainerCount * *NConfiguredCpus) |
| 143 | c.cpus = append(c.cpus, tmpCpus[:len(tmpCpus)-count_to_remove]...) |
| 144 | tmpCpus = tmpCpus[:0] |
| 145 | } |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 146 | } else { |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 147 | // Path depends on cgroup version. We need to check which version is in use. |
| 148 | // For that following command can be used: 'stat -fc %T /sys/fs/cgroup/' |
| 149 | // In case the output states 'cgroup2fs' then cgroups v2 is used, 'tmpfs' in case cgroups v1. |
| 150 | cmd := exec.Command("stat", "-fc", "%T", "/sys/fs/cgroup/") |
| 151 | byteOutput, err := cmd.CombinedOutput() |
| 152 | if err != nil { |
| 153 | return err |
| 154 | } |
| 155 | |
| 156 | CpuPath := CgroupPath |
| 157 | if strings.Contains(string(byteOutput), "tmpfs") { |
| 158 | CpuPath += "cpuset/cpuset.effective_cpus" |
| 159 | } else if strings.Contains(string(byteOutput), "cgroup2fs") { |
| 160 | CpuPath += "cpuset.cpus.effective" |
| 161 | } else { |
| 162 | return errors.New("cgroup unknown fs: " + string(byteOutput)) |
| 163 | } |
| 164 | |
| 165 | file, err := os.Open(CpuPath) |
| 166 | if err != nil { |
| 167 | return err |
| 168 | } |
| 169 | defer file.Close() |
| 170 | |
| 171 | sc := bufio.NewScanner(file) |
| 172 | sc.Scan() |
| 173 | line := sc.Text() |
| 174 | _, err = fmt.Sscanf(line, "%d-%d", &first, &second) |
| 175 | if err != nil { |
| 176 | return err |
| 177 | } |
| 178 | c.cpus = iterateAndAppend(first, second, c.cpus) |
Matus Fabian | 18c9f14 | 2024-04-29 11:06:44 +0200 | [diff] [blame] | 179 | } |
| 180 | |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 181 | // discard cpu 0 |
| 182 | if c.cpus[0] == 0 && !*UseCpu0 { |
| 183 | c.cpus = c.cpus[1:] |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 184 | } |
| 185 | return nil |
| 186 | } |
| 187 | |
| 188 | func CpuAllocator() (*CpuAllocatorT, error) { |
| 189 | if cpuAllocator == nil { |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 190 | var err error |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 191 | cpuAllocator = new(CpuAllocatorT) |
Adrian Villin | 5d171eb | 2024-06-17 08:51:27 +0200 | [diff] [blame] | 192 | cpuAllocator.maxContainerCount = 4 |
| 193 | buildNumberStr := os.Getenv("BUILD_NUMBER") |
| 194 | |
| 195 | if buildNumberStr != "" { |
| 196 | cpuAllocator.runningInCi = true |
| 197 | // get last digit of build number |
| 198 | cpuAllocator.buildNumber, err = strconv.Atoi(buildNumberStr[len(buildNumberStr)-1:]) |
| 199 | if err != nil { |
| 200 | return nil, err |
| 201 | } |
| 202 | } |
| 203 | err = cpuAllocator.readCpus() |
Filip Tehlar | 608d006 | 2023-04-28 10:29:47 +0200 | [diff] [blame] | 204 | if err != nil { |
| 205 | return nil, err |
| 206 | } |
| 207 | } |
| 208 | return cpuAllocator, nil |
| 209 | } |