blob: 313fe4a87879dc57b7880e459fb3e5c058735fd8 [file] [log] [blame]
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001/*
Ed Warnickecb9cada2015-12-08 15:45:58 -07002 *------------------------------------------------------------------
Dave Barach8a7fb0c2016-07-08 14:44:23 -04003 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
Ed Warnickecb9cada2015-12-08 15:45:58 -07004 * library
5 *
6 * Copyright (c) 2009 Cisco and/or its affiliates.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at:
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *------------------------------------------------------------------
19 */
20
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"
44
/* Root region of this process; set by svm_region_init_internal(), zero until then. */
static svm_region_t *root_rp;
/* Reference count for root_rp; incremented by svm_region_init_internal(). */
static int root_rp_refcount;

/* Capacity of the held-mutex table below. */
#define MAXLOCK 2
/*
 * Region mutexes taken through region_lock() and not yet released;
 * svm_mutex_cleanup() walks this table at exit.
 */
static pthread_mutex_t *mutexes_held[MAXLOCK];
/* Number of valid entries in mutexes_held. */
static int nheld;
51
Dave Barach8a7fb0c2016-07-08 14:44:23 -040052svm_region_t *
53svm_get_root_rp (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -070054{
Dave Barach8a7fb0c2016-07-08 14:44:23 -040055 return root_rp;
Ed Warnickecb9cada2015-12-08 15:45:58 -070056}
57
/*
 * When defined, region_lock() / region_unlock() record and clear the
 * owner pid and tag in the region header.
 */
#define MUTEX_DEBUG
59
Damjan Marionaec8f892018-01-08 16:35:35 +010060u64
61svm_get_global_region_base_va ()
62{
BenoƮt Ganne9fb6d402019-04-15 15:28:21 +020063#ifdef CLIB_SANITIZE_ADDR
64 return 0x200000000000;
65#endif
66
Damjan Marionaec8f892018-01-08 16:35:35 +010067#if __aarch64__
68 /* On AArch64 VA space can have different size, from 36 to 48 bits.
69 Here we are trying to detect VA bits by parsing /proc/self/maps
70 address ranges */
71 int fd;
72 unformat_input_t input;
73 u64 start, end = 0;
74 u8 bits = 0;
75
76 if ((fd = open ("/proc/self/maps", 0)) < 0)
77 clib_unix_error ("open '/proc/self/maps'");
78
79 unformat_init_clib_file (&input, fd);
80 while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
81 {
Gabriel Gannec5239ad2018-01-11 15:04:19 +010082 if (unformat (&input, "%llx-%llx", &start, &end))
83 end--;
Damjan Marionaec8f892018-01-08 16:35:35 +010084 unformat_skip_line (&input);
85 }
Gabriel Ganne83d47432018-01-10 11:40:50 +010086 unformat_free (&input);
87 close (fd);
Damjan Marionaec8f892018-01-08 16:35:35 +010088
Damjan Marion11056002018-05-10 13:40:44 +020089 bits = count_leading_zeros (end);
Gabriel Gannec5239ad2018-01-11 15:04:19 +010090 bits = 64 - bits;
Damjan Marionaec8f892018-01-08 16:35:35 +010091 if (bits >= 36 && bits <= 48)
92 return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE);
93 else
94 clib_unix_error ("unexpected va bits '%u'", bits);
Damjan Marionaec8f892018-01-08 16:35:35 +010095#endif
96
97 /* default value */
Dave Barach9466c452018-08-24 17:21:14 -040098 return 0x130000000ULL;
Damjan Marionaec8f892018-01-08 16:35:35 +010099}
100
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400101static void
102region_lock (svm_region_t * rp, int tag)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700103{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400104 pthread_mutex_lock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700105#ifdef MUTEX_DEBUG
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400106 rp->mutex_owner_pid = getpid ();
107 rp->mutex_owner_tag = tag;
108#endif
Dave Barachc35f3e82020-04-02 10:44:09 -0400109 ASSERT (nheld < MAXLOCK); //NOSONAR
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400110 /*
111 * Keep score of held mutexes so we can try to exit
112 * cleanly if the world comes to an end at the worst possible
113 * moment
114 */
115 mutexes_held[nheld++] = &rp->mutex;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700116}
117
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400118static void
119region_unlock (svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700120{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400121 int i, j;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700122#ifdef MUTEX_DEBUG
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400123 rp->mutex_owner_pid = 0;
124 rp->mutex_owner_tag = 0;
125#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400127 for (i = nheld - 1; i >= 0; i--)
128 {
129 if (mutexes_held[i] == &rp->mutex)
130 {
131 for (j = i; j < MAXLOCK - 1; j++)
132 mutexes_held[j] = mutexes_held[j + 1];
133 nheld--;
134 goto found;
135 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700136 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400137 ASSERT (0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700138
139found:
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400140 CLIB_MEMORY_BARRIER ();
141 pthread_mutex_unlock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700142}
143
144
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400145static u8 *
146format_svm_flags (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700147{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400148 uword f = va_arg (*args, uword);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700149
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400150 if (f & SVM_FLAGS_MHEAP)
151 s = format (s, "MHEAP ");
152 if (f & SVM_FLAGS_FILE)
153 s = format (s, "FILE ");
154 if (f & SVM_FLAGS_NODATA)
155 s = format (s, "NODATA ");
156 if (f & SVM_FLAGS_NEED_DATA_INIT)
157 s = format (s, "INIT ");
158
159 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700160}
161
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400162static u8 *
163format_svm_size (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700164{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400165 uword size = va_arg (*args, uword);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700166
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400167 if (size >= (1 << 20))
168 {
169 s = format (s, "(%d mb)", size >> 20);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700170 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400171 else if (size >= (1 << 10))
172 {
173 s = format (s, "(%d kb)", size >> 10);
174 }
175 else
176 {
177 s = format (s, "(%d bytes)", size);
178 }
179 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700180}
181
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400182u8 *
183format_svm_region (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700184{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400185 svm_region_t *rp = va_arg (*args, svm_region_t *);
186 int verbose = va_arg (*args, int);
187 int i;
188 uword lo, hi;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700189
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400190 s = format (s, "%s: base va 0x%x size 0x%x %U\n",
191 rp->region_name, rp->virtual_base,
192 rp->virtual_size, format_svm_size, rp->virtual_size);
193 s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
194 rp->user_ctx, rp->bitmap_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700195
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400196 if (verbose)
197 {
198 s = format (s, " flags: 0x%x %U\n", rp->flags,
199 format_svm_flags, rp->flags);
200 s = format (s,
201 " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
202 rp->region_heap, rp->data_base, rp->data_heap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700203 }
204
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400205 s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700206
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400207 for (i = 0; i < vec_len (rp->client_pids); i++)
208 s = format (s, "%d ", rp->client_pids[i]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700209
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400210 s = format (s, "\n");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700211
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400212 if (verbose)
213 {
214 lo = hi = ~0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700215
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400216 s = format (s, " VM in use: ");
217
218 for (i = 0; i < rp->bitmap_size; i++)
219 {
220 if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
221 {
222 if (lo == ~0)
223 {
224 hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
225 }
226 else
227 {
228 hi = rp->virtual_base + i * MMAP_PAGESIZE;
229 }
230 }
231 else
232 {
233 if (lo != ~0)
234 {
235 hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
236 s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
237 (hi - lo) >> 10);
238 lo = hi = ~0;
239 }
240 }
241 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700242 }
243
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400244 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700245}
246
247/*
248 * rnd_pagesize
249 * Round to a pagesize multiple, presumably 4k works
250 */
Dave Barachb3d93da2016-08-03 14:34:38 -0400251static u64
252rnd_pagesize (u64 size)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700253{
Dave Barachb3d93da2016-08-03 14:34:38 -0400254 u64 rv;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700255
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400256 rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
257 return (rv);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700258}
259
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400260/*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700261 * svm_data_region_setup
262 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400263static int
264svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700265{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400266 int fd;
267 u8 junk = 0;
268 uword map_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700269
Dave Barachc3799992016-08-15 11:12:27 -0400270 map_size = rp->virtual_size - (MMAP_PAGESIZE +
271 (a->pvt_heap_size ? a->pvt_heap_size :
272 SVM_PVT_MHEAP_SIZE));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700273
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400274 if (a->flags & SVM_FLAGS_FILE)
275 {
276 struct stat statb;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700277
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400278 fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700279
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400280 if (fd < 0)
281 {
282 clib_unix_warning ("open");
283 return -1;
284 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700285
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400286 if (fstat (fd, &statb) < 0)
287 {
288 clib_unix_warning ("fstat");
289 close (fd);
290 return -2;
291 }
292
293 if (statb.st_mode & S_IFREG)
294 {
295 if (statb.st_size == 0)
296 {
297 if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
298 {
299 clib_unix_warning ("seek region size");
300 close (fd);
301 return -3;
302 }
303 if (write (fd, &junk, 1) != 1)
304 {
305 clib_unix_warning ("set region size");
306 close (fd);
307 return -3;
308 }
309 }
310 else
311 {
312 map_size = rnd_pagesize (statb.st_size);
313 }
314 }
315 else
316 {
317 map_size = a->backing_mmap_size;
318 }
319
320 ASSERT (map_size <= rp->virtual_size -
321 (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));
322
323 if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
324 MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
325 {
326 clib_unix_warning ("mmap");
327 close (fd);
328 return -3;
329 }
330 close (fd);
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +0200331 CLIB_MEM_UNPOISON (rp->data_base, map_size);
BenoƮt Ganneda5b4ef2020-09-09 10:00:34 +0200332 rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400333 rp->flags |= SVM_FLAGS_FILE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700334 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400335
336 if (a->flags & SVM_FLAGS_MHEAP)
337 {
Damjan Marion4537c302020-09-28 19:03:37 +0200338 rp->data_heap = clib_mem_create_heap (rp->data_base, map_size,
339 1 /* locked */ , "svm data");
Ole Troan73710c72018-06-04 22:27:49 +0200340
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400341 rp->flags |= SVM_FLAGS_MHEAP;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700342 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400343 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700344}
345
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400346static int
347svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700348{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400349 int fd;
350 u8 junk = 0;
351 uword map_size;
352 struct stat statb;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700353
Dave Barachc3799992016-08-15 11:12:27 -0400354 map_size = rp->virtual_size -
355 (MMAP_PAGESIZE
Dave Barachb3d93da2016-08-03 14:34:38 -0400356 + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700357
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400358 if (a->flags & SVM_FLAGS_FILE)
359 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700360
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400361 fd = open (a->backing_file, O_RDWR, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700362
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400363 if (fd < 0)
364 {
365 clib_unix_warning ("open");
366 return -1;
367 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700368
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400369 if (fstat (fd, &statb) < 0)
370 {
371 clib_unix_warning ("fstat");
372 close (fd);
373 return -2;
374 }
375
376 if (statb.st_mode & S_IFREG)
377 {
378 if (statb.st_size == 0)
379 {
380 if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
381 {
382 clib_unix_warning ("seek region size");
383 close (fd);
384 return -3;
385 }
386 if (write (fd, &junk, 1) != 1)
387 {
388 clib_unix_warning ("set region size");
389 close (fd);
390 return -3;
391 }
392 }
393 else
394 {
395 map_size = rnd_pagesize (statb.st_size);
396 }
397 }
398 else
399 {
400 map_size = a->backing_mmap_size;
401 }
402
403 ASSERT (map_size <= rp->virtual_size
Dave Barachc3799992016-08-15 11:12:27 -0400404 - (MMAP_PAGESIZE
405 +
406 (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400407
408 if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
409 MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
410 {
411 clib_unix_warning ("mmap");
412 close (fd);
413 return -3;
414 }
415 close (fd);
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +0200416 CLIB_MEM_UNPOISON (rp->data_base, map_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700417 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400418 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700419}
420
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400421u8 *
422shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700423{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400424 u8 *shm_name;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400425 int root_path_offset = 0;
426 int name_offset = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700427
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400428 if (a->root_path)
429 {
430 /* Tolerate present or absent slashes */
431 if (a->root_path[0] == '/')
432 root_path_offset++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700433
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400434 if (a->name[0] == '/')
435 name_offset = 1;
436
Matej Perinad135c192017-07-18 13:59:41 +0200437 shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400438 &a->name[name_offset], 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700439 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400440 else
441 shm_name = format (0, "%s%c", a->name, 0);
442 return (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700443}
444
Dave Barach59b25652017-09-10 15:04:27 -0400445void
446svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp)
447{
448 pthread_mutexattr_t attr;
449 pthread_condattr_t cattr;
450 int nbits, words, bit;
451 int overhead_space;
452 void *oldheap;
453 uword data_base;
454 ASSERT (rp);
455 int rv;
456
Dave Barachb7b92992018-10-17 10:38:51 -0400457 clib_memset (rp, 0, sizeof (*rp));
Dave Barach59b25652017-09-10 15:04:27 -0400458
459 if (pthread_mutexattr_init (&attr))
460 clib_unix_warning ("mutexattr_init");
461
462 if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
463 clib_unix_warning ("mutexattr_setpshared");
464
465 if (pthread_mutex_init (&rp->mutex, &attr))
466 clib_unix_warning ("mutex_init");
467
468 if (pthread_mutexattr_destroy (&attr))
469 clib_unix_warning ("mutexattr_destroy");
470
471 if (pthread_condattr_init (&cattr))
472 clib_unix_warning ("condattr_init");
473
474 if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
475 clib_unix_warning ("condattr_setpshared");
476
477 if (pthread_cond_init (&rp->condvar, &cattr))
478 clib_unix_warning ("cond_init");
479
480 if (pthread_condattr_destroy (&cattr))
481 clib_unix_warning ("condattr_destroy");
482
483 region_lock (rp, 1);
484
485 rp->virtual_base = a->baseva;
486 rp->virtual_size = a->size;
487
Damjan Marion4537c302020-09-28 19:03:37 +0200488 rp->region_heap = clib_mem_create_heap
Dave Barach6a5adc32018-07-04 10:56:23 -0400489 (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *),
490 (a->pvt_heap_size !=
Damjan Marion4537c302020-09-28 19:03:37 +0200491 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ ,
492 "svm region");
Dave Barach6a5adc32018-07-04 10:56:23 -0400493
Dave Barach59b25652017-09-10 15:04:27 -0400494 oldheap = svm_push_pvt_heap (rp);
495
496 rp->region_name = (char *) format (0, "%s%c", a->name, 0);
497 vec_add1 (rp->client_pids, getpid ());
498
499 nbits = rp->virtual_size / MMAP_PAGESIZE;
500
501 ASSERT (nbits > 0);
502 rp->bitmap_size = nbits;
503 words = (nbits + BITS (uword) - 1) / BITS (uword);
504 vec_validate (rp->bitmap, words - 1);
505
506 overhead_space = MMAP_PAGESIZE /* header */ +
507 ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
508
509 bit = 0;
510 data_base = (uword) rp->virtual_base;
511
512 if (a->flags & SVM_FLAGS_NODATA)
513 rp->flags |= SVM_FLAGS_NEED_DATA_INIT;
514
515 do
516 {
517 clib_bitmap_set_no_check (rp->bitmap, bit, 1);
518 bit++;
519 overhead_space -= MMAP_PAGESIZE;
520 data_base += MMAP_PAGESIZE;
521 }
522 while (overhead_space > 0);
523
524 rp->data_base = (void *) data_base;
525
526 /*
527 * Note: although the POSIX spec guarantees that only one
528 * process enters this block, we have to play games
529 * to hold off clients until e.g. the mutex is ready
530 */
531 rp->version = SVM_VERSION;
532
533 /* setup the data portion of the region */
534
535 rv = svm_data_region_create (a, rp);
536 if (rv)
537 {
538 clib_warning ("data_region_create: %d", rv);
539 }
540
541 region_unlock (rp);
542
543 svm_pop_heap (oldheap);
544}
545
Ed Warnickecb9cada2015-12-08 15:45:58 -0700546/*
547 * svm_map_region
548 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400549void *
550svm_map_region (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700551{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400552 int svm_fd;
553 svm_region_t *rp;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400554 int deadman = 0;
555 u8 junk = 0;
556 void *oldheap;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400557 int rv;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400558 int pid_holding_region_lock;
559 u8 *shm_name;
560 int dead_region_recovery = 0;
561 int time_left;
562 struct stat stat;
563 struct timespec ts, tsrem;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700564
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400565 ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
566 ASSERT (a->name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700567
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400568 shm_name = shm_name_from_svm_map_region_args (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700569
Florin Coras9f4ac582019-12-17 19:46:45 -0800570 if (CLIB_DEBUG > 1)
Dave Wallaced756b352017-07-03 13:11:38 -0400571 clib_warning ("[%d] map region %s: shm_open (%s)",
572 getpid (), a->name, shm_name);
573
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400574 svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700575
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400576 if (svm_fd >= 0)
577 {
Dave Wallace19296112017-08-31 15:54:11 -0400578 if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400579 clib_unix_warning ("segment chmod");
580 /* This turns out to fail harmlessly if the client starts first */
581 if (fchown (svm_fd, a->uid, a->gid) < 0)
582 clib_unix_warning ("segment chown [ok if client starts first]");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700583
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400584 vec_free (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700585
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400586 if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
587 {
588 clib_warning ("seek region size");
589 close (svm_fd);
590 return (0);
591 }
592 if (write (svm_fd, &junk, 1) != 1)
593 {
594 clib_warning ("set region size");
595 close (svm_fd);
596 return (0);
597 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700598
Damjan Marion7bee80c2017-04-26 15:32:12 +0200599 rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400600 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700601
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400602 if (rp == (svm_region_t *) MAP_FAILED)
603 {
604 clib_unix_warning ("mmap create");
605 close (svm_fd);
606 return (0);
607 }
608 close (svm_fd);
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +0200609 CLIB_MEM_UNPOISON (rp, a->size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700610
Dave Barach59b25652017-09-10 15:04:27 -0400611 svm_region_init_mapped_region (a, rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700612
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400613 return ((void *) rp);
614 }
615 else
616 {
617 svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700618
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400619 vec_free (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700620
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400621 if (svm_fd < 0)
622 {
623 perror ("svm_region_map(mmap open)");
624 return (0);
625 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700626
Ole Troanc4f2ef72018-05-30 22:43:25 +0200627 /* Reset ownership in case the client started first */
628 if (fchown (svm_fd, a->uid, a->gid) < 0)
629 clib_unix_warning ("segment chown [ok if client starts first]");
630
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400631 time_left = 20;
632 while (1)
633 {
634 if (0 != fstat (svm_fd, &stat))
635 {
636 clib_warning ("fstat failed: %d", errno);
637 close (svm_fd);
638 return (0);
639 }
640 if (stat.st_size > 0)
641 {
642 break;
643 }
644 if (0 == time_left)
645 {
646 clib_warning ("waiting for resize of shm file timed out");
647 close (svm_fd);
648 return (0);
649 }
650 ts.tv_sec = 0;
651 ts.tv_nsec = 100000000;
652 while (nanosleep (&ts, &tsrem) < 0)
653 ts = tsrem;
654 time_left--;
655 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700656
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400657 rp = mmap (0, MMAP_PAGESIZE,
658 PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700659
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400660 if (rp == (svm_region_t *) MAP_FAILED)
661 {
662 close (svm_fd);
663 clib_warning ("mmap");
664 return (0);
665 }
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +0200666
667 CLIB_MEM_UNPOISON (rp, MMAP_PAGESIZE);
668
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400669 /*
670 * We lost the footrace to create this region; make sure
671 * the winner has crossed the finish line.
672 */
673 while (rp->version == 0 && deadman++ < 5)
674 {
675 sleep (1);
676 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700677
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400678 /*
679 * <bleep>-ed?
680 */
681 if (rp->version == 0)
682 {
683 clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
684 close (svm_fd);
685 munmap (rp, a->size);
686 return (0);
687 }
688 /* Remap now that the region has been placed */
689 a->baseva = rp->virtual_base;
690 a->size = rp->virtual_size;
691 munmap (rp, MMAP_PAGESIZE);
692
Damjan Marion7bee80c2017-04-26 15:32:12 +0200693 rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400694 PROT_READ | PROT_WRITE,
695 MAP_SHARED | MAP_FIXED, svm_fd, 0);
696 if ((uword) rp == (uword) MAP_FAILED)
697 {
698 clib_unix_warning ("mmap");
699 close (svm_fd);
700 return (0);
701 }
702
Dave Barachada24ea2018-05-24 17:32:00 -0400703 close (svm_fd);
704
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +0200705 CLIB_MEM_UNPOISON (rp, a->size);
706
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400707 if ((uword) rp != rp->virtual_base)
708 {
709 clib_warning ("mmap botch");
710 }
711
712 /*
713 * Try to fix the region mutex if it is held by
714 * a dead process
715 */
716 pid_holding_region_lock = rp->mutex_owner_pid;
717 if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
718 {
BenoƮt Ganne78de92d2021-01-20 19:10:59 +0100719 pthread_mutexattr_t attr;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400720 clib_warning
721 ("region %s mutex held by dead pid %d, tag %d, force unlock",
722 rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
723 /* owner pid is nonexistent */
BenoƮt Ganne78de92d2021-01-20 19:10:59 +0100724 if (pthread_mutexattr_init (&attr))
725 clib_unix_warning ("mutexattr_init");
726 if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
727 clib_unix_warning ("mutexattr_setpshared");
728 if (pthread_mutex_init (&rp->mutex, &attr))
729 clib_unix_warning ("mutex_init");
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400730 dead_region_recovery = 1;
731 }
732
733 if (dead_region_recovery)
734 clib_warning ("recovery: attempt to re-lock region");
735
736 region_lock (rp, 2);
737 oldheap = svm_push_pvt_heap (rp);
738 vec_add1 (rp->client_pids, getpid ());
739
740 if (dead_region_recovery)
741 clib_warning ("recovery: attempt svm_data_region_map");
742
743 rv = svm_data_region_map (a, rp);
744 if (rv)
745 {
746 clib_warning ("data_region_map: %d", rv);
747 }
748
749 if (dead_region_recovery)
750 clib_warning ("unlock and continue");
751
752 region_unlock (rp);
753
754 svm_pop_heap (oldheap);
755
756 return ((void *) rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700757
758 }
Dave Barachc35f3e82020-04-02 10:44:09 -0400759 return 0; /* NOTREACHED *///NOSONAR
Ed Warnickecb9cada2015-12-08 15:45:58 -0700760}
761
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400762static void
763svm_mutex_cleanup (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700764{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400765 int i;
766 for (i = 0; i < nheld; i++)
767 {
Dave Barachc35f3e82020-04-02 10:44:09 -0400768 pthread_mutex_unlock (mutexes_held[i]); //NOSONAR
Ed Warnickecb9cada2015-12-08 15:45:58 -0700769 }
770}
771
Ole Troan3cdc25f2017-08-17 11:07:33 +0200772static int
Dave Barachb3d93da2016-08-03 14:34:38 -0400773svm_region_init_internal (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700774{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400775 svm_region_t *rp;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400776 u64 ticks = clib_cpu_time_now ();
777 uword randomize_baseva;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700778
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400779 /* guard against klutz calls */
780 if (root_rp)
Ole Troan3cdc25f2017-08-17 11:07:33 +0200781 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700782
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400783 root_rp_refcount++;
Dave Barach16c75df2016-05-31 14:05:46 -0400784
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400785 atexit (svm_mutex_cleanup);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700786
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400787 /* Randomize the shared-VM base at init time */
788 if (MMAP_PAGESIZE <= (4 << 10))
789 randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
790 else
791 randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700792
Dave Barachb3d93da2016-08-03 14:34:38 -0400793 a->baseva += randomize_baseva;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700794
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400795 rp = svm_map_region (a);
Ole Troan3cdc25f2017-08-17 11:07:33 +0200796 if (!rp)
797 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700798
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400799 region_lock (rp, 3);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700800
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400801 /* Set up the main region data structures */
802 if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
803 {
804 svm_main_region_t *mp = 0;
805 void *oldheap;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700806
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400807 rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700808
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400809 oldheap = svm_push_pvt_heap (rp);
810 vec_validate (mp, 0);
811 mp->name_hash = hash_create_string (0, sizeof (uword));
Dave Barachb3d93da2016-08-03 14:34:38 -0400812 mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
Dave Wallace19296112017-08-31 15:54:11 -0400813 mp->uid = a->uid;
814 mp->gid = a->gid;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400815 rp->data_base = mp;
816 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700817 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400818 region_unlock (rp);
819 root_rp = rp;
Ole Troan3cdc25f2017-08-17 11:07:33 +0200820
821 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700822}
823
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400824void
825svm_region_init (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700826{
Dave Barachb3d93da2016-08-03 14:34:38 -0400827 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400828
Dave Barachb7b92992018-10-17 10:38:51 -0400829 clib_memset (a, 0, sizeof (*a));
Dave Barachb3d93da2016-08-03 14:34:38 -0400830 a->root_path = 0;
831 a->name = SVM_GLOBAL_REGION_NAME;
Damjan Marionaec8f892018-01-08 16:35:35 +0100832 a->baseva = svm_get_global_region_base_va ();
Dave Barachb3d93da2016-08-03 14:34:38 -0400833 a->size = SVM_GLOBAL_REGION_SIZE;
834 a->flags = SVM_FLAGS_NODATA;
835 a->uid = 0;
836 a->gid = 0;
837
838 svm_region_init_internal (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700839}
840
Ole Troan3cdc25f2017-08-17 11:07:33 +0200841int
Neale Rannse72be392017-04-26 13:59:20 -0700842svm_region_init_chroot (const char *root_path)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700843{
Dave Barachb3d93da2016-08-03 14:34:38 -0400844 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400845
Dave Barachb7b92992018-10-17 10:38:51 -0400846 clib_memset (a, 0, sizeof (*a));
Dave Barachb3d93da2016-08-03 14:34:38 -0400847 a->root_path = root_path;
848 a->name = SVM_GLOBAL_REGION_NAME;
Damjan Marionaec8f892018-01-08 16:35:35 +0100849 a->baseva = svm_get_global_region_base_va ();
Dave Barachb3d93da2016-08-03 14:34:38 -0400850 a->size = SVM_GLOBAL_REGION_SIZE;
851 a->flags = SVM_FLAGS_NODATA;
852 a->uid = 0;
853 a->gid = 0;
854
Ole Troan3cdc25f2017-08-17 11:07:33 +0200855 return svm_region_init_internal (a);
Dave Barach16c75df2016-05-31 14:05:46 -0400856}
857
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400858void
Neale Rannse72be392017-04-26 13:59:20 -0700859svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
Dave Barach16c75df2016-05-31 14:05:46 -0400860{
Dave Barachb3d93da2016-08-03 14:34:38 -0400861 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400862
Dave Barachb7b92992018-10-17 10:38:51 -0400863 clib_memset (a, 0, sizeof (*a));
Dave Barachb3d93da2016-08-03 14:34:38 -0400864 a->root_path = root_path;
865 a->name = SVM_GLOBAL_REGION_NAME;
Damjan Marionaec8f892018-01-08 16:35:35 +0100866 a->baseva = svm_get_global_region_base_va ();
Dave Barachb3d93da2016-08-03 14:34:38 -0400867 a->size = SVM_GLOBAL_REGION_SIZE;
868 a->flags = SVM_FLAGS_NODATA;
869 a->uid = uid;
870 a->gid = gid;
871
872 svm_region_init_internal (a);
873}
874
875void
876svm_region_init_args (svm_map_region_args_t * a)
877{
878 svm_region_init_internal (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700879}
880
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400881void *
882svm_region_find_or_create (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700883{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400884 svm_main_region_t *mp;
885 svm_region_t *rp;
886 uword need_nbits;
887 int index, i;
888 void *oldheap;
889 uword *p;
890 u8 *name;
891 svm_subregion_t *subp;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700892
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400893 ASSERT (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700894
Dave Barachc3799992016-08-15 11:12:27 -0400895 a->size += MMAP_PAGESIZE +
Dave Barachb3d93da2016-08-03 14:34:38 -0400896 ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400897 a->size = rnd_pagesize (a->size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700898
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400899 region_lock (root_rp, 4);
900 oldheap = svm_push_pvt_heap (root_rp);
901 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700902
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400903 ASSERT (mp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700904
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400905 /* Map the named region from the correct chroot environment */
Jan Srnicek5beec812017-03-24 10:18:11 +0100906 if (a->root_path == NULL)
907 a->root_path = (char *) mp->root_path;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400908
909 /*
910 * See if this region is already known. If it is, we're
911 * almost done...
912 */
913 p = hash_get_mem (mp->name_hash, a->name);
914
915 if (p)
916 {
917 rp = svm_map_region (a);
918 region_unlock (root_rp);
919 svm_pop_heap (oldheap);
920 return rp;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700921 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700922
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400923 /* Create the region. */
924 ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700925
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400926 need_nbits = a->size / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700927
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400928 index = 1; /* $$$ fixme, figure out how many bit to really skip */
929
930 /*
931 * Scan the virtual space allocation bitmap, looking for a large
932 * enough chunk
933 */
934 do
935 {
936 if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
937 {
938 for (i = 0; i < (need_nbits - 1); i++)
939 {
940 if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
941 {
942 index = index + i;
943 goto next;
944 }
945 }
946 break;
947 }
948 index++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700949 next:;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700950 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400951 while (index < root_rp->bitmap_size);
952
953 /* Completely out of VM? */
954 if (index >= root_rp->bitmap_size)
955 {
Dave Barachb3d93da2016-08-03 14:34:38 -0400956 clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
957 root_rp->region_name, a->size, a->size);
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400958 svm_pop_heap (oldheap);
959 region_unlock (root_rp);
960 return 0;
961 }
962
963 /*
964 * Mark virtual space allocated
965 */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700966#if CLIB_DEBUG > 1
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400967 clib_warning ("set %d bits at index %d", need_nbits, index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700968#endif
969
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400970 for (i = 0; i < need_nbits; i++)
971 {
972 clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700973 }
974
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400975 /* Place this region where it goes... */
976 a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700977
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400978 rp = svm_map_region (a);
Dave Barachc3799992016-08-15 11:12:27 -0400979
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400980 pool_get (mp->subregions, subp);
981 name = format (0, "%s%c", a->name, 0);
982 subp->subregion_name = name;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700983
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400984 hash_set_mem (mp->name_hash, name, subp - mp->subregions);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700985
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400986 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700987
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400988 region_unlock (root_rp);
989
990 return (rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700991}
992
Dave Wallaced756b352017-07-03 13:11:38 -0400993void
994svm_region_unlink (svm_region_t * rp)
995{
996 svm_map_region_args_t _a, *a = &_a;
997 svm_main_region_t *mp;
998 u8 *shm_name;
999
1000 ASSERT (root_rp);
1001 ASSERT (rp);
1002 ASSERT (vec_c_string_is_terminated (rp->region_name));
1003
1004 mp = root_rp->data_base;
1005 ASSERT (mp);
1006
1007 a->root_path = (char *) mp->root_path;
1008 a->name = rp->region_name;
1009 shm_name = shm_name_from_svm_map_region_args (a);
1010 if (CLIB_DEBUG > 1)
1011 clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
1012 shm_unlink ((const char *) shm_name);
1013 vec_free (shm_name);
1014}
1015
Ed Warnickecb9cada2015-12-08 15:45:58 -07001016/*
1017 * svm_region_unmap
1018 *
1019 * Let go of the indicated region. If the calling process
1020 * is the last customer, throw it away completely.
1021 * The root region mutex guarantees atomicity with respect to
1022 * a new region client showing up at the wrong moment.
1023 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001024void
Florin Corasd6c30d92018-01-29 05:11:24 -08001025svm_region_unmap_internal (void *rp_arg, u8 is_client)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001026{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001027 int i, mypid = getpid ();
1028 int nclients_left;
1029 void *oldheap;
1030 uword virtual_base, virtual_size;
1031 svm_region_t *rp = rp_arg;
1032 char *name;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001033
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001034 /*
1035 * If we take a signal while holding one or more shared-memory
1036 * mutexes, we may end up back here from an otherwise
1037 * benign exit handler. Bail out to avoid a recursive
1038 * mutex screw-up.
1039 */
1040 if (nheld)
1041 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001042
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001043 ASSERT (rp);
1044 ASSERT (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001045
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001046 if (CLIB_DEBUG > 1)
1047 clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001048
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001049 region_lock (root_rp, 5);
1050 region_lock (rp, 6);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001051
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001052 oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
1053
1054 /* Remove the caller from the list of mappers */
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +02001055 CLIB_MEM_UNPOISON (rp->client_pids, vec_bytes (rp->client_pids));
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001056 for (i = 0; i < vec_len (rp->client_pids); i++)
1057 {
1058 if (rp->client_pids[i] == mypid)
1059 {
1060 vec_delete (rp->client_pids, 1, i);
1061 goto found;
1062 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001063 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001064 clib_warning ("pid %d AWOL", mypid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001065
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001066found:
Ed Warnickecb9cada2015-12-08 15:45:58 -07001067
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001068 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001069
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001070 nclients_left = vec_len (rp->client_pids);
1071 virtual_base = rp->virtual_base;
1072 virtual_size = rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001073
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001074 if (nclients_left == 0)
1075 {
1076 int index, nbits, i;
1077 svm_main_region_t *mp;
1078 uword *p;
1079 svm_subregion_t *subp;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001080
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001081 /* Kill the region, last guy on his way out */
Ed Warnickecb9cada2015-12-08 15:45:58 -07001082
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001083 oldheap = svm_push_pvt_heap (root_rp);
1084 name = vec_dup (rp->region_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001085
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001086 virtual_base = rp->virtual_base;
1087 virtual_size = rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001088
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001089 /* Figure out which bits to clear in the root region bitmap */
1090 index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001091
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001092 nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001093
1094#if CLIB_DEBUG > 1
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001095 clib_warning ("clear %d bits at index %d", nbits, index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001096#endif
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001097 /* Give back the allocated VM */
1098 for (i = 0; i < nbits; i++)
1099 {
1100 clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
1101 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001102
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001103 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001104
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001105 p = hash_get_mem (mp->name_hash, name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001106
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001107 /* Better never happen ... */
1108 if (p == NULL)
1109 {
1110 region_unlock (rp);
1111 region_unlock (root_rp);
1112 svm_pop_heap (oldheap);
1113 clib_warning ("Region name '%s' not found?", name);
1114 return;
1115 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001116
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001117 /* Remove from the root region subregion pool */
1118 subp = mp->subregions + p[0];
1119 pool_put (mp->subregions, subp);
1120
1121 hash_unset_mem (mp->name_hash, name);
1122
1123 vec_free (name);
1124
1125 region_unlock (rp);
Florin Corasd6c30d92018-01-29 05:11:24 -08001126
1127 /* If a client asks for the cleanup, don't unlink the backing
1128 * file since we can't tell if it has been recreated. */
1129 if (!is_client)
1130 svm_region_unlink (rp);
1131
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001132 munmap ((void *) virtual_base, virtual_size);
1133 region_unlock (root_rp);
1134 svm_pop_heap (oldheap);
1135 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001136 }
1137
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001138 region_unlock (rp);
1139 region_unlock (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001140
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001141 munmap ((void *) virtual_base, virtual_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001142}
1143
Florin Corasd6c30d92018-01-29 05:11:24 -08001144void
1145svm_region_unmap (void *rp_arg)
1146{
1147 svm_region_unmap_internal (rp_arg, 0 /* is_client */ );
1148}
1149
1150void
1151svm_region_unmap_client (void *rp_arg)
1152{
1153 svm_region_unmap_internal (rp_arg, 1 /* is_client */ );
1154}
1155
Ed Warnickecb9cada2015-12-08 15:45:58 -07001156/*
1157 * svm_region_exit
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001158 */
Florin Corasd6c30d92018-01-29 05:11:24 -08001159static void
1160svm_region_exit_internal (u8 is_client)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001161{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001162 void *oldheap;
1163 int i, mypid = getpid ();
1164 uword virtual_base, virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001165
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001166 /* It felt so nice we did it twice... */
1167 if (root_rp == 0)
1168 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001169
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001170 if (--root_rp_refcount > 0)
1171 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001172
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001173 /*
1174 * If we take a signal while holding one or more shared-memory
1175 * mutexes, we may end up back here from an otherwise
1176 * benign exit handler. Bail out to avoid a recursive
1177 * mutex screw-up.
1178 */
1179 if (nheld)
1180 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001181
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001182 region_lock (root_rp, 7);
1183 oldheap = svm_push_pvt_heap (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001184
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001185 virtual_base = root_rp->virtual_base;
1186 virtual_size = root_rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001187
BenoƮt Ganne77d42fc2020-04-20 09:52:39 +02001188 CLIB_MEM_UNPOISON (root_rp->client_pids, vec_bytes (root_rp->client_pids));
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001189 for (i = 0; i < vec_len (root_rp->client_pids); i++)
1190 {
1191 if (root_rp->client_pids[i] == mypid)
1192 {
1193 vec_delete (root_rp->client_pids, 1, i);
1194 goto found;
1195 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001196 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001197 clib_warning ("pid %d AWOL", mypid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001198
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001199found:
Ed Warnickecb9cada2015-12-08 15:45:58 -07001200
Florin Corasd6c30d92018-01-29 05:11:24 -08001201 if (!is_client && vec_len (root_rp->client_pids) == 0)
Dave Wallaced756b352017-07-03 13:11:38 -04001202 svm_region_unlink (root_rp);
1203
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001204 region_unlock (root_rp);
1205 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001206
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001207 root_rp = 0;
1208 munmap ((void *) virtual_base, virtual_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001209}
1210
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001211void
Florin Corasd6c30d92018-01-29 05:11:24 -08001212svm_region_exit (void)
1213{
1214 svm_region_exit_internal (0 /* is_client */ );
1215}
1216
1217void
1218svm_region_exit_client (void)
1219{
1220 svm_region_exit_internal (1 /* is_client */ );
1221}
1222
1223void
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001224svm_client_scan_this_region_nolock (svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001225{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001226 int j;
1227 int mypid = getpid ();
1228 void *oldheap;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001229
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001230 for (j = 0; j < vec_len (rp->client_pids); j++)
1231 {
1232 if (mypid == rp->client_pids[j])
1233 continue;
1234 if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
1235 {
1236 clib_warning ("%s: cleanup ghost pid %d",
1237 rp->region_name, rp->client_pids[j]);
1238 /* nb: client vec in rp->region_heap */
1239 oldheap = svm_push_pvt_heap (rp);
1240 vec_delete (rp->client_pids, 1, j);
1241 j--;
1242 svm_pop_heap (oldheap);
1243 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001244 }
1245}
1246
1247
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001248/*
Ed Warnickecb9cada2015-12-08 15:45:58 -07001249 * Scan svm regions for dead clients
1250 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001251void
Neale Rannse72be392017-04-26 13:59:20 -07001252svm_client_scan (const char *root_path)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001253{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001254 int i, j;
1255 svm_main_region_t *mp;
1256 svm_map_region_args_t *a = 0;
1257 svm_region_t *root_rp;
1258 svm_region_t *rp;
1259 svm_subregion_t *subp;
1260 u8 *name = 0;
1261 u8 **svm_names = 0;
1262 void *oldheap;
1263 int mypid = getpid ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001264
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001265 vec_validate (a, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001266
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001267 svm_region_init_chroot (root_path);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001268
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001269 root_rp = svm_get_root_rp ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001270
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001271 pthread_mutex_lock (&root_rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001272
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001273 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001274
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001275 for (j = 0; j < vec_len (root_rp->client_pids); j++)
1276 {
1277 if (mypid == root_rp->client_pids[j])
1278 continue;
1279 if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
1280 {
1281 clib_warning ("%s: cleanup ghost pid %d",
1282 root_rp->region_name, root_rp->client_pids[j]);
1283 /* nb: client vec in root_rp->region_heap */
1284 oldheap = svm_push_pvt_heap (root_rp);
1285 vec_delete (root_rp->client_pids, 1, j);
1286 j--;
1287 svm_pop_heap (oldheap);
1288 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001289 }
1290
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001291 /*
1292 * Snapshoot names, can't hold root rp mutex across
1293 * find_or_create.
1294 */
1295 /* *INDENT-OFF* */
Damjan Marionb2c31b62020-12-13 21:47:40 +01001296 pool_foreach (subp, mp->subregions) {
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001297 name = vec_dup (subp->subregion_name);
1298 vec_add1(svm_names, name);
Damjan Marionb2c31b62020-12-13 21:47:40 +01001299 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001300 /* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07001301
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001302 pthread_mutex_unlock (&root_rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001303
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001304 for (i = 0; i < vec_len (svm_names); i++)
1305 {
1306 vec_validate (a, 0);
1307 a->root_path = root_path;
1308 a->name = (char *) svm_names[i];
1309 rp = svm_region_find_or_create (a);
1310 if (rp)
1311 {
1312 pthread_mutex_lock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001313
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001314 svm_client_scan_this_region_nolock (rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001315
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001316 pthread_mutex_unlock (&rp->mutex);
1317 svm_region_unmap (rp);
1318 vec_free (svm_names[i]);
1319 }
1320 vec_free (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001321 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001322 vec_free (svm_names);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001323
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001324 svm_region_exit ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001325
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001326 vec_free (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001327}
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001328
1329/*
1330 * fd.io coding-style-patch-verification: ON
1331 *
1332 * Local Variables:
1333 * eval: (c-set-style "gnu")
1334 * End:
1335 */