/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"

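/*
 * Process-global state: the root region, its reference count, and a
 * small table of currently-held region mutexes used by the atexit
 * cleanup handler.
 */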
static svm_region_t *root_rp;
static int root_rp_refcount;

#define MAXLOCK 2
static pthread_mutex_t *mutexes_held[MAXLOCK];
static int nheld;

svm_region_t *
svm_get_root_rp (void)
{
  return root_rp;
}

#define MUTEX_DEBUG

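/*
 * Compute the default base virtual address for the global region.
 * On AArch64 the usable VA width varies, so it is detected at runtime;
 * elsewhere a fixed default is returned.
 */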
u64
svm_get_global_region_base_va ()
{
#ifdef CLIB_SANITIZE_ADDR
  return 0x200000000000;
#endif

#if __aarch64__
  /* On AArch64 the VA space can have different sizes, from 36 to 48 bits.
     Here we try to detect the number of VA bits by parsing /proc/self/maps
     address ranges */
  int fd;
  unformat_input_t input;
  u64 start, end = 0;
  u8 bits = 0;

  if ((fd = open ("/proc/self/maps", 0)) < 0)
    clib_unix_error ("open '/proc/self/maps'");

  unformat_init_clib_file (&input, fd);
  while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (&input, "%llx-%llx", &start, &end))
        end--;
      unformat_skip_line (&input);
    }
  unformat_free (&input);
  close (fd);

  bits = count_leading_zeros (end);
  bits = 64 - bits;
  if (bits >= 36 && bits <= 48)
    return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE);
  else
    clib_unix_error ("unexpected va bits '%u'", bits);
#endif

  /* default value */
  return 0x130000000ULL;
}

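/*
 * Take a region's process-shared mutex and record it in mutexes_held
 * so svm_mutex_cleanup() can release it if the process exits abruptly.
 */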
static void
region_lock (svm_region_t * rp, int tag)
{
  pthread_mutex_lock (&rp->mutex);
#ifdef MUTEX_DEBUG
  rp->mutex_owner_pid = getpid ();
  rp->mutex_owner_tag = tag;
#endif
  ASSERT (nheld < MAXLOCK);	//NOSONAR
  /*
   * Keep score of held mutexes so we can try to exit
   * cleanly if the world comes to an end at the worst possible
   * moment
   */
  mutexes_held[nheld++] = &rp->mutex;
}

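/*
 * Release a region's mutex and drop it from the mutexes_held table.
 */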
static void
region_unlock (svm_region_t * rp)
{
  int i, j;
#ifdef MUTEX_DEBUG
  rp->mutex_owner_pid = 0;
  rp->mutex_owner_tag = 0;
#endif

  for (i = nheld - 1; i >= 0; i--)
    {
      if (mutexes_held[i] == &rp->mutex)
        {
          for (j = i; j < MAXLOCK - 1; j++)
            mutexes_held[j] = mutexes_held[j + 1];
          nheld--;
          goto found;
        }
    }
  ASSERT (0);

found:
  CLIB_MEMORY_BARRIER ();
  pthread_mutex_unlock (&rp->mutex);
}


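/* format () helpers: region flag bits and human-readable sizes */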
static u8 *
format_svm_flags (u8 * s, va_list * args)
{
  uword f = va_arg (*args, uword);

  if (f & SVM_FLAGS_MHEAP)
    s = format (s, "MHEAP ");
  if (f & SVM_FLAGS_FILE)
    s = format (s, "FILE ");
  if (f & SVM_FLAGS_NODATA)
    s = format (s, "NODATA ");
  if (f & SVM_FLAGS_NEED_DATA_INIT)
    s = format (s, "INIT ");

  return (s);
}

static u8 *
format_svm_size (u8 * s, va_list * args)
{
  uword size = va_arg (*args, uword);

  if (size >= (1 << 20))
    {
      s = format (s, "(%d mb)", size >> 20);
    }
  else if (size >= (1 << 10))
    {
      s = format (s, "(%d kb)", size >> 10);
    }
  else
    {
      s = format (s, "(%d bytes)", size);
    }
  return (s);
}

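/*
 * Format a summary of one region: name, base va, size and client pids;
 * in verbose mode also flags, heaps and the in-use page ranges.
 */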
u8 *
format_svm_region (u8 * s, va_list * args)
{
  svm_region_t *rp = va_arg (*args, svm_region_t *);
  int verbose = va_arg (*args, int);
  int i;
  uword lo, hi;

  s = format (s, "%s: base va 0x%x size 0x%x %U\n",
              rp->region_name, rp->virtual_base,
              rp->virtual_size, format_svm_size, rp->virtual_size);
  s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
              rp->user_ctx, rp->bitmap_size);

  if (verbose)
    {
      s = format (s, " flags: 0x%x %U\n", rp->flags,
                  format_svm_flags, rp->flags);
      s = format (s,
                  " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
                  rp->region_heap, rp->data_base, rp->data_heap);
    }

  s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));

  for (i = 0; i < vec_len (rp->client_pids); i++)
    s = format (s, "%d ", rp->client_pids[i]);

  s = format (s, "\n");

  if (verbose)
    {
      lo = hi = ~0;

      s = format (s, " VM in use: ");

      for (i = 0; i < rp->bitmap_size; i++)
        {
          if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
            {
              if (lo == ~0)
                {
                  hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
                }
              else
                {
                  hi = rp->virtual_base + i * MMAP_PAGESIZE;
                }
            }
          else
            {
              if (lo != ~0)
                {
                  hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
                  s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
                              (hi - lo) >> 10);
                  lo = hi = ~0;
                }
            }
        }
    }

  return (s);
}

/*
 * rnd_pagesize
 * Round to a pagesize multiple, presumably 4k works
 */
static u64
rnd_pagesize (u64 size)
{
  u64 rv;

  rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
  return (rv);
}

/*
 * svm_data_region_create
 * Size and map the backing file (if any) for a newly created region,
 * and optionally create the data heap.
 */
static int
svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
{
  int fd;
  u8 junk = 0;
  uword map_size;

  map_size = rp->virtual_size - (MMAP_PAGESIZE +
                                 (a->pvt_heap_size ? a->pvt_heap_size :
                                  SVM_PVT_MHEAP_SIZE));

  if (a->flags & SVM_FLAGS_FILE)
    {
      struct stat statb;

      fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);

      if (fd < 0)
        {
          clib_unix_warning ("open");
          return -1;
        }

      if (fstat (fd, &statb) < 0)
        {
          clib_unix_warning ("fstat");
          close (fd);
          return -2;
        }

      if (statb.st_mode & S_IFREG)
        {
          if (statb.st_size == 0)
            {
              if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
                {
                  clib_unix_warning ("seek region size");
                  close (fd);
                  return -3;
                }
              if (write (fd, &junk, 1) != 1)
                {
                  clib_unix_warning ("set region size");
                  close (fd);
                  return -3;
                }
            }
          else
            {
              map_size = rnd_pagesize (statb.st_size);
            }
        }
      else
        {
          map_size = a->backing_mmap_size;
        }

      ASSERT (map_size <= rp->virtual_size -
              (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

      if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (fd);
          return -3;
        }
      close (fd);
      CLIB_MEM_UNPOISON (rp->data_base, map_size);
      rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
      rp->flags |= SVM_FLAGS_FILE;
    }

  if (a->flags & SVM_FLAGS_MHEAP)
    {
      rp->data_heap = clib_mem_create_heap (rp->data_base, map_size,
                                            1 /* locked */ , "svm data");

      rp->flags |= SVM_FLAGS_MHEAP;
    }
  return 0;
}

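/*
 * svm_data_region_map
 * Attach-side counterpart of svm_data_region_create: map an existing
 * backing file (if any) into the already-placed region.
 */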
static int
svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
{
  int fd;
  u8 junk = 0;
  uword map_size;
  struct stat statb;

  map_size = rp->virtual_size -
    (MMAP_PAGESIZE
     + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));

  if (a->flags & SVM_FLAGS_FILE)
    {

      fd = open (a->backing_file, O_RDWR, 0777);

      if (fd < 0)
        {
          clib_unix_warning ("open");
          return -1;
        }

      if (fstat (fd, &statb) < 0)
        {
          clib_unix_warning ("fstat");
          close (fd);
          return -2;
        }

      if (statb.st_mode & S_IFREG)
        {
          if (statb.st_size == 0)
            {
              if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
                {
                  clib_unix_warning ("seek region size");
                  close (fd);
                  return -3;
                }
              if (write (fd, &junk, 1) != 1)
                {
                  clib_unix_warning ("set region size");
                  close (fd);
                  return -3;
                }
            }
          else
            {
              map_size = rnd_pagesize (statb.st_size);
            }
        }
      else
        {
          map_size = a->backing_mmap_size;
        }

      ASSERT (map_size <= rp->virtual_size
              - (MMAP_PAGESIZE
                 +
                 (a->pvt_heap_size ? a->pvt_heap_size :
                  SVM_PVT_MHEAP_SIZE)));

      if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (fd);
          return -3;
        }
      close (fd);
      CLIB_MEM_UNPOISON (rp->data_base, map_size);
    }
  return 0;
}

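/*
 * Build the shm_open() name for a region, prefixing the (chroot) root
 * path if one was supplied. The caller frees the returned vector.
 */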
u8 *
shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
{
  u8 *shm_name;
  int root_path_offset = 0;
  int name_offset = 0;

  if (a->root_path)
    {
      /* Tolerate present or absent slashes */
      if (a->root_path[0] == '/')
        root_path_offset++;

      if (a->name[0] == '/')
        name_offset = 1;

      shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
                         &a->name[name_offset], 0);
    }
  else
    shm_name = format (0, "%s%c", a->name, 0);
  return (shm_name);
}

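/*
 * Initialize a freshly mmap'd region: process-shared mutex and condvar,
 * private heap, allocation bitmap, and the data portion. Called by
 * svm_map_region() in the process that creates the region.
 */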
void
svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp)
{
  pthread_mutexattr_t attr;
  pthread_condattr_t cattr;
  int nbits, words, bit;
  int overhead_space;
  void *oldheap;
  uword data_base;
  ASSERT (rp);
  int rv;

  clib_memset (rp, 0, sizeof (*rp));

  if (pthread_mutexattr_init (&attr))
    clib_unix_warning ("mutexattr_init");

  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("mutexattr_setpshared");

  if (pthread_mutex_init (&rp->mutex, &attr))
    clib_unix_warning ("mutex_init");

  if (pthread_mutexattr_destroy (&attr))
    clib_unix_warning ("mutexattr_destroy");

  if (pthread_condattr_init (&cattr))
    clib_unix_warning ("condattr_init");

  if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("condattr_setpshared");

  if (pthread_cond_init (&rp->condvar, &cattr))
    clib_unix_warning ("cond_init");

  if (pthread_condattr_destroy (&cattr))
    clib_unix_warning ("condattr_destroy");

  region_lock (rp, 1);

  rp->virtual_base = a->baseva;
  rp->virtual_size = a->size;

  rp->region_heap = clib_mem_create_heap
    (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *),
     (a->pvt_heap_size !=
      0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ ,
     "svm region");

  oldheap = svm_push_pvt_heap (rp);

  rp->region_name = (char *) format (0, "%s%c", a->name, 0);
  vec_add1 (rp->client_pids, getpid ());

  nbits = rp->virtual_size / MMAP_PAGESIZE;

  ASSERT (nbits > 0);
  rp->bitmap_size = nbits;
  words = (nbits + BITS (uword) - 1) / BITS (uword);
  vec_validate (rp->bitmap, words - 1);

  overhead_space = MMAP_PAGESIZE /* header */  +
    ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);

  bit = 0;
  data_base = (uword) rp->virtual_base;

  if (a->flags & SVM_FLAGS_NODATA)
    rp->flags |= SVM_FLAGS_NEED_DATA_INIT;

  do
    {
      clib_bitmap_set_no_check (rp->bitmap, bit, 1);
      bit++;
      overhead_space -= MMAP_PAGESIZE;
      data_base += MMAP_PAGESIZE;
    }
  while (overhead_space > 0);

  rp->data_base = (void *) data_base;

  /*
   * Note: although the POSIX spec guarantees that only one
   * process enters this block, we have to play games
   * to hold off clients until e.g. the mutex is ready
   */
  rp->version = SVM_VERSION;

  /* setup the data portion of the region */

  rv = svm_data_region_create (a, rp);
  if (rv)
    {
      clib_warning ("data_region_create: %d", rv);
    }

  region_unlock (rp);

  svm_pop_heap (oldheap);
}

/*
 * svm_map_region
 * Create the named shared-memory region, or attach to it if it already
 * exists: the loser of the shm_open(O_CREAT|O_EXCL) race waits for the
 * winner to finish initialization, then remaps at the recorded base va.
 */
void *
svm_map_region (svm_map_region_args_t * a)
{
  int svm_fd;
  svm_region_t *rp;
  int deadman = 0;
  u8 junk = 0;
  void *oldheap;
  int rv;
  int pid_holding_region_lock;
  u8 *shm_name;
  int dead_region_recovery = 0;
  int time_left;
  struct stat stat;
  struct timespec ts, tsrem;

  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
  ASSERT (a->name);

  shm_name = shm_name_from_svm_map_region_args (a);

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] map region %s: shm_open (%s)",
                  getpid (), a->name, shm_name);

  svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);

  if (svm_fd >= 0)
    {
      if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
        clib_unix_warning ("segment chmod");
      /* This turns out to fail harmlessly if the client starts first */
      if (fchown (svm_fd, a->uid, a->gid) < 0)
        clib_unix_warning ("segment chown [ok if client starts first]");

      vec_free (shm_name);

      if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
        {
          clib_warning ("seek region size");
          close (svm_fd);
          return (0);
        }
      if (write (svm_fd, &junk, 1) != 1)
        {
          clib_warning ("set region size");
          close (svm_fd);
          return (0);
        }

      rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
                 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);

      if (rp == (svm_region_t *) MAP_FAILED)
        {
          clib_unix_warning ("mmap create");
          close (svm_fd);
          return (0);
        }
      close (svm_fd);
      CLIB_MEM_UNPOISON (rp, a->size);

      svm_region_init_mapped_region (a, rp);

      return ((void *) rp);
    }
  else
    {
      svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);

      vec_free (shm_name);

      if (svm_fd < 0)
        {
          perror ("svm_region_map(mmap open)");
          return (0);
        }

      /* Reset ownership in case the client started first */
      if (fchown (svm_fd, a->uid, a->gid) < 0)
        clib_unix_warning ("segment chown [ok if client starts first]");

      time_left = 20;
      while (1)
        {
          if (0 != fstat (svm_fd, &stat))
            {
              clib_warning ("fstat failed: %d", errno);
              close (svm_fd);
              return (0);
            }
          if (stat.st_size > 0)
            {
              break;
            }
          if (0 == time_left)
            {
              clib_warning ("waiting for resize of shm file timed out");
              close (svm_fd);
              return (0);
            }
          ts.tv_sec = 0;
          ts.tv_nsec = 100000000;
          while (nanosleep (&ts, &tsrem) < 0)
            ts = tsrem;
          time_left--;
        }

      rp = mmap (0, MMAP_PAGESIZE,
                 PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

      if (rp == (svm_region_t *) MAP_FAILED)
        {
          close (svm_fd);
          clib_warning ("mmap");
          return (0);
        }

      CLIB_MEM_UNPOISON (rp, MMAP_PAGESIZE);

      /*
       * We lost the footrace to create this region; make sure
       * the winner has crossed the finish line.
       */
      while (rp->version == 0 && deadman++ < 5)
        {
          sleep (1);
        }

      /*
       * <bleep>-ed?
       */
      if (rp->version == 0)
        {
          clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
          close (svm_fd);
          munmap (rp, a->size);
          return (0);
        }
      /* Remap now that the region has been placed */
      a->baseva = rp->virtual_base;
      a->size = rp->virtual_size;
      munmap (rp, MMAP_PAGESIZE);

      rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
                          PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_FIXED, svm_fd, 0);
      if ((uword) rp == (uword) MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (svm_fd);
          return (0);
        }

      close (svm_fd);

      CLIB_MEM_UNPOISON (rp, a->size);

      if ((uword) rp != rp->virtual_base)
        {
          clib_warning ("mmap botch");
        }

      /*
       * Try to fix the region mutex if it is held by
       * a dead process
       */
      pid_holding_region_lock = rp->mutex_owner_pid;
      if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
        {
          clib_warning
            ("region %s mutex held by dead pid %d, tag %d, force unlock",
             rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
          /* owner pid is nonexistent */
          rp->mutex.__data.__owner = 0;
          rp->mutex.__data.__lock = 0;
          dead_region_recovery = 1;
        }

      if (dead_region_recovery)
        clib_warning ("recovery: attempt to re-lock region");

      region_lock (rp, 2);
      oldheap = svm_push_pvt_heap (rp);
      vec_add1 (rp->client_pids, getpid ());

      if (dead_region_recovery)
        clib_warning ("recovery: attempt svm_data_region_map");

      rv = svm_data_region_map (a, rp);
      if (rv)
        {
          clib_warning ("data_region_map: %d", rv);
        }

      if (dead_region_recovery)
        clib_warning ("unlock and continue");

      region_unlock (rp);

      svm_pop_heap (oldheap);

      return ((void *) rp);

    }
  return 0;			/* NOTREACHED */	//NOSONAR
}

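/* atexit() handler: release any region mutexes still held by this process */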
static void
svm_mutex_cleanup (void)
{
  int i;
  for (i = 0; i < nheld; i++)
    {
      pthread_mutex_unlock (mutexes_held[i]);	//NOSONAR
    }
}

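/*
 * Map the root region (creating it if needed) and, on first use, set up
 * the main-region data structures; repeat calls are rejected.
 */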
static int
svm_region_init_internal (svm_map_region_args_t * a)
{
  svm_region_t *rp;
  u64 ticks = clib_cpu_time_now ();
  uword randomize_baseva;

  /* guard against klutz calls */
  if (root_rp)
    return -1;

  root_rp_refcount++;

  atexit (svm_mutex_cleanup);

  /* Randomize the shared-VM base at init time */
  if (MMAP_PAGESIZE <= (4 << 10))
    randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
  else
    randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;

  a->baseva += randomize_baseva;

  rp = svm_map_region (a);
  if (!rp)
    return -1;

  region_lock (rp, 3);

  /* Set up the main region data structures */
  if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
    {
      svm_main_region_t *mp = 0;
      void *oldheap;

      rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);

      oldheap = svm_push_pvt_heap (rp);
      vec_validate (mp, 0);
      mp->name_hash = hash_create_string (0, sizeof (uword));
      mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
      mp->uid = a->uid;
      mp->gid = a->gid;
      rp->data_base = mp;
      svm_pop_heap (oldheap);
    }
  region_unlock (rp);
  root_rp = rp;

  return 0;
}

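/*
 * Public wrappers around svm_region_init_internal(): map the global
 * region with default parameters, with a chroot-style root path, with
 * explicit uid/gid, or with caller-supplied arguments.
 */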
void
svm_region_init (void)
{
  svm_map_region_args_t _a, *a = &_a;

  clib_memset (a, 0, sizeof (*a));
  a->root_path = 0;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = 0;
  a->gid = 0;

  svm_region_init_internal (a);
}

int
svm_region_init_chroot (const char *root_path)
{
  svm_map_region_args_t _a, *a = &_a;

  clib_memset (a, 0, sizeof (*a));
  a->root_path = root_path;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = 0;
  a->gid = 0;

  return svm_region_init_internal (a);
}

void
svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
{
  svm_map_region_args_t _a, *a = &_a;

  clib_memset (a, 0, sizeof (*a));
  a->root_path = root_path;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = uid;
  a->gid = gid;

  svm_region_init_internal (a);
}

void
svm_region_init_args (svm_map_region_args_t * a)
{
  svm_region_init_internal (a);
}

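/*
 * Look up a named subregion in the root region's name hash; map it if it
 * already exists, otherwise carve virtual space out of the root region's
 * allocation bitmap, create the subregion, and register it.
 */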
void *
svm_region_find_or_create (svm_map_region_args_t * a)
{
  svm_main_region_t *mp;
  svm_region_t *rp;
  uword need_nbits;
  int index, i;
  void *oldheap;
  uword *p;
  u8 *name;
  svm_subregion_t *subp;

  ASSERT (root_rp);

  a->size += MMAP_PAGESIZE +
    ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
  a->size = rnd_pagesize (a->size);

  region_lock (root_rp, 4);
  oldheap = svm_push_pvt_heap (root_rp);
  mp = root_rp->data_base;

  ASSERT (mp);

  /* Map the named region from the correct chroot environment */
  if (a->root_path == NULL)
    a->root_path = (char *) mp->root_path;

  /*
   * See if this region is already known. If it is, we're
   * almost done...
   */
  p = hash_get_mem (mp->name_hash, a->name);

  if (p)
    {
      rp = svm_map_region (a);
      region_unlock (root_rp);
      svm_pop_heap (oldheap);
      return rp;
    }

  /* Create the region. */
  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);

  need_nbits = a->size / MMAP_PAGESIZE;

  index = 1;			/* $$$ fixme, figure out how many bits to really skip */

  /*
   * Scan the virtual space allocation bitmap, looking for a large
   * enough chunk
   */
  do
    {
      if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
        {
          for (i = 0; i < (need_nbits - 1); i++)
            {
              if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
                {
                  index = index + i;
                  goto next;
                }
            }
          break;
        }
      index++;
    next:;
    }
  while (index < root_rp->bitmap_size);

  /* Completely out of VM? */
  if (index >= root_rp->bitmap_size)
    {
      clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
                    root_rp->region_name, a->size, a->size);
      svm_pop_heap (oldheap);
      region_unlock (root_rp);
      return 0;
    }

  /*
   * Mark virtual space allocated
   */
#if CLIB_DEBUG > 1
  clib_warning ("set %d bits at index %d", need_nbits, index);
#endif

  for (i = 0; i < need_nbits; i++)
    {
      clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
    }

  /* Place this region where it goes... */
  a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;

  rp = svm_map_region (a);

  pool_get (mp->subregions, subp);
  name = format (0, "%s%c", a->name, 0);
  subp->subregion_name = name;

  hash_set_mem (mp->name_hash, name, subp - mp->subregions);

  svm_pop_heap (oldheap);

  region_unlock (root_rp);

  return (rp);
}

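/* Remove a region's shm backing file via shm_unlink() */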
void
svm_region_unlink (svm_region_t * rp)
{
  svm_map_region_args_t _a, *a = &_a;
  svm_main_region_t *mp;
  u8 *shm_name;

  ASSERT (root_rp);
  ASSERT (rp);
  ASSERT (vec_c_string_is_terminated (rp->region_name));

  mp = root_rp->data_base;
  ASSERT (mp);

  a->root_path = (char *) mp->root_path;
  a->name = rp->region_name;
  shm_name = shm_name_from_svm_map_region_args (a);
  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
  shm_unlink ((const char *) shm_name);
  vec_free (shm_name);
}

/*
 * svm_region_unmap
 *
 * Let go of the indicated region. If the calling process
 * is the last customer, throw it away completely.
 * The root region mutex guarantees atomicity with respect to
 * a new region client showing up at the wrong moment.
 */
void
svm_region_unmap_internal (void *rp_arg, u8 is_client)
{
  int i, mypid = getpid ();
  int nclients_left;
  void *oldheap;
  uword virtual_base, virtual_size;
  svm_region_t *rp = rp_arg;
  char *name;

  /*
   * If we take a signal while holding one or more shared-memory
   * mutexes, we may end up back here from an otherwise
   * benign exit handler. Bail out to avoid a recursive
   * mutex screw-up.
   */
  if (nheld)
    return;

  ASSERT (rp);
  ASSERT (root_rp);

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);

  region_lock (root_rp, 5);
  region_lock (rp, 6);

  oldheap = svm_push_pvt_heap (rp);	/* nb vec_delete() in the loop */

  /* Remove the caller from the list of mappers */
  CLIB_MEM_UNPOISON (rp->client_pids, vec_bytes (rp->client_pids));
  for (i = 0; i < vec_len (rp->client_pids); i++)
    {
      if (rp->client_pids[i] == mypid)
        {
          vec_delete (rp->client_pids, 1, i);
          goto found;
        }
    }
  clib_warning ("pid %d AWOL", mypid);

found:

  svm_pop_heap (oldheap);

  nclients_left = vec_len (rp->client_pids);
  virtual_base = rp->virtual_base;
  virtual_size = rp->virtual_size;

  if (nclients_left == 0)
    {
      int index, nbits, i;
      svm_main_region_t *mp;
      uword *p;
      svm_subregion_t *subp;

      /* Kill the region, last guy on his way out */

      oldheap = svm_push_pvt_heap (root_rp);
      name = vec_dup (rp->region_name);

      virtual_base = rp->virtual_base;
      virtual_size = rp->virtual_size;

      /* Figure out which bits to clear in the root region bitmap */
      index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;

      nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;

#if CLIB_DEBUG > 1
      clib_warning ("clear %d bits at index %d", nbits, index);
#endif
      /* Give back the allocated VM */
      for (i = 0; i < nbits; i++)
        {
          clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
        }

      mp = root_rp->data_base;

      p = hash_get_mem (mp->name_hash, name);

      /* Better never happen ... */
      if (p == NULL)
        {
          region_unlock (rp);
          region_unlock (root_rp);
          svm_pop_heap (oldheap);
          clib_warning ("Region name '%s' not found?", name);
          return;
        }

      /* Remove from the root region subregion pool */
      subp = mp->subregions + p[0];
      pool_put (mp->subregions, subp);

      hash_unset_mem (mp->name_hash, name);

      vec_free (name);

      region_unlock (rp);

      /* If a client asks for the cleanup, don't unlink the backing
       * file since we can't tell if it has been recreated. */
      if (!is_client)
        svm_region_unlink (rp);

      munmap ((void *) virtual_base, virtual_size);
      region_unlock (root_rp);
      svm_pop_heap (oldheap);
      return;
    }

  region_unlock (rp);
  region_unlock (root_rp);

  munmap ((void *) virtual_base, virtual_size);
}

void
svm_region_unmap (void *rp_arg)
{
  svm_region_unmap_internal (rp_arg, 0 /* is_client */ );
}

void
svm_region_unmap_client (void *rp_arg)
{
  svm_region_unmap_internal (rp_arg, 1 /* is_client */ );
}

/*
 * svm_region_exit
 */
static void
svm_region_exit_internal (u8 is_client)
{
  void *oldheap;
  int i, mypid = getpid ();
  uword virtual_base, virtual_size;

  /* It felt so nice we did it twice... */
  if (root_rp == 0)
    return;

  if (--root_rp_refcount > 0)
    return;

  /*
   * If we take a signal while holding one or more shared-memory
   * mutexes, we may end up back here from an otherwise
   * benign exit handler. Bail out to avoid a recursive
   * mutex screw-up.
   */
  if (nheld)
    return;

  region_lock (root_rp, 7);
  oldheap = svm_push_pvt_heap (root_rp);

  virtual_base = root_rp->virtual_base;
  virtual_size = root_rp->virtual_size;

  CLIB_MEM_UNPOISON (root_rp->client_pids, vec_bytes (root_rp->client_pids));
  for (i = 0; i < vec_len (root_rp->client_pids); i++)
    {
      if (root_rp->client_pids[i] == mypid)
        {
          vec_delete (root_rp->client_pids, 1, i);
          goto found;
        }
    }
  clib_warning ("pid %d AWOL", mypid);

found:

  if (!is_client && vec_len (root_rp->client_pids) == 0)
    svm_region_unlink (root_rp);

  region_unlock (root_rp);
  svm_pop_heap (oldheap);

  root_rp = 0;
  munmap ((void *) virtual_base, virtual_size);
}

void
svm_region_exit (void)
{
  svm_region_exit_internal (0 /* is_client */ );
}

void
svm_region_exit_client (void)
{
  svm_region_exit_internal (1 /* is_client */ );
}

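/*
 * Drop dead client pids from one region's client list; the caller is
 * expected to hold rp->mutex (hence the _nolock suffix).
 */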
void
svm_client_scan_this_region_nolock (svm_region_t * rp)
{
  int j;
  int mypid = getpid ();
  void *oldheap;

  for (j = 0; j < vec_len (rp->client_pids); j++)
    {
      if (mypid == rp->client_pids[j])
        continue;
      if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
        {
          clib_warning ("%s: cleanup ghost pid %d",
                        rp->region_name, rp->client_pids[j]);
          /* nb: client vec in rp->region_heap */
          oldheap = svm_push_pvt_heap (rp);
          vec_delete (rp->client_pids, 1, j);
          j--;
          svm_pop_heap (oldheap);
        }
    }
}


/*
 * Scan svm regions for dead clients
 */
void
svm_client_scan (const char *root_path)
{
  int i, j;
  svm_main_region_t *mp;
  svm_map_region_args_t *a = 0;
  svm_region_t *root_rp;
  svm_region_t *rp;
  svm_subregion_t *subp;
  u8 *name = 0;
  u8 **svm_names = 0;
  void *oldheap;
  int mypid = getpid ();

  vec_validate (a, 0);

  svm_region_init_chroot (root_path);

  root_rp = svm_get_root_rp ();

  pthread_mutex_lock (&root_rp->mutex);

  mp = root_rp->data_base;

  for (j = 0; j < vec_len (root_rp->client_pids); j++)
    {
      if (mypid == root_rp->client_pids[j])
        continue;
      if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
        {
          clib_warning ("%s: cleanup ghost pid %d",
                        root_rp->region_name, root_rp->client_pids[j]);
          /* nb: client vec in root_rp->region_heap */
          oldheap = svm_push_pvt_heap (root_rp);
          vec_delete (root_rp->client_pids, 1, j);
          j--;
          svm_pop_heap (oldheap);
        }
    }

  /*
   * Snapshot names, can't hold root rp mutex across
   * find_or_create.
   */
  /* *INDENT-OFF* */
  pool_foreach (subp, mp->subregions) {
    name = vec_dup (subp->subregion_name);
    vec_add1(svm_names, name);
  }
  /* *INDENT-ON* */

  pthread_mutex_unlock (&root_rp->mutex);

  for (i = 0; i < vec_len (svm_names); i++)
    {
      vec_validate (a, 0);
      a->root_path = root_path;
      a->name = (char *) svm_names[i];
      rp = svm_region_find_or_create (a);
      if (rp)
        {
          pthread_mutex_lock (&rp->mutex);

          svm_client_scan_this_region_nolock (rp);

          pthread_mutex_unlock (&rp->mutex);
          svm_region_unmap (rp);
          vec_free (svm_names[i]);
        }
      vec_free (a);
    }
  vec_free (svm_names);

  svm_region_exit ();

  vec_free (a);
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */