/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/mheap.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"

static svm_region_t *root_rp;
static int root_rp_refcount;

#define MAXLOCK 2
static pthread_mutex_t *mutexes_held[MAXLOCK];
static int nheld;

svm_region_t *
svm_get_root_rp (void)
{
  return root_rp;
}

#define MUTEX_DEBUG

u64
svm_get_global_region_base_va ()
{
#if __aarch64__
  /* On AArch64 the VA space can have different sizes, from 36 to 48 bits.
     Detect the number of VA bits by parsing the address ranges in
     /proc/self/maps. */
  int fd;
  unformat_input_t input;
  u64 start, end = 0;
  u8 bits = 0;

  if ((fd = open ("/proc/self/maps", 0)) < 0)
    clib_unix_error ("open '/proc/self/maps'");

  unformat_init_clib_file (&input, fd);
  while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (&input, "%llx-%llx", &start, &end))
        end--;
      unformat_skip_line (&input);
    }
  unformat_free (&input);
  close (fd);

  bits = count_leading_zeros (end);
  bits = 64 - bits;
  if (bits >= 36 && bits <= 48)
    return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE);
  else
    clib_unix_error ("unexpected va bits '%u'", bits);
#endif

  /* default value */
  return 0x30000000;
}
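
/*
 * Worked example for the AArch64 branch above (illustrative): with a
 * 48-bit VA space the expression evaluates to (1ul << 48) / 4, i.e.
 * 0x400000000000, minus 2 * SVM_GLOBAL_REGION_SIZE. Other platforms
 * fall through to the fixed 0x30000000 default.
 */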

static void
region_lock (svm_region_t * rp, int tag)
{
  pthread_mutex_lock (&rp->mutex);
#ifdef MUTEX_DEBUG
  rp->mutex_owner_pid = getpid ();
  rp->mutex_owner_tag = tag;
#endif
  ASSERT (nheld < MAXLOCK);
  /*
   * Keep score of held mutexes so we can try to exit
   * cleanly if the world comes to an end at the worst possible
   * moment
   */
  mutexes_held[nheld++] = &rp->mutex;
}

static void
region_unlock (svm_region_t * rp)
{
  int i, j;
#ifdef MUTEX_DEBUG
  rp->mutex_owner_pid = 0;
  rp->mutex_owner_tag = 0;
#endif

  for (i = nheld - 1; i >= 0; i--)
    {
      if (mutexes_held[i] == &rp->mutex)
        {
          for (j = i; j < MAXLOCK - 1; j++)
            mutexes_held[j] = mutexes_held[j + 1];
          nheld--;
          goto found;
        }
    }
  ASSERT (0);

found:
  CLIB_MEMORY_BARRIER ();
  pthread_mutex_unlock (&rp->mutex);
}
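
/*
 * Illustrative nesting (mirrors the region find/unmap paths later in
 * this file); MAXLOCK == 2 bounds the bookkeeping to the root region
 * plus one subregion held at a time:
 *
 *   region_lock (root_rp, 5);
 *   region_lock (rp, 6);
 *   ... critical section ...
 *   region_unlock (rp);
 *   region_unlock (root_rp);
 */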


static u8 *
format_svm_flags (u8 * s, va_list * args)
{
  uword f = va_arg (*args, uword);

  if (f & SVM_FLAGS_MHEAP)
    s = format (s, "MHEAP ");
  if (f & SVM_FLAGS_FILE)
    s = format (s, "FILE ");
  if (f & SVM_FLAGS_NODATA)
    s = format (s, "NODATA ");
  if (f & SVM_FLAGS_NEED_DATA_INIT)
    s = format (s, "INIT ");

  return (s);
}

static u8 *
format_svm_size (u8 * s, va_list * args)
{
  uword size = va_arg (*args, uword);

  if (size >= (1 << 20))
    {
      s = format (s, "(%d mb)", size >> 20);
    }
  else if (size >= (1 << 10))
    {
      s = format (s, "(%d kb)", size >> 10);
    }
  else
    {
      s = format (s, "(%d bytes)", size);
    }
  return (s);
}

u8 *
format_svm_region (u8 * s, va_list * args)
{
  svm_region_t *rp = va_arg (*args, svm_region_t *);
  int verbose = va_arg (*args, int);
  int i;
  uword lo, hi;

  s = format (s, "%s: base va 0x%x size 0x%x %U\n",
              rp->region_name, rp->virtual_base,
              rp->virtual_size, format_svm_size, rp->virtual_size);
  s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
              rp->user_ctx, rp->bitmap_size);

  if (verbose)
    {
      s = format (s, " flags: 0x%x %U\n", rp->flags,
                  format_svm_flags, rp->flags);
      s = format (s,
                  " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
                  rp->region_heap, rp->data_base, rp->data_heap);
    }

  s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));

  for (i = 0; i < vec_len (rp->client_pids); i++)
    s = format (s, "%d ", rp->client_pids[i]);

  s = format (s, "\n");

  if (verbose)
    {
      lo = hi = ~0;

      s = format (s, " VM in use: ");

      for (i = 0; i < rp->bitmap_size; i++)
        {
          if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
            {
              if (lo == ~0)
                {
                  hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
                }
              else
                {
                  hi = rp->virtual_base + i * MMAP_PAGESIZE;
                }
            }
          else
            {
              if (lo != ~0)
                {
                  hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
                  s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
                              (hi - lo) >> 10);
                  lo = hi = ~0;
                }
            }
        }
#if USE_DLMALLOC == 0
      s = format (s, " rgn heap stats: %U", format_mheap,
                  rp->region_heap, 0);
      if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap)
        {
          s = format (s, "\n data heap stats: %U", format_mheap,
                      rp->data_heap, 1);
        }
      s = format (s, "\n");
#endif
    }

  return (s);
}

/*
 * rnd_pagesize
 * Round to a pagesize multiple, presumably 4k works
 */
static u64
rnd_pagesize (u64 size)
{
  u64 rv;

  rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
  return (rv);
}
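
/*
 * Worked examples (assuming MMAP_PAGESIZE == 4 << 10):
 *   rnd_pagesize (1)    == 0x1000
 *   rnd_pagesize (4096) == 0x1000
 *   rnd_pagesize (4097) == 0x2000
 */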

/*
 * svm_data_region_create
 */
static int
svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
{
  int fd;
  u8 junk = 0;
  uword map_size;

  map_size = rp->virtual_size - (MMAP_PAGESIZE +
                                 (a->pvt_heap_size ? a->pvt_heap_size :
                                  SVM_PVT_MHEAP_SIZE));

  if (a->flags & SVM_FLAGS_FILE)
    {
      struct stat statb;

      fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);

      if (fd < 0)
        {
          clib_unix_warning ("open");
          return -1;
        }

      if (fstat (fd, &statb) < 0)
        {
          clib_unix_warning ("fstat");
          close (fd);
          return -2;
        }

      if (statb.st_mode & S_IFREG)
        {
          if (statb.st_size == 0)
            {
              if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
                {
                  clib_unix_warning ("seek region size");
                  close (fd);
                  return -3;
                }
              if (write (fd, &junk, 1) != 1)
                {
                  clib_unix_warning ("set region size");
                  close (fd);
                  return -3;
                }
            }
          else
            {
              map_size = rnd_pagesize (statb.st_size);
            }
        }
      else
        {
          map_size = a->backing_mmap_size;
        }

      ASSERT (map_size <= rp->virtual_size -
              (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

      if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (fd);
          return -3;
        }
      close (fd);
      rp->backing_file = (char *) format (0, "%s\0", a->backing_file);
      rp->flags |= SVM_FLAGS_FILE;
    }

  if (a->flags & SVM_FLAGS_MHEAP)
    {
#if USE_DLMALLOC == 0
      mheap_t *heap_header;
      rp->data_heap =
        mheap_alloc_with_flags ((void *) (rp->data_base), map_size,
                                MHEAP_FLAG_DISABLE_VM);
      heap_header = mheap_header (rp->data_heap);
      heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
#else
      rp->data_heap = create_mspace_with_base (rp->data_base,
                                               map_size, 1 /* locked */ );
      mspace_disable_expand (rp->data_heap);
#endif

      rp->flags |= SVM_FLAGS_MHEAP;
    }
  return 0;
}

static int
svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
{
  int fd;
  u8 junk = 0;
  uword map_size;
  struct stat statb;

  map_size = rp->virtual_size -
    (MMAP_PAGESIZE
     + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));

  if (a->flags & SVM_FLAGS_FILE)
    {

      fd = open (a->backing_file, O_RDWR, 0777);

      if (fd < 0)
        {
          clib_unix_warning ("open");
          return -1;
        }

      if (fstat (fd, &statb) < 0)
        {
          clib_unix_warning ("fstat");
          close (fd);
          return -2;
        }

      if (statb.st_mode & S_IFREG)
        {
          if (statb.st_size == 0)
            {
              if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
                {
                  clib_unix_warning ("seek region size");
                  close (fd);
                  return -3;
                }
              if (write (fd, &junk, 1) != 1)
                {
                  clib_unix_warning ("set region size");
                  close (fd);
                  return -3;
                }
            }
          else
            {
              map_size = rnd_pagesize (statb.st_size);
            }
        }
      else
        {
          map_size = a->backing_mmap_size;
        }

      ASSERT (map_size <= rp->virtual_size
              - (MMAP_PAGESIZE
                 +
                 (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));

      if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (fd);
          return -3;
        }
      close (fd);
    }
  return 0;
}

u8 *
shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
{
  u8 *path;
  u8 *shm_name;
  u8 *split_point;
  u8 *mkdir_arg = 0;
  int root_path_offset = 0;
  int name_offset = 0;

  if (a->root_path)
    {
      /* Tolerate present or absent slashes */
      if (a->root_path[0] == '/')
        root_path_offset++;

      /* create the root_path under /dev/shm;
         iterate through the path, creating directories */

      path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
      split_point = path + 1;
      vec_add1 (mkdir_arg, '-');

      while (*split_point)
        {
          while (*split_point && *split_point != '/')
            {
              vec_add1 (mkdir_arg, *split_point);
              split_point++;
            }
          vec_add1 (mkdir_arg, 0);

          /* ready to descend another level */
          mkdir_arg[vec_len (mkdir_arg) - 1] = '-';
          split_point++;
        }
      vec_free (mkdir_arg);
      vec_free (path);

      if (a->name[0] == '/')
        name_offset = 1;

      shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
                         &a->name[name_offset], 0);
    }
  else
    shm_name = format (0, "%s%c", a->name, 0);
  return (shm_name);
}
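
/*
 * Illustrative shm names produced above (hypothetical values):
 *   root_path = "vpp1", name = "/global_vm"  ->  "/vpp1-global_vm"
 *   root_path = 0,      name = "/global_vm"  ->  "/global_vm"
 * The result is the name handed to shm_open(), i.e. a file that shows
 * up under /dev/shm.
 */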

void
svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp)
{
  pthread_mutexattr_t attr;
  pthread_condattr_t cattr;
  int nbits, words, bit;
  int overhead_space;
  void *oldheap;
  uword data_base;
  ASSERT (rp);
  int rv;

  memset (rp, 0, sizeof (*rp));

  if (pthread_mutexattr_init (&attr))
    clib_unix_warning ("mutexattr_init");

  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("mutexattr_setpshared");

  if (pthread_mutex_init (&rp->mutex, &attr))
    clib_unix_warning ("mutex_init");

  if (pthread_mutexattr_destroy (&attr))
    clib_unix_warning ("mutexattr_destroy");

  if (pthread_condattr_init (&cattr))
    clib_unix_warning ("condattr_init");

  if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
    clib_unix_warning ("condattr_setpshared");

  if (pthread_cond_init (&rp->condvar, &cattr))
    clib_unix_warning ("cond_init");

  if (pthread_condattr_destroy (&cattr))
    clib_unix_warning ("condattr_destroy");

  region_lock (rp, 1);

  rp->virtual_base = a->baseva;
  rp->virtual_size = a->size;

#if USE_DLMALLOC == 0
  rp->region_heap =
    mheap_alloc_with_flags (uword_to_pointer
                            (a->baseva + MMAP_PAGESIZE, void *),
                            (a->pvt_heap_size !=
                             0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE,
                            MHEAP_FLAG_DISABLE_VM);
#else
  rp->region_heap = create_mspace_with_base
    (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *),
     (a->pvt_heap_size !=
      0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ );

  mspace_disable_expand (rp->region_heap);
#endif

  oldheap = svm_push_pvt_heap (rp);

  rp->region_name = (char *) format (0, "%s%c", a->name, 0);
  vec_add1 (rp->client_pids, getpid ());

  nbits = rp->virtual_size / MMAP_PAGESIZE;

  ASSERT (nbits > 0);
  rp->bitmap_size = nbits;
  words = (nbits + BITS (uword) - 1) / BITS (uword);
  vec_validate (rp->bitmap, words - 1);

  overhead_space = MMAP_PAGESIZE /* header */  +
    ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);

  bit = 0;
  data_base = (uword) rp->virtual_base;

  if (a->flags & SVM_FLAGS_NODATA)
    rp->flags |= SVM_FLAGS_NEED_DATA_INIT;

  do
    {
      clib_bitmap_set_no_check (rp->bitmap, bit, 1);
      bit++;
      overhead_space -= MMAP_PAGESIZE;
      data_base += MMAP_PAGESIZE;
    }
  while (overhead_space > 0);

  rp->data_base = (void *) data_base;

  /*
   * Note: although the POSIX spec guarantees that only one
   * process enters this block, we have to play games
   * to hold off clients until e.g. the mutex is ready
   */
  rp->version = SVM_VERSION;

  /* setup the data portion of the region */

  rv = svm_data_region_create (a, rp);
  if (rv)
    {
      clib_warning ("data_region_create: %d", rv);
    }

  region_unlock (rp);

  svm_pop_heap (oldheap);
}

/*
 * svm_map_region
 */
void *
svm_map_region (svm_map_region_args_t * a)
{
  int svm_fd;
  svm_region_t *rp;
  int deadman = 0;
  u8 junk = 0;
  void *oldheap;
  int rv;
  int pid_holding_region_lock;
  u8 *shm_name;
  int dead_region_recovery = 0;
  int time_left;
  struct stat stat;
  struct timespec ts, tsrem;

  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
  ASSERT (a->name);

  shm_name = shm_name_from_svm_map_region_args (a);

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] map region %s: shm_open (%s)",
                  getpid (), a->name, shm_name);

  svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);

  if (svm_fd >= 0)
    {
      if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
        clib_unix_warning ("segment chmod");
      /* This turns out to fail harmlessly if the client starts first */
      if (fchown (svm_fd, a->uid, a->gid) < 0)
        clib_unix_warning ("segment chown [ok if client starts first]");

      vec_free (shm_name);

      if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
        {
          clib_warning ("seek region size");
          close (svm_fd);
          return (0);
        }
      if (write (svm_fd, &junk, 1) != 1)
        {
          clib_warning ("set region size");
          close (svm_fd);
          return (0);
        }

      rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
                 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);

      if (rp == (svm_region_t *) MAP_FAILED)
        {
          clib_unix_warning ("mmap create");
          close (svm_fd);
          return (0);
        }
      close (svm_fd);

      svm_region_init_mapped_region (a, rp);

      return ((void *) rp);
    }
  else
    {
      svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);

      vec_free (shm_name);

      if (svm_fd < 0)
        {
          perror ("svm_region_map(mmap open)");
          return (0);
        }

      /* Reset ownership in case the client started first */
      if (fchown (svm_fd, a->uid, a->gid) < 0)
        clib_unix_warning ("segment chown [ok if client starts first]");

      time_left = 20;
      while (1)
        {
          if (0 != fstat (svm_fd, &stat))
            {
              clib_warning ("fstat failed: %d", errno);
              close (svm_fd);
              return (0);
            }
          if (stat.st_size > 0)
            {
              break;
            }
          if (0 == time_left)
            {
              clib_warning ("waiting for resize of shm file timed out");
              close (svm_fd);
              return (0);
            }
          ts.tv_sec = 0;
          ts.tv_nsec = 100000000;
          while (nanosleep (&ts, &tsrem) < 0)
            ts = tsrem;
          time_left--;
        }

      rp = mmap (0, MMAP_PAGESIZE,
                 PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

      if (rp == (svm_region_t *) MAP_FAILED)
        {
          close (svm_fd);
          clib_warning ("mmap");
          return (0);
        }
      /*
       * We lost the footrace to create this region; make sure
       * the winner has crossed the finish line.
       */
      while (rp->version == 0 && deadman++ < 5)
        {
          sleep (1);
        }

      /*
       * <bleep>-ed?
       */
      if (rp->version == 0)
        {
          clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
          close (svm_fd);
          munmap (rp, a->size);
          return (0);
        }
      /* Remap now that the region has been placed */
      a->baseva = rp->virtual_base;
      a->size = rp->virtual_size;
      munmap (rp, MMAP_PAGESIZE);

      rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
                          PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_FIXED, svm_fd, 0);
      if ((uword) rp == (uword) MAP_FAILED)
        {
          clib_unix_warning ("mmap");
          close (svm_fd);
          return (0);
        }

      close (svm_fd);

      if ((uword) rp != rp->virtual_base)
        {
          clib_warning ("mmap botch");
        }

      /*
       * Try to fix the region mutex if it is held by
       * a dead process
       */
      pid_holding_region_lock = rp->mutex_owner_pid;
      if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
        {
          clib_warning
            ("region %s mutex held by dead pid %d, tag %d, force unlock",
             rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
          /* owner pid is nonexistent */
          rp->mutex.__data.__owner = 0;
          rp->mutex.__data.__lock = 0;
          dead_region_recovery = 1;
        }

      if (dead_region_recovery)
        clib_warning ("recovery: attempt to re-lock region");

      region_lock (rp, 2);
      oldheap = svm_push_pvt_heap (rp);
      vec_add1 (rp->client_pids, getpid ());

      if (dead_region_recovery)
        clib_warning ("recovery: attempt svm_data_region_map");

      rv = svm_data_region_map (a, rp);
      if (rv)
        {
          clib_warning ("data_region_map: %d", rv);
        }

      if (dead_region_recovery)
        clib_warning ("unlock and continue");

      region_unlock (rp);

      svm_pop_heap (oldheap);

      return ((void *) rp);

    }
  return 0;			/* NOTREACHED */
}

static void
svm_mutex_cleanup (void)
{
  int i;
  for (i = 0; i < nheld; i++)
    {
      pthread_mutex_unlock (mutexes_held[i]);
    }
}

static int
svm_region_init_internal (svm_map_region_args_t * a)
{
  svm_region_t *rp;
  u64 ticks = clib_cpu_time_now ();
  uword randomize_baseva;

  /* guard against klutz calls */
  if (root_rp)
    return -1;

  root_rp_refcount++;

  atexit (svm_mutex_cleanup);

  /* Randomize the shared-VM base at init time */
  if (MMAP_PAGESIZE <= (4 << 10))
    randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
  else
    randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;

  a->baseva += randomize_baseva;

  rp = svm_map_region (a);
  if (!rp)
    return -1;

  region_lock (rp, 3);

  /* Set up the main region data structures */
  if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
    {
      svm_main_region_t *mp = 0;
      void *oldheap;

      rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);

      oldheap = svm_push_pvt_heap (rp);
      vec_validate (mp, 0);
      mp->name_hash = hash_create_string (0, sizeof (uword));
      mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
      mp->uid = a->uid;
      mp->gid = a->gid;
      rp->data_base = mp;
      svm_pop_heap (oldheap);
    }
  region_unlock (rp);
  root_rp = rp;

  return 0;
}

void
svm_region_init (void)
{
  svm_map_region_args_t _a, *a = &_a;

  memset (a, 0, sizeof (*a));
  a->root_path = 0;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = 0;
  a->gid = 0;

  svm_region_init_internal (a);
}

int
svm_region_init_chroot (const char *root_path)
{
  svm_map_region_args_t _a, *a = &_a;

  memset (a, 0, sizeof (*a));
  a->root_path = root_path;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = 0;
  a->gid = 0;

  return svm_region_init_internal (a);
}

void
svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
{
  svm_map_region_args_t _a, *a = &_a;

  memset (a, 0, sizeof (*a));
  a->root_path = root_path;
  a->name = SVM_GLOBAL_REGION_NAME;
  a->baseva = svm_get_global_region_base_va ();
  a->size = SVM_GLOBAL_REGION_SIZE;
  a->flags = SVM_FLAGS_NODATA;
  a->uid = uid;
  a->gid = gid;

  svm_region_init_internal (a);
}

void
svm_region_init_args (svm_map_region_args_t * a)
{
  svm_region_init_internal (a);
}
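
/*
 * Illustrative client sketch (hypothetical names and sizes, not part of
 * this library): map the global region under a chroot-style prefix,
 * create or attach a named subregion with its own data heap, then
 * release both on the way out.
 *
 *   svm_map_region_args_t _a, *a = &_a;
 *   svm_region_t *rp;
 *
 *   if (svm_region_init_chroot ("vpp1") < 0)
 *     return;                                // global region unavailable
 *
 *   memset (a, 0, sizeof (*a));
 *   a->name = "/my-subregion";               // hypothetical name
 *   a->size = 64 << 20;                      // padded and page-rounded internally
 *   a->flags = SVM_FLAGS_MHEAP;              // also build rp->data_heap
 *   rp = svm_region_find_or_create (a);
 *
 *   // ... allocate from rp->data_heap, share rp->data_base ...
 *
 *   svm_region_unmap (rp);
 *   svm_region_exit ();
 */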

void *
svm_region_find_or_create (svm_map_region_args_t * a)
{
  svm_main_region_t *mp;
  svm_region_t *rp;
  uword need_nbits;
  int index, i;
  void *oldheap;
  uword *p;
  u8 *name;
  svm_subregion_t *subp;

  ASSERT (root_rp);

  a->size += MMAP_PAGESIZE +
    ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
  a->size = rnd_pagesize (a->size);

  region_lock (root_rp, 4);
  oldheap = svm_push_pvt_heap (root_rp);
  mp = root_rp->data_base;

  ASSERT (mp);

  /* Map the named region from the correct chroot environment */
  if (a->root_path == NULL)
    a->root_path = (char *) mp->root_path;

  /*
   * See if this region is already known. If it is, we're
   * almost done...
   */
  p = hash_get_mem (mp->name_hash, a->name);

  if (p)
    {
      rp = svm_map_region (a);
      region_unlock (root_rp);
      svm_pop_heap (oldheap);
      return rp;
    }

  /* Create the region. */
  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);

  need_nbits = a->size / MMAP_PAGESIZE;

  index = 1;			/* $$$ fixme, figure out how many bits to really skip */

  /*
   * Scan the virtual space allocation bitmap, looking for a large
   * enough chunk
   */
  do
    {
      if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
        {
          for (i = 0; i < (need_nbits - 1); i++)
            {
              if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
                {
                  index = index + i;
                  goto next;
                }
            }
          break;
        }
      index++;
    next:;
    }
  while (index < root_rp->bitmap_size);

  /* Completely out of VM? */
  if (index >= root_rp->bitmap_size)
    {
      clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
                    root_rp->region_name, a->size, a->size);
      svm_pop_heap (oldheap);
      region_unlock (root_rp);
      return 0;
    }

  /*
   * Mark virtual space allocated
   */
#if CLIB_DEBUG > 1
  clib_warning ("set %d bits at index %d", need_nbits, index);
#endif

  for (i = 0; i < need_nbits; i++)
    {
      clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
    }

  /* Place this region where it goes... */
  a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;

  rp = svm_map_region (a);

  pool_get (mp->subregions, subp);
  name = format (0, "%s%c", a->name, 0);
  subp->subregion_name = name;

  hash_set_mem (mp->name_hash, name, subp - mp->subregions);

  svm_pop_heap (oldheap);

  region_unlock (root_rp);

  return (rp);
}

void
svm_region_unlink (svm_region_t * rp)
{
  svm_map_region_args_t _a, *a = &_a;
  svm_main_region_t *mp;
  u8 *shm_name;

  ASSERT (root_rp);
  ASSERT (rp);
  ASSERT (vec_c_string_is_terminated (rp->region_name));

  mp = root_rp->data_base;
  ASSERT (mp);

  a->root_path = (char *) mp->root_path;
  a->name = rp->region_name;
  shm_name = shm_name_from_svm_map_region_args (a);
  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
  shm_unlink ((const char *) shm_name);
  vec_free (shm_name);
}

/*
 * svm_region_unmap
 *
 * Let go of the indicated region. If the calling process
 * is the last customer, throw it away completely.
 * The root region mutex guarantees atomicity with respect to
 * a new region client showing up at the wrong moment.
 */
void
svm_region_unmap_internal (void *rp_arg, u8 is_client)
{
  int i, mypid = getpid ();
  int nclients_left;
  void *oldheap;
  uword virtual_base, virtual_size;
  svm_region_t *rp = rp_arg;
  char *name;

  /*
   * If we take a signal while holding one or more shared-memory
   * mutexes, we may end up back here from an otherwise
   * benign exit handler. Bail out to avoid a recursive
   * mutex screw-up.
   */
  if (nheld)
    return;

  ASSERT (rp);
  ASSERT (root_rp);

  if (CLIB_DEBUG > 1)
    clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);

  region_lock (root_rp, 5);
  region_lock (rp, 6);

  oldheap = svm_push_pvt_heap (rp);	/* nb vec_delete() in the loop */

  /* Remove the caller from the list of mappers */
  for (i = 0; i < vec_len (rp->client_pids); i++)
    {
      if (rp->client_pids[i] == mypid)
        {
          vec_delete (rp->client_pids, 1, i);
          goto found;
        }
    }
  clib_warning ("pid %d AWOL", mypid);

found:

  svm_pop_heap (oldheap);

  nclients_left = vec_len (rp->client_pids);
  virtual_base = rp->virtual_base;
  virtual_size = rp->virtual_size;

  if (nclients_left == 0)
    {
      int index, nbits, i;
      svm_main_region_t *mp;
      uword *p;
      svm_subregion_t *subp;

      /* Kill the region, last guy on his way out */

      oldheap = svm_push_pvt_heap (root_rp);
      name = vec_dup (rp->region_name);

      virtual_base = rp->virtual_base;
      virtual_size = rp->virtual_size;

      /* Figure out which bits to clear in the root region bitmap */
      index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;

      nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;

#if CLIB_DEBUG > 1
      clib_warning ("clear %d bits at index %d", nbits, index);
#endif
      /* Give back the allocated VM */
      for (i = 0; i < nbits; i++)
        {
          clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
        }

      mp = root_rp->data_base;

      p = hash_get_mem (mp->name_hash, name);

      /* Better never happen ... */
      if (p == NULL)
        {
          region_unlock (rp);
          region_unlock (root_rp);
          svm_pop_heap (oldheap);
          clib_warning ("Region name '%s' not found?", name);
          return;
        }

      /* Remove from the root region subregion pool */
      subp = mp->subregions + p[0];
      pool_put (mp->subregions, subp);

      hash_unset_mem (mp->name_hash, name);

      vec_free (name);

      region_unlock (rp);

      /* If a client asks for the cleanup, don't unlink the backing
       * file since we can't tell if it has been recreated. */
      if (!is_client)
        svm_region_unlink (rp);

      munmap ((void *) virtual_base, virtual_size);
      region_unlock (root_rp);
      svm_pop_heap (oldheap);
      return;
    }

  region_unlock (rp);
  region_unlock (root_rp);

  munmap ((void *) virtual_base, virtual_size);
}

void
svm_region_unmap (void *rp_arg)
{
  svm_region_unmap_internal (rp_arg, 0 /* is_client */ );
}

void
svm_region_unmap_client (void *rp_arg)
{
  svm_region_unmap_internal (rp_arg, 1 /* is_client */ );
}
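
/*
 * Illustrative choice of wrapper (sketch): the region owner may unlink
 * the backing shm file once the last mapper leaves; a client must not,
 * since it cannot tell whether the file has been recreated meanwhile.
 *
 *   svm_region_unmap (rp);          // owner / server side
 *   svm_region_unmap_client (rp);   // client side, never unlinks
 */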

/*
 * svm_region_exit
 */
static void
svm_region_exit_internal (u8 is_client)
{
  void *oldheap;
  int i, mypid = getpid ();
  uword virtual_base, virtual_size;

  /* It felt so nice we did it twice... */
  if (root_rp == 0)
    return;

  if (--root_rp_refcount > 0)
    return;

  /*
   * If we take a signal while holding one or more shared-memory
   * mutexes, we may end up back here from an otherwise
   * benign exit handler. Bail out to avoid a recursive
   * mutex screw-up.
   */
  if (nheld)
    return;

  region_lock (root_rp, 7);
  oldheap = svm_push_pvt_heap (root_rp);

  virtual_base = root_rp->virtual_base;
  virtual_size = root_rp->virtual_size;

  for (i = 0; i < vec_len (root_rp->client_pids); i++)
    {
      if (root_rp->client_pids[i] == mypid)
        {
          vec_delete (root_rp->client_pids, 1, i);
          goto found;
        }
    }
  clib_warning ("pid %d AWOL", mypid);

found:

  if (!is_client && vec_len (root_rp->client_pids) == 0)
    svm_region_unlink (root_rp);

  region_unlock (root_rp);
  svm_pop_heap (oldheap);

  root_rp = 0;
  munmap ((void *) virtual_base, virtual_size);
}

void
svm_region_exit (void)
{
  svm_region_exit_internal (0 /* is_client */ );
}

void
svm_region_exit_client (void)
{
  svm_region_exit_internal (1 /* is_client */ );
}

void
svm_client_scan_this_region_nolock (svm_region_t * rp)
{
  int j;
  int mypid = getpid ();
  void *oldheap;

  for (j = 0; j < vec_len (rp->client_pids); j++)
    {
      if (mypid == rp->client_pids[j])
        continue;
      if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
        {
          clib_warning ("%s: cleanup ghost pid %d",
                        rp->region_name, rp->client_pids[j]);
          /* nb: client vec in rp->region_heap */
          oldheap = svm_push_pvt_heap (rp);
          vec_delete (rp->client_pids, 1, j);
          j--;
          svm_pop_heap (oldheap);
        }
    }
}


/*
 * Scan svm regions for dead clients
 */
void
svm_client_scan (const char *root_path)
{
  int i, j;
  svm_main_region_t *mp;
  svm_map_region_args_t *a = 0;
  svm_region_t *root_rp;
  svm_region_t *rp;
  svm_subregion_t *subp;
  u8 *name = 0;
  u8 **svm_names = 0;
  void *oldheap;
  int mypid = getpid ();

  vec_validate (a, 0);

  svm_region_init_chroot (root_path);

  root_rp = svm_get_root_rp ();

  pthread_mutex_lock (&root_rp->mutex);

  mp = root_rp->data_base;

  for (j = 0; j < vec_len (root_rp->client_pids); j++)
    {
      if (mypid == root_rp->client_pids[j])
        continue;
      if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
        {
          clib_warning ("%s: cleanup ghost pid %d",
                        root_rp->region_name, root_rp->client_pids[j]);
          /* nb: client vec in root_rp->region_heap */
          oldheap = svm_push_pvt_heap (root_rp);
          vec_delete (root_rp->client_pids, 1, j);
          j--;
          svm_pop_heap (oldheap);
        }
    }

  /*
   * Snapshot names; we can't hold the root rp mutex across
   * find_or_create.
   */
  /* *INDENT-OFF* */
  pool_foreach (subp, mp->subregions, ({
    name = vec_dup (subp->subregion_name);
    vec_add1(svm_names, name);
  }));
  /* *INDENT-ON* */

  pthread_mutex_unlock (&root_rp->mutex);

  for (i = 0; i < vec_len (svm_names); i++)
    {
      vec_validate (a, 0);
      a->root_path = root_path;
      a->name = (char *) svm_names[i];
      rp = svm_region_find_or_create (a);
      if (rp)
        {
          pthread_mutex_lock (&rp->mutex);

          svm_client_scan_this_region_nolock (rp);

          pthread_mutex_unlock (&rp->mutex);
          svm_region_unmap (rp);
          vec_free (svm_names[i]);
        }
      vec_free (a);
    }
  vec_free (svm_names);

  svm_region_exit ();

  vec_free (a);
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */