/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
20
21#include <stdio.h>
22#include <stdlib.h>
23#include <sys/types.h>
24#include <sys/mman.h>
25#include <sys/stat.h>
26#include <netinet/in.h>
27#include <signal.h>
28#include <pthread.h>
29#include <unistd.h>
30#include <time.h>
31#include <fcntl.h>
32#include <string.h>
33#include <vppinfra/clib.h>
34#include <vppinfra/vec.h>
35#include <vppinfra/hash.h>
36#include <vppinfra/bitmap.h>
37#include <vppinfra/fifo.h>
38#include <vppinfra/time.h>
39#include <vppinfra/mheap.h>
40#include <vppinfra/heap.h>
41#include <vppinfra/pool.h>
42#include <vppinfra/format.h>
43
44#include "svm.h"
45
46static svm_region_t *root_rp;
47static int root_rp_refcount;
48
49#define MAXLOCK 2
Dave Barach8a7fb0c2016-07-08 14:44:23 -040050static pthread_mutex_t *mutexes_held[MAXLOCK];
Ed Warnickecb9cada2015-12-08 15:45:58 -070051static int nheld;
52
Dave Barach8a7fb0c2016-07-08 14:44:23 -040053svm_region_t *
54svm_get_root_rp (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -070055{
Dave Barach8a7fb0c2016-07-08 14:44:23 -040056 return root_rp;
Ed Warnickecb9cada2015-12-08 15:45:58 -070057}
58
59#define MUTEX_DEBUG
60
Dave Barach8a7fb0c2016-07-08 14:44:23 -040061static void
62region_lock (svm_region_t * rp, int tag)
Ed Warnickecb9cada2015-12-08 15:45:58 -070063{
Dave Barach8a7fb0c2016-07-08 14:44:23 -040064 pthread_mutex_lock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -070065#ifdef MUTEX_DEBUG
Dave Barach8a7fb0c2016-07-08 14:44:23 -040066 rp->mutex_owner_pid = getpid ();
67 rp->mutex_owner_tag = tag;
68#endif
69 ASSERT (nheld < MAXLOCK);
70 /*
71 * Keep score of held mutexes so we can try to exit
72 * cleanly if the world comes to an end at the worst possible
73 * moment
74 */
75 mutexes_held[nheld++] = &rp->mutex;
Ed Warnickecb9cada2015-12-08 15:45:58 -070076}
77
Dave Barach8a7fb0c2016-07-08 14:44:23 -040078static void
79region_unlock (svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -070080{
Dave Barach8a7fb0c2016-07-08 14:44:23 -040081 int i, j;
Ed Warnickecb9cada2015-12-08 15:45:58 -070082#ifdef MUTEX_DEBUG
Dave Barach8a7fb0c2016-07-08 14:44:23 -040083 rp->mutex_owner_pid = 0;
84 rp->mutex_owner_tag = 0;
85#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -070086
Dave Barach8a7fb0c2016-07-08 14:44:23 -040087 for (i = nheld - 1; i >= 0; i--)
88 {
89 if (mutexes_held[i] == &rp->mutex)
90 {
91 for (j = i; j < MAXLOCK - 1; j++)
92 mutexes_held[j] = mutexes_held[j + 1];
93 nheld--;
94 goto found;
95 }
Ed Warnickecb9cada2015-12-08 15:45:58 -070096 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -040097 ASSERT (0);
Ed Warnickecb9cada2015-12-08 15:45:58 -070098
99found:
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400100 CLIB_MEMORY_BARRIER ();
101 pthread_mutex_unlock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700102}
103
104
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400105static u8 *
106format_svm_flags (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700107{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400108 uword f = va_arg (*args, uword);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700109
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400110 if (f & SVM_FLAGS_MHEAP)
111 s = format (s, "MHEAP ");
112 if (f & SVM_FLAGS_FILE)
113 s = format (s, "FILE ");
114 if (f & SVM_FLAGS_NODATA)
115 s = format (s, "NODATA ");
116 if (f & SVM_FLAGS_NEED_DATA_INIT)
117 s = format (s, "INIT ");
118
119 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700120}
121
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400122static u8 *
123format_svm_size (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700124{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400125 uword size = va_arg (*args, uword);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400127 if (size >= (1 << 20))
128 {
129 s = format (s, "(%d mb)", size >> 20);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700130 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400131 else if (size >= (1 << 10))
132 {
133 s = format (s, "(%d kb)", size >> 10);
134 }
135 else
136 {
137 s = format (s, "(%d bytes)", size);
138 }
139 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140}
141
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400142u8 *
143format_svm_region (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700144{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400145 svm_region_t *rp = va_arg (*args, svm_region_t *);
146 int verbose = va_arg (*args, int);
147 int i;
148 uword lo, hi;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700149
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400150 s = format (s, "%s: base va 0x%x size 0x%x %U\n",
151 rp->region_name, rp->virtual_base,
152 rp->virtual_size, format_svm_size, rp->virtual_size);
153 s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
154 rp->user_ctx, rp->bitmap_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700155
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400156 if (verbose)
157 {
158 s = format (s, " flags: 0x%x %U\n", rp->flags,
159 format_svm_flags, rp->flags);
160 s = format (s,
161 " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
162 rp->region_heap, rp->data_base, rp->data_heap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700163 }
164
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400165 s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700166
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400167 for (i = 0; i < vec_len (rp->client_pids); i++)
168 s = format (s, "%d ", rp->client_pids[i]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700169
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400170 s = format (s, "\n");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400172 if (verbose)
173 {
174 lo = hi = ~0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700175
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400176 s = format (s, " VM in use: ");
177
178 for (i = 0; i < rp->bitmap_size; i++)
179 {
180 if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
181 {
182 if (lo == ~0)
183 {
184 hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
185 }
186 else
187 {
188 hi = rp->virtual_base + i * MMAP_PAGESIZE;
189 }
190 }
191 else
192 {
193 if (lo != ~0)
194 {
195 hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
196 s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
197 (hi - lo) >> 10);
198 lo = hi = ~0;
199 }
200 }
201 }
202 s = format (s, " rgn heap stats: %U", format_mheap,
203 rp->region_heap, 0);
204 if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap)
205 {
206 s = format (s, "\n data heap stats: %U", format_mheap,
207 rp->data_heap, 1);
208 }
209 s = format (s, "\n");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700210 }
211
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400212 return (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700213}
214
215/*
216 * rnd_pagesize
217 * Round to a pagesize multiple, presumably 4k works
218 */
Dave Barachb3d93da2016-08-03 14:34:38 -0400219static u64
220rnd_pagesize (u64 size)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700221{
Dave Barachb3d93da2016-08-03 14:34:38 -0400222 u64 rv;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700223
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400224 rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
225 return (rv);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700226}
227
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400228/*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229 * svm_data_region_setup
230 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400231static int
232svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700233{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400234 int fd;
235 u8 junk = 0;
236 uword map_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700237
Dave Barachc3799992016-08-15 11:12:27 -0400238 map_size = rp->virtual_size - (MMAP_PAGESIZE +
239 (a->pvt_heap_size ? a->pvt_heap_size :
240 SVM_PVT_MHEAP_SIZE));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700241
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400242 if (a->flags & SVM_FLAGS_FILE)
243 {
244 struct stat statb;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700245
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400246 fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400248 if (fd < 0)
249 {
250 clib_unix_warning ("open");
251 return -1;
252 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700253
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400254 if (fstat (fd, &statb) < 0)
255 {
256 clib_unix_warning ("fstat");
257 close (fd);
258 return -2;
259 }
260
261 if (statb.st_mode & S_IFREG)
262 {
263 if (statb.st_size == 0)
264 {
265 if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
266 {
267 clib_unix_warning ("seek region size");
268 close (fd);
269 return -3;
270 }
271 if (write (fd, &junk, 1) != 1)
272 {
273 clib_unix_warning ("set region size");
274 close (fd);
275 return -3;
276 }
277 }
278 else
279 {
280 map_size = rnd_pagesize (statb.st_size);
281 }
282 }
283 else
284 {
285 map_size = a->backing_mmap_size;
286 }
287
288 ASSERT (map_size <= rp->virtual_size -
289 (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));
290
291 if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
292 MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
293 {
294 clib_unix_warning ("mmap");
295 close (fd);
296 return -3;
297 }
298 close (fd);
299 rp->backing_file = (char *) format (0, "%s\0", a->backing_file);
300 rp->flags |= SVM_FLAGS_FILE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700301 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400302
303 if (a->flags & SVM_FLAGS_MHEAP)
304 {
305 rp->data_heap =
306 mheap_alloc_with_flags ((void *) (rp->data_base), map_size,
307 MHEAP_FLAG_DISABLE_VM);
308 rp->flags |= SVM_FLAGS_MHEAP;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700309 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400310 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700311}
312
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400313static int
314svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700315{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400316 int fd;
317 u8 junk = 0;
318 uword map_size;
319 struct stat statb;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700320
Dave Barachc3799992016-08-15 11:12:27 -0400321 map_size = rp->virtual_size -
322 (MMAP_PAGESIZE
Dave Barachb3d93da2016-08-03 14:34:38 -0400323 + (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700324
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400325 if (a->flags & SVM_FLAGS_FILE)
326 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700327
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400328 fd = open (a->backing_file, O_RDWR, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700329
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400330 if (fd < 0)
331 {
332 clib_unix_warning ("open");
333 return -1;
334 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400336 if (fstat (fd, &statb) < 0)
337 {
338 clib_unix_warning ("fstat");
339 close (fd);
340 return -2;
341 }
342
343 if (statb.st_mode & S_IFREG)
344 {
345 if (statb.st_size == 0)
346 {
347 if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
348 {
349 clib_unix_warning ("seek region size");
350 close (fd);
351 return -3;
352 }
353 if (write (fd, &junk, 1) != 1)
354 {
355 clib_unix_warning ("set region size");
356 close (fd);
357 return -3;
358 }
359 }
360 else
361 {
362 map_size = rnd_pagesize (statb.st_size);
363 }
364 }
365 else
366 {
367 map_size = a->backing_mmap_size;
368 }
369
370 ASSERT (map_size <= rp->virtual_size
Dave Barachc3799992016-08-15 11:12:27 -0400371 - (MMAP_PAGESIZE
372 +
373 (a->pvt_heap_size ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE)));
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400374
375 if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
376 MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
377 {
378 clib_unix_warning ("mmap");
379 close (fd);
380 return -3;
381 }
382 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700383 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400384 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700385}
386
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400387u8 *
388shm_name_from_svm_map_region_args (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700389{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400390 u8 *path;
391 u8 *shm_name;
392 u8 *split_point;
393 u8 *mkdir_arg = 0;
394 int root_path_offset = 0;
395 int name_offset = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700396
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400397 if (a->root_path)
398 {
399 /* Tolerate present or absent slashes */
400 if (a->root_path[0] == '/')
401 root_path_offset++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700402
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400403 /* create the root_path under /dev/shm
404 iterate through path creating directories */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700405
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400406 path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
407 split_point = path + 1;
408 vec_add1 (mkdir_arg, '-');
409
410 while (*split_point)
411 {
412 while (*split_point && *split_point != '/')
413 {
414 vec_add1 (mkdir_arg, *split_point);
415 split_point++;
416 }
417 vec_add1 (mkdir_arg, 0);
418
419 /* ready to descend another level */
420 mkdir_arg[vec_len (mkdir_arg) - 1] = '-';
421 split_point++;
422 }
423 vec_free (mkdir_arg);
424 vec_free (path);
425
426 if (a->name[0] == '/')
427 name_offset = 1;
428
Matej Perinad135c192017-07-18 13:59:41 +0200429 shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400430 &a->name[name_offset], 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700431 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400432 else
433 shm_name = format (0, "%s%c", a->name, 0);
434 return (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700435}
436
Dave Barach59b25652017-09-10 15:04:27 -0400437void
438svm_region_init_mapped_region (svm_map_region_args_t * a, svm_region_t * rp)
439{
440 pthread_mutexattr_t attr;
441 pthread_condattr_t cattr;
442 int nbits, words, bit;
443 int overhead_space;
444 void *oldheap;
445 uword data_base;
446 ASSERT (rp);
447 int rv;
448
449 memset (rp, 0, sizeof (*rp));
450
451 if (pthread_mutexattr_init (&attr))
452 clib_unix_warning ("mutexattr_init");
453
454 if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
455 clib_unix_warning ("mutexattr_setpshared");
456
457 if (pthread_mutex_init (&rp->mutex, &attr))
458 clib_unix_warning ("mutex_init");
459
460 if (pthread_mutexattr_destroy (&attr))
461 clib_unix_warning ("mutexattr_destroy");
462
463 if (pthread_condattr_init (&cattr))
464 clib_unix_warning ("condattr_init");
465
466 if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
467 clib_unix_warning ("condattr_setpshared");
468
469 if (pthread_cond_init (&rp->condvar, &cattr))
470 clib_unix_warning ("cond_init");
471
472 if (pthread_condattr_destroy (&cattr))
473 clib_unix_warning ("condattr_destroy");
474
475 region_lock (rp, 1);
476
477 rp->virtual_base = a->baseva;
478 rp->virtual_size = a->size;
479
480 rp->region_heap =
481 mheap_alloc_with_flags (uword_to_pointer
482 (a->baseva + MMAP_PAGESIZE, void *),
483 (a->pvt_heap_size !=
484 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE,
485 MHEAP_FLAG_DISABLE_VM);
486 oldheap = svm_push_pvt_heap (rp);
487
488 rp->region_name = (char *) format (0, "%s%c", a->name, 0);
489 vec_add1 (rp->client_pids, getpid ());
490
491 nbits = rp->virtual_size / MMAP_PAGESIZE;
492
493 ASSERT (nbits > 0);
494 rp->bitmap_size = nbits;
495 words = (nbits + BITS (uword) - 1) / BITS (uword);
496 vec_validate (rp->bitmap, words - 1);
497
498 overhead_space = MMAP_PAGESIZE /* header */ +
499 ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
500
501 bit = 0;
502 data_base = (uword) rp->virtual_base;
503
504 if (a->flags & SVM_FLAGS_NODATA)
505 rp->flags |= SVM_FLAGS_NEED_DATA_INIT;
506
507 do
508 {
509 clib_bitmap_set_no_check (rp->bitmap, bit, 1);
510 bit++;
511 overhead_space -= MMAP_PAGESIZE;
512 data_base += MMAP_PAGESIZE;
513 }
514 while (overhead_space > 0);
515
516 rp->data_base = (void *) data_base;
517
518 /*
519 * Note: although the POSIX spec guarantees that only one
520 * process enters this block, we have to play games
521 * to hold off clients until e.g. the mutex is ready
522 */
523 rp->version = SVM_VERSION;
524
525 /* setup the data portion of the region */
526
527 rv = svm_data_region_create (a, rp);
528 if (rv)
529 {
530 clib_warning ("data_region_create: %d", rv);
531 }
532
533 region_unlock (rp);
534
535 svm_pop_heap (oldheap);
536}
537
Ed Warnickecb9cada2015-12-08 15:45:58 -0700538/*
539 * svm_map_region
540 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400541void *
542svm_map_region (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700543{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400544 int svm_fd;
545 svm_region_t *rp;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400546 int deadman = 0;
547 u8 junk = 0;
548 void *oldheap;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400549 int rv;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400550 int pid_holding_region_lock;
551 u8 *shm_name;
552 int dead_region_recovery = 0;
553 int time_left;
554 struct stat stat;
555 struct timespec ts, tsrem;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700556
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400557 ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
558 ASSERT (a->name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700559
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400560 shm_name = shm_name_from_svm_map_region_args (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700561
Dave Wallaced756b352017-07-03 13:11:38 -0400562 if (CLIB_DEBUG > 1)
563 clib_warning ("[%d] map region %s: shm_open (%s)",
564 getpid (), a->name, shm_name);
565
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400566 svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700567
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400568 if (svm_fd >= 0)
569 {
Dave Wallace19296112017-08-31 15:54:11 -0400570 if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400571 clib_unix_warning ("segment chmod");
572 /* This turns out to fail harmlessly if the client starts first */
573 if (fchown (svm_fd, a->uid, a->gid) < 0)
574 clib_unix_warning ("segment chown [ok if client starts first]");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700575
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400576 vec_free (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700577
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400578 if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
579 {
580 clib_warning ("seek region size");
581 close (svm_fd);
582 return (0);
583 }
584 if (write (svm_fd, &junk, 1) != 1)
585 {
586 clib_warning ("set region size");
587 close (svm_fd);
588 return (0);
589 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700590
Damjan Marion7bee80c2017-04-26 15:32:12 +0200591 rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400592 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700593
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400594 if (rp == (svm_region_t *) MAP_FAILED)
595 {
596 clib_unix_warning ("mmap create");
597 close (svm_fd);
598 return (0);
599 }
600 close (svm_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700601
Dave Barach59b25652017-09-10 15:04:27 -0400602 svm_region_init_mapped_region (a, rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700603
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400604 return ((void *) rp);
605 }
606 else
607 {
608 svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700609
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400610 vec_free (shm_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700611
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400612 if (svm_fd < 0)
613 {
614 perror ("svm_region_map(mmap open)");
615 return (0);
616 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700617
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400618 time_left = 20;
619 while (1)
620 {
621 if (0 != fstat (svm_fd, &stat))
622 {
623 clib_warning ("fstat failed: %d", errno);
624 close (svm_fd);
625 return (0);
626 }
627 if (stat.st_size > 0)
628 {
629 break;
630 }
631 if (0 == time_left)
632 {
633 clib_warning ("waiting for resize of shm file timed out");
634 close (svm_fd);
635 return (0);
636 }
637 ts.tv_sec = 0;
638 ts.tv_nsec = 100000000;
639 while (nanosleep (&ts, &tsrem) < 0)
640 ts = tsrem;
641 time_left--;
642 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700643
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400644 rp = mmap (0, MMAP_PAGESIZE,
645 PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700646
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400647 if (rp == (svm_region_t *) MAP_FAILED)
648 {
649 close (svm_fd);
650 clib_warning ("mmap");
651 return (0);
652 }
653 /*
654 * We lost the footrace to create this region; make sure
655 * the winner has crossed the finish line.
656 */
657 while (rp->version == 0 && deadman++ < 5)
658 {
659 sleep (1);
660 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700661
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400662 /*
663 * <bleep>-ed?
664 */
665 if (rp->version == 0)
666 {
667 clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
668 close (svm_fd);
669 munmap (rp, a->size);
670 return (0);
671 }
672 /* Remap now that the region has been placed */
673 a->baseva = rp->virtual_base;
674 a->size = rp->virtual_size;
675 munmap (rp, MMAP_PAGESIZE);
676
Damjan Marion7bee80c2017-04-26 15:32:12 +0200677 rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400678 PROT_READ | PROT_WRITE,
679 MAP_SHARED | MAP_FIXED, svm_fd, 0);
680 if ((uword) rp == (uword) MAP_FAILED)
681 {
682 clib_unix_warning ("mmap");
683 close (svm_fd);
684 return (0);
685 }
686
687 if ((uword) rp != rp->virtual_base)
688 {
689 clib_warning ("mmap botch");
690 }
691
692 /*
693 * Try to fix the region mutex if it is held by
694 * a dead process
695 */
696 pid_holding_region_lock = rp->mutex_owner_pid;
697 if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
698 {
699 clib_warning
700 ("region %s mutex held by dead pid %d, tag %d, force unlock",
701 rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
702 /* owner pid is nonexistent */
703 rp->mutex.__data.__owner = 0;
704 rp->mutex.__data.__lock = 0;
705 dead_region_recovery = 1;
706 }
707
708 if (dead_region_recovery)
709 clib_warning ("recovery: attempt to re-lock region");
710
711 region_lock (rp, 2);
712 oldheap = svm_push_pvt_heap (rp);
713 vec_add1 (rp->client_pids, getpid ());
714
715 if (dead_region_recovery)
716 clib_warning ("recovery: attempt svm_data_region_map");
717
718 rv = svm_data_region_map (a, rp);
719 if (rv)
720 {
721 clib_warning ("data_region_map: %d", rv);
722 }
723
724 if (dead_region_recovery)
725 clib_warning ("unlock and continue");
726
727 region_unlock (rp);
728
729 svm_pop_heap (oldheap);
730
731 return ((void *) rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700732
733 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400734 return 0; /* NOTREACHED */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700735}
736
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400737static void
738svm_mutex_cleanup (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700739{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400740 int i;
741 for (i = 0; i < nheld; i++)
742 {
743 pthread_mutex_unlock (mutexes_held[i]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700744 }
745}
746
Ole Troan3cdc25f2017-08-17 11:07:33 +0200747static int
Dave Barachb3d93da2016-08-03 14:34:38 -0400748svm_region_init_internal (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700749{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400750 svm_region_t *rp;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400751 u64 ticks = clib_cpu_time_now ();
752 uword randomize_baseva;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700753
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400754 /* guard against klutz calls */
755 if (root_rp)
Ole Troan3cdc25f2017-08-17 11:07:33 +0200756 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700757
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400758 root_rp_refcount++;
Dave Barach16c75df2016-05-31 14:05:46 -0400759
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400760 atexit (svm_mutex_cleanup);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700761
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400762 /* Randomize the shared-VM base at init time */
763 if (MMAP_PAGESIZE <= (4 << 10))
764 randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
765 else
766 randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700767
Dave Barachb3d93da2016-08-03 14:34:38 -0400768 a->baseva += randomize_baseva;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700769
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400770 rp = svm_map_region (a);
Ole Troan3cdc25f2017-08-17 11:07:33 +0200771 if (!rp)
772 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700773
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400774 region_lock (rp, 3);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700775
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400776 /* Set up the main region data structures */
777 if (rp->flags & SVM_FLAGS_NEED_DATA_INIT)
778 {
779 svm_main_region_t *mp = 0;
780 void *oldheap;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700781
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400782 rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700783
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400784 oldheap = svm_push_pvt_heap (rp);
785 vec_validate (mp, 0);
786 mp->name_hash = hash_create_string (0, sizeof (uword));
Dave Barachb3d93da2016-08-03 14:34:38 -0400787 mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
Dave Wallace19296112017-08-31 15:54:11 -0400788 mp->uid = a->uid;
789 mp->gid = a->gid;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400790 rp->data_base = mp;
791 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700792 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400793 region_unlock (rp);
794 root_rp = rp;
Ole Troan3cdc25f2017-08-17 11:07:33 +0200795
796 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700797}
798
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400799void
800svm_region_init (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700801{
Dave Barachb3d93da2016-08-03 14:34:38 -0400802 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400803
Dave Barachb3d93da2016-08-03 14:34:38 -0400804 memset (a, 0, sizeof (*a));
805 a->root_path = 0;
806 a->name = SVM_GLOBAL_REGION_NAME;
807 a->baseva = SVM_GLOBAL_REGION_BASEVA;
808 a->size = SVM_GLOBAL_REGION_SIZE;
809 a->flags = SVM_FLAGS_NODATA;
810 a->uid = 0;
811 a->gid = 0;
812
813 svm_region_init_internal (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700814}
815
Ole Troan3cdc25f2017-08-17 11:07:33 +0200816int
Neale Rannse72be392017-04-26 13:59:20 -0700817svm_region_init_chroot (const char *root_path)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700818{
Dave Barachb3d93da2016-08-03 14:34:38 -0400819 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400820
Dave Barachb3d93da2016-08-03 14:34:38 -0400821 memset (a, 0, sizeof (*a));
822 a->root_path = root_path;
823 a->name = SVM_GLOBAL_REGION_NAME;
824 a->baseva = SVM_GLOBAL_REGION_BASEVA;
825 a->size = SVM_GLOBAL_REGION_SIZE;
826 a->flags = SVM_FLAGS_NODATA;
827 a->uid = 0;
828 a->gid = 0;
829
Ole Troan3cdc25f2017-08-17 11:07:33 +0200830 return svm_region_init_internal (a);
Dave Barach16c75df2016-05-31 14:05:46 -0400831}
832
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400833void
Neale Rannse72be392017-04-26 13:59:20 -0700834svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
Dave Barach16c75df2016-05-31 14:05:46 -0400835{
Dave Barachb3d93da2016-08-03 14:34:38 -0400836 svm_map_region_args_t _a, *a = &_a;
Dave Barachc3799992016-08-15 11:12:27 -0400837
Dave Barachb3d93da2016-08-03 14:34:38 -0400838 memset (a, 0, sizeof (*a));
839 a->root_path = root_path;
840 a->name = SVM_GLOBAL_REGION_NAME;
841 a->baseva = SVM_GLOBAL_REGION_BASEVA;
842 a->size = SVM_GLOBAL_REGION_SIZE;
843 a->flags = SVM_FLAGS_NODATA;
844 a->uid = uid;
845 a->gid = gid;
846
847 svm_region_init_internal (a);
848}
849
850void
851svm_region_init_args (svm_map_region_args_t * a)
852{
853 svm_region_init_internal (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700854}
855
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400856void *
857svm_region_find_or_create (svm_map_region_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700858{
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400859 svm_main_region_t *mp;
860 svm_region_t *rp;
861 uword need_nbits;
862 int index, i;
863 void *oldheap;
864 uword *p;
865 u8 *name;
866 svm_subregion_t *subp;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700867
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400868 ASSERT (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700869
Dave Barachc3799992016-08-15 11:12:27 -0400870 a->size += MMAP_PAGESIZE +
Dave Barachb3d93da2016-08-03 14:34:38 -0400871 ((a->pvt_heap_size != 0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE);
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400872 a->size = rnd_pagesize (a->size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700873
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400874 region_lock (root_rp, 4);
875 oldheap = svm_push_pvt_heap (root_rp);
876 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700877
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400878 ASSERT (mp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700879
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400880 /* Map the named region from the correct chroot environment */
Jan Srnicek5beec812017-03-24 10:18:11 +0100881 if (a->root_path == NULL)
882 a->root_path = (char *) mp->root_path;
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400883
884 /*
885 * See if this region is already known. If it is, we're
886 * almost done...
887 */
888 p = hash_get_mem (mp->name_hash, a->name);
889
890 if (p)
891 {
892 rp = svm_map_region (a);
893 region_unlock (root_rp);
894 svm_pop_heap (oldheap);
895 return rp;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700896 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700897
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400898 /* Create the region. */
899 ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700900
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400901 need_nbits = a->size / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700902
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400903 index = 1; /* $$$ fixme, figure out how many bit to really skip */
904
905 /*
906 * Scan the virtual space allocation bitmap, looking for a large
907 * enough chunk
908 */
909 do
910 {
911 if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
912 {
913 for (i = 0; i < (need_nbits - 1); i++)
914 {
915 if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
916 {
917 index = index + i;
918 goto next;
919 }
920 }
921 break;
922 }
923 index++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700924 next:;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700925 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400926 while (index < root_rp->bitmap_size);
927
928 /* Completely out of VM? */
929 if (index >= root_rp->bitmap_size)
930 {
Dave Barachb3d93da2016-08-03 14:34:38 -0400931 clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
932 root_rp->region_name, a->size, a->size);
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400933 svm_pop_heap (oldheap);
934 region_unlock (root_rp);
935 return 0;
936 }
937
938 /*
939 * Mark virtual space allocated
940 */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700941#if CLIB_DEBUG > 1
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400942 clib_warning ("set %d bits at index %d", need_nbits, index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700943#endif
944
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400945 for (i = 0; i < need_nbits; i++)
946 {
947 clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700948 }
949
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400950 /* Place this region where it goes... */
951 a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700952
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400953 rp = svm_map_region (a);
Dave Barachc3799992016-08-15 11:12:27 -0400954
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400955 pool_get (mp->subregions, subp);
956 name = format (0, "%s%c", a->name, 0);
957 subp->subregion_name = name;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700958
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400959 hash_set_mem (mp->name_hash, name, subp - mp->subregions);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700960
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400961 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700962
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400963 region_unlock (root_rp);
964
965 return (rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700966}
967
Dave Wallaced756b352017-07-03 13:11:38 -0400968void
969svm_region_unlink (svm_region_t * rp)
970{
971 svm_map_region_args_t _a, *a = &_a;
972 svm_main_region_t *mp;
973 u8 *shm_name;
974
975 ASSERT (root_rp);
976 ASSERT (rp);
977 ASSERT (vec_c_string_is_terminated (rp->region_name));
978
979 mp = root_rp->data_base;
980 ASSERT (mp);
981
982 a->root_path = (char *) mp->root_path;
983 a->name = rp->region_name;
984 shm_name = shm_name_from_svm_map_region_args (a);
985 if (CLIB_DEBUG > 1)
986 clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
987 shm_unlink ((const char *) shm_name);
988 vec_free (shm_name);
989}
990
Ed Warnickecb9cada2015-12-08 15:45:58 -0700991/*
992 * svm_region_unmap
993 *
994 * Let go of the indicated region. If the calling process
995 * is the last customer, throw it away completely.
996 * The root region mutex guarantees atomicity with respect to
997 * a new region client showing up at the wrong moment.
998 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -0400999void
1000svm_region_unmap (void *rp_arg)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001001{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001002 int i, mypid = getpid ();
1003 int nclients_left;
1004 void *oldheap;
1005 uword virtual_base, virtual_size;
1006 svm_region_t *rp = rp_arg;
1007 char *name;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001008
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001009 /*
1010 * If we take a signal while holding one or more shared-memory
1011 * mutexes, we may end up back here from an otherwise
1012 * benign exit handler. Bail out to avoid a recursive
1013 * mutex screw-up.
1014 */
1015 if (nheld)
1016 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001017
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001018 ASSERT (rp);
1019 ASSERT (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001020
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001021 if (CLIB_DEBUG > 1)
1022 clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001023
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001024 region_lock (root_rp, 5);
1025 region_lock (rp, 6);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001026
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001027 oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
1028
1029 /* Remove the caller from the list of mappers */
1030 for (i = 0; i < vec_len (rp->client_pids); i++)
1031 {
1032 if (rp->client_pids[i] == mypid)
1033 {
1034 vec_delete (rp->client_pids, 1, i);
1035 goto found;
1036 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001037 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001038 clib_warning ("pid %d AWOL", mypid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001039
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001040found:
Ed Warnickecb9cada2015-12-08 15:45:58 -07001041
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001042 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001043
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001044 nclients_left = vec_len (rp->client_pids);
1045 virtual_base = rp->virtual_base;
1046 virtual_size = rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001047
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001048 if (nclients_left == 0)
1049 {
1050 int index, nbits, i;
1051 svm_main_region_t *mp;
1052 uword *p;
1053 svm_subregion_t *subp;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001054
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001055 /* Kill the region, last guy on his way out */
Ed Warnickecb9cada2015-12-08 15:45:58 -07001056
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001057 oldheap = svm_push_pvt_heap (root_rp);
1058 name = vec_dup (rp->region_name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001059
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001060 virtual_base = rp->virtual_base;
1061 virtual_size = rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001062
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001063 /* Figure out which bits to clear in the root region bitmap */
1064 index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001065
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001066 nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001067
1068#if CLIB_DEBUG > 1
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001069 clib_warning ("clear %d bits at index %d", nbits, index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001070#endif
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001071 /* Give back the allocated VM */
1072 for (i = 0; i < nbits; i++)
1073 {
1074 clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
1075 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001076
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001077 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001078
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001079 p = hash_get_mem (mp->name_hash, name);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001080
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001081 /* Better never happen ... */
1082 if (p == NULL)
1083 {
1084 region_unlock (rp);
1085 region_unlock (root_rp);
1086 svm_pop_heap (oldheap);
1087 clib_warning ("Region name '%s' not found?", name);
1088 return;
1089 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001090
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001091 /* Remove from the root region subregion pool */
1092 subp = mp->subregions + p[0];
1093 pool_put (mp->subregions, subp);
1094
1095 hash_unset_mem (mp->name_hash, name);
1096
1097 vec_free (name);
1098
1099 region_unlock (rp);
Dave Wallaced756b352017-07-03 13:11:38 -04001100 svm_region_unlink (rp);
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001101 munmap ((void *) virtual_base, virtual_size);
1102 region_unlock (root_rp);
1103 svm_pop_heap (oldheap);
1104 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001105 }
1106
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001107 region_unlock (rp);
1108 region_unlock (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001109
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001110 munmap ((void *) virtual_base, virtual_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001111}
1112
1113/*
1114 * svm_region_exit
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001115 */
1116void
1117svm_region_exit ()
Ed Warnickecb9cada2015-12-08 15:45:58 -07001118{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001119 void *oldheap;
1120 int i, mypid = getpid ();
1121 uword virtual_base, virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001122
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001123 /* It felt so nice we did it twice... */
1124 if (root_rp == 0)
1125 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001126
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001127 if (--root_rp_refcount > 0)
1128 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001129
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001130 /*
1131 * If we take a signal while holding one or more shared-memory
1132 * mutexes, we may end up back here from an otherwise
1133 * benign exit handler. Bail out to avoid a recursive
1134 * mutex screw-up.
1135 */
1136 if (nheld)
1137 return;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001138
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001139 region_lock (root_rp, 7);
1140 oldheap = svm_push_pvt_heap (root_rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001141
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001142 virtual_base = root_rp->virtual_base;
1143 virtual_size = root_rp->virtual_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001144
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001145 for (i = 0; i < vec_len (root_rp->client_pids); i++)
1146 {
1147 if (root_rp->client_pids[i] == mypid)
1148 {
1149 vec_delete (root_rp->client_pids, 1, i);
1150 goto found;
1151 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001152 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001153 clib_warning ("pid %d AWOL", mypid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001154
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001155found:
Ed Warnickecb9cada2015-12-08 15:45:58 -07001156
Dave Wallaced756b352017-07-03 13:11:38 -04001157 if (vec_len (root_rp->client_pids) == 0)
1158 svm_region_unlink (root_rp);
1159
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001160 region_unlock (root_rp);
1161 svm_pop_heap (oldheap);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001162
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001163 root_rp = 0;
1164 munmap ((void *) virtual_base, virtual_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001165}
1166
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001167void
1168svm_client_scan_this_region_nolock (svm_region_t * rp)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001169{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001170 int j;
1171 int mypid = getpid ();
1172 void *oldheap;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001173
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001174 for (j = 0; j < vec_len (rp->client_pids); j++)
1175 {
1176 if (mypid == rp->client_pids[j])
1177 continue;
1178 if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
1179 {
1180 clib_warning ("%s: cleanup ghost pid %d",
1181 rp->region_name, rp->client_pids[j]);
1182 /* nb: client vec in rp->region_heap */
1183 oldheap = svm_push_pvt_heap (rp);
1184 vec_delete (rp->client_pids, 1, j);
1185 j--;
1186 svm_pop_heap (oldheap);
1187 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001188 }
1189}
1190
1191
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001192/*
Ed Warnickecb9cada2015-12-08 15:45:58 -07001193 * Scan svm regions for dead clients
1194 */
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001195void
Neale Rannse72be392017-04-26 13:59:20 -07001196svm_client_scan (const char *root_path)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001197{
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001198 int i, j;
1199 svm_main_region_t *mp;
1200 svm_map_region_args_t *a = 0;
1201 svm_region_t *root_rp;
1202 svm_region_t *rp;
1203 svm_subregion_t *subp;
1204 u8 *name = 0;
1205 u8 **svm_names = 0;
1206 void *oldheap;
1207 int mypid = getpid ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001208
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001209 vec_validate (a, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001210
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001211 svm_region_init_chroot (root_path);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001212
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001213 root_rp = svm_get_root_rp ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001214
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001215 pthread_mutex_lock (&root_rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001216
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001217 mp = root_rp->data_base;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001218
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001219 for (j = 0; j < vec_len (root_rp->client_pids); j++)
1220 {
1221 if (mypid == root_rp->client_pids[j])
1222 continue;
1223 if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
1224 {
1225 clib_warning ("%s: cleanup ghost pid %d",
1226 root_rp->region_name, root_rp->client_pids[j]);
1227 /* nb: client vec in root_rp->region_heap */
1228 oldheap = svm_push_pvt_heap (root_rp);
1229 vec_delete (root_rp->client_pids, 1, j);
1230 j--;
1231 svm_pop_heap (oldheap);
1232 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001233 }
1234
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001235 /*
1236 * Snapshoot names, can't hold root rp mutex across
1237 * find_or_create.
1238 */
1239 /* *INDENT-OFF* */
1240 pool_foreach (subp, mp->subregions, ({
1241 name = vec_dup (subp->subregion_name);
1242 vec_add1(svm_names, name);
1243 }));
1244 /* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07001245
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001246 pthread_mutex_unlock (&root_rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001247
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001248 for (i = 0; i < vec_len (svm_names); i++)
1249 {
1250 vec_validate (a, 0);
1251 a->root_path = root_path;
1252 a->name = (char *) svm_names[i];
1253 rp = svm_region_find_or_create (a);
1254 if (rp)
1255 {
1256 pthread_mutex_lock (&rp->mutex);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001257
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001258 svm_client_scan_this_region_nolock (rp);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001259
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001260 pthread_mutex_unlock (&rp->mutex);
1261 svm_region_unmap (rp);
1262 vec_free (svm_names[i]);
1263 }
1264 vec_free (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001265 }
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001266 vec_free (svm_names);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001267
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001268 svm_region_exit ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001269
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001270 vec_free (a);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001271}
Dave Barach8a7fb0c2016-07-08 14:44:23 -04001272
1273/*
1274 * fd.io coding-style-patch-verification: ON
1275 *
1276 * Local Variables:
1277 * eval: (c-set-style "gnu")
1278 * End:
1279 */