/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/mheap.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"

static svm_region_t *root_rp;
static int root_rp_refcount;

#define MAXLOCK 2
static pthread_mutex_t *mutexes_held[MAXLOCK];
static int nheld;

svm_region_t *svm_get_root_rp (void)
{
    return root_rp;
}

#define MUTEX_DEBUG

static void region_lock (svm_region_t *rp, int tag)
{
    pthread_mutex_lock (&rp->mutex);
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = getpid ();
    rp->mutex_owner_tag = tag;
#endif
    ASSERT (nheld < MAXLOCK);
    /*
     * Keep score of held mutexes so we can try to exit
     * cleanly if the world comes to an end at the worst possible
     * moment
     */
    mutexes_held[nheld++] = &rp->mutex;
}

static void region_unlock (svm_region_t *rp)
{
    int i, j;
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = 0;
    rp->mutex_owner_tag = 0;
#endif

    for (i = nheld - 1; i >= 0; i--) {
        if (mutexes_held[i] == &rp->mutex) {
            for (j = i; j < MAXLOCK - 1; j++)
                mutexes_held[j] = mutexes_held[j + 1];
            nheld--;
            goto found;
        }
    }
    ASSERT (0);

found:
    CLIB_MEMORY_BARRIER ();
    pthread_mutex_unlock (&rp->mutex);
}
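
/*
 * Lock-ordering note: callers that need both the root region mutex
 * and a subregion mutex always take the root lock first, as
 * svm_region_unmap does below:
 *
 *     region_lock (root_rp, 5);   // root region first...
 *     region_lock (rp, 6);        // ...then the subregion
 *     ...
 *     region_unlock (rp);
 *     region_unlock (root_rp);
 *
 * That discipline is why MAXLOCK == 2 suffices for the
 * mutexes_held[] score-keeping above.
 */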


static u8 *format_svm_flags (u8 *s, va_list *args)
{
    uword f = va_arg (*args, uword);

    if (f & SVM_FLAGS_MHEAP)
        s = format (s, "MHEAP ");
    if (f & SVM_FLAGS_FILE)
        s = format (s, "FILE ");
    if (f & SVM_FLAGS_NODATA)
        s = format (s, "NODATA ");
    if (f & SVM_FLAGS_NEED_DATA_INIT)
        s = format (s, "INIT ");

    return (s);
}

static u8 *format_svm_size (u8 *s, va_list *args)
{
    uword size = va_arg (*args, uword);

    if (size >= (1 << 20)) {
        s = format (s, "(%d mb)", size >> 20);
    } else if (size >= (1 << 10)) {
        s = format (s, "(%d kb)", size >> 10);
    } else {
        s = format (s, "(%d bytes)", size);
    }
    return (s);
}

u8 *format_svm_region (u8 *s, va_list *args)
{
    svm_region_t *rp = va_arg (*args, svm_region_t *);
    int verbose = va_arg (*args, int);
    int i;
    uword lo, hi;

    s = format (s, "%s: base va 0x%x size 0x%x %U\n",
                rp->region_name, rp->virtual_base,
                rp->virtual_size, format_svm_size, rp->virtual_size);
    s = format (s, "  user_ctx 0x%x, bitmap_size %d\n",
                rp->user_ctx, rp->bitmap_size);

    if (verbose) {
        s = format (s, "  flags: 0x%x %U\n", rp->flags,
                    format_svm_flags, rp->flags);
        s = format (s,
                    "  region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
                    rp->region_heap, rp->data_base, rp->data_heap);
    }

    s = format (s, "  %d clients, pids: ",
                vec_len (rp->client_pids));

    for (i = 0; i < vec_len (rp->client_pids); i++)
        s = format (s, "%d ", rp->client_pids[i]);

    s = format (s, "\n");

    if (verbose) {
        lo = hi = ~0;

        s = format (s, "  VM in use: ");

        for (i = 0; i < rp->bitmap_size; i++) {
            if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) {
                if (lo == ~0) {
                    hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
                } else {
                    hi = rp->virtual_base + i * MMAP_PAGESIZE;
                }
            } else {
                if (lo != ~0) {
                    hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
                    s = format (s, "   0x%x - 0x%x (%dk)\n", lo, hi,
                                (hi - lo) >> 10);
                    lo = hi = ~0;
                }
            }
        }
        s = format (s, "  rgn heap stats: %U", format_mheap,
                    rp->region_heap, 0);
        if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) {
            s = format (s, "\n  data heap stats: %U", format_mheap,
                        rp->data_heap, 1);
        }
        s = format (s, "\n");
    }

    return (s);
}
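
/*
 * Typical use of the formatter above from a debug CLI or test harness
 * (a minimal sketch; any mapped region pointer works):
 *
 *     svm_region_t *rp = svm_get_root_rp ();
 *     fformat (stdout, "%U\n", format_svm_region, rp, 1);  // 1 => verbose
 */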

/*
 * rnd_pagesize
 * Round size up to the next MMAP_PAGESIZE multiple (presumably 4k)
 */
static unsigned int rnd_pagesize (unsigned int size)
{
    unsigned int rv;

    rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
    return (rv);
}
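
/*
 * With 4k pages: rnd_pagesize(1) == 0x1000, rnd_pagesize(0x1000) == 0x1000,
 * rnd_pagesize(0x1001) == 0x2000; already page-aligned sizes pass through
 * unchanged.
 */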

/*
 * svm_data_region_create
 */
static int svm_data_region_create (svm_map_region_args_t *a,
                                   svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {
        struct stat statb;

        fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat (fd, &statb) < 0) {
            clib_unix_warning ("fstat");
            close (fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek (fd, map_size, SEEK_SET);
                if (write (fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT (map_size <= rp->virtual_size -
                (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning ("mmap");
            close (fd);
            return -3;
        }
        close (fd);
        rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
        rp->flags |= SVM_FLAGS_FILE;
    }

    if (a->flags & SVM_FLAGS_MHEAP) {
        rp->data_heap =
            mheap_alloc_with_flags ((void *) (rp->data_base), map_size,
                                    MHEAP_FLAG_DISABLE_VM);
        rp->flags |= SVM_FLAGS_MHEAP;
    }
    return 0;
}

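/*
 * svm_data_region_map
 * Client-side counterpart of svm_data_region_create above: re-maps an
 * existing backing file into an already-placed region, without
 * (re-)initializing the data heap or region flags.
 */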
static int svm_data_region_map (svm_map_region_args_t *a,
                                svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;
    struct stat statb;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {

        fd = open (a->backing_file, O_RDWR, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat (fd, &statb) < 0) {
            clib_unix_warning ("fstat");
            close (fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek (fd, map_size, SEEK_SET);
                if (write (fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT (map_size <= rp->virtual_size
                - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning ("mmap");
            close (fd);
            return -3;
        }
        close (fd);
    }
    return 0;
}

u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t *a)
{
    u8 *path;
    u8 *shm_name;
    u8 *split_point;
    u8 *mkdir_arg = 0;
    int root_path_offset = 0;
    int name_offset = 0;

    if (a->root_path) {
        /* Tolerate present or absent slashes */
        if (a->root_path[0] == '/')
            root_path_offset++;

        /* create the root_path under /dev/shm
           iterate through path creating directories */

        path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
        split_point = path + 1;
        vec_add1 (mkdir_arg, '-');

        while (*split_point) {
            while (*split_point && *split_point != '/') {
                vec_add1 (mkdir_arg, *split_point);
                split_point++;
            }
            vec_add1 (mkdir_arg, 0);

            /* ready to descend another level */
            mkdir_arg[vec_len (mkdir_arg) - 1] = '-';
            split_point++;
        }
        vec_free (mkdir_arg);
        vec_free (path);

        if (a->name[0] == '/')
            name_offset = 1;

        shm_name = format (0, "/%s-%s%c", a->root_path,
                           &a->name[name_offset], 0);
    } else
        shm_name = format (0, "%s%c", a->name, 0);
    return (shm_name);
}
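
/*
 * For example, root_path "vpe-1" and name "/global_vm" yield the shm
 * name "/vpe-1-global_vm" (a single path component, as shm_open
 * requires); with no root_path the name is used as-is.
 */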

/*
 * svm_map_region
 */
void *svm_map_region (svm_map_region_args_t *a)
{
    int svm_fd;
    svm_region_t *rp;
    pthread_mutexattr_t attr;
    pthread_condattr_t cattr;
    int deadman = 0;
    u8 junk = 0;
    void *oldheap;
    int overhead_space;
    int rv;
    uword data_base;
    int nbits, words, bit;
    int pid_holding_region_lock;
    u8 *shm_name;
    int dead_region_recovery = 0;
    int time_left;
    struct stat stat;
    struct timespec ts, tsrem;

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] map region %s", getpid (), a->name);

    ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
    ASSERT (a->name);

    shm_name = shm_name_from_svm_map_region_args (a);

    svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);

    if (svm_fd >= 0) {
        if (fchmod (svm_fd, 0770) < 0)
            clib_unix_warning ("segment chmod");
        /* This turns out to fail harmlessly if the client starts first */
        if (fchown (svm_fd, a->uid, a->gid) < 0)
            clib_unix_warning ("segment chown [ok if client starts first]");

        vec_free (shm_name);

        lseek (svm_fd, a->size, SEEK_SET);
        if (write (svm_fd, &junk, 1) != 1)
            clib_warning ("set region size");

        rp = mmap ((void *) a->baseva, a->size,
                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            clib_unix_warning ("mmap create");
            close (svm_fd);
            return (0);
        }
        close (svm_fd);
        memset (rp, 0, sizeof (*rp));

        if (pthread_mutexattr_init (&attr))
            clib_unix_warning ("mutexattr_init");

        if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning ("mutexattr_setpshared");

        if (pthread_mutex_init (&rp->mutex, &attr))
            clib_unix_warning ("mutex_init");

        if (pthread_mutexattr_destroy (&attr))
            clib_unix_warning ("mutexattr_destroy");

        if (pthread_condattr_init (&cattr))
            clib_unix_warning ("condattr_init");

        if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning ("condattr_setpshared");

        if (pthread_cond_init (&rp->condvar, &cattr))
            clib_unix_warning ("cond_init");

        if (pthread_condattr_destroy (&cattr))
            clib_unix_warning ("condattr_destroy");

        region_lock (rp, 1);

        rp->virtual_base = a->baseva;
        rp->virtual_size = a->size;

        rp->region_heap =
            mheap_alloc_with_flags ((void *) (a->baseva + MMAP_PAGESIZE),
                                    SVM_PVT_MHEAP_SIZE,
                                    MHEAP_FLAG_DISABLE_VM);
        oldheap = svm_push_pvt_heap (rp);

        rp->region_name = (char *) format (0, "%s%c", a->name, 0);
        vec_add1 (rp->client_pids, getpid ());

        nbits = rp->virtual_size / MMAP_PAGESIZE;

        ASSERT (nbits > 0);
        rp->bitmap_size = nbits;
        words = (nbits + BITS (uword) - 1) / BITS (uword);
        vec_validate (rp->bitmap, words - 1);

        overhead_space = MMAP_PAGESIZE /* header */ +
            SVM_PVT_MHEAP_SIZE;

        bit = 0;
        data_base = (uword) rp->virtual_base;

        if (a->flags & SVM_FLAGS_NODATA)
            rp->flags |= SVM_FLAGS_NEED_DATA_INIT;

        do {
            clib_bitmap_set_no_check (rp->bitmap, bit, 1);
            bit++;
            overhead_space -= MMAP_PAGESIZE;
            data_base += MMAP_PAGESIZE;
        } while (overhead_space > 0);

        rp->data_base = (void *) data_base;

        /*
         * Note: although the POSIX spec guarantees that only one
         * process enters this block, we have to play games
         * to hold off clients until e.g. the mutex is ready
         */
        rp->version = SVM_VERSION;

        /* setup the data portion of the region */

        rv = svm_data_region_create (a, rp);
        if (rv) {
            clib_warning ("data_region_create: %d", rv);
        }

        region_unlock (rp);

        svm_pop_heap (oldheap);

        return ((void *) rp);
    } else {
        svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);

        vec_free (shm_name);

        if (svm_fd < 0) {
            perror ("svm_region_map(mmap open)");
            return (0);
        }

        time_left = 20;
        while (1) {
            if (0 != fstat (svm_fd, &stat)) {
                clib_warning ("fstat failed: %d", errno);
                close (svm_fd);
                return (0);
            }
            if (stat.st_size > 0) {
                break;
            }
            if (0 == time_left) {
                clib_warning ("waiting for resize of shm file timed out");
                close (svm_fd);
                return (0);
            }
            ts.tv_sec = 0;
            ts.tv_nsec = 100000000;
            while (nanosleep (&ts, &tsrem) < 0)
                ts = tsrem;
            time_left--;
        }

        rp = mmap (0, MMAP_PAGESIZE,
                   PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            close (svm_fd);
            clib_warning ("mmap");
            return (0);
        }
        /*
         * We lost the footrace to create this region; make sure
         * the winner has crossed the finish line.
         */
        while (rp->version == 0 && deadman++ < 5) {
            sleep (1);
        }

        /*
         * <bleep>-ed?
         */
        if (rp->version == 0) {
            close (svm_fd);
            /* only MMAP_PAGESIZE was mapped above */
            munmap (rp, MMAP_PAGESIZE);
            clib_warning ("rp->version %d not %d", rp->version,
                          SVM_VERSION);
            return (0);
        }
        /* Remap now that the region has been placed */
        a->baseva = rp->virtual_base;
        a->size = rp->virtual_size;
        munmap (rp, MMAP_PAGESIZE);

        rp = (void *) mmap ((void *) a->baseva, a->size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_FIXED, svm_fd, 0);
        if ((uword) rp == (uword) MAP_FAILED) {
            close (svm_fd);
            clib_unix_warning ("mmap");
            return (0);
        }
        close (svm_fd);

        if ((uword) rp != rp->virtual_base) {
            clib_warning ("mmap botch");
        }

        /*
         * Try to fix the region mutex if it is held by
         * a dead process
         */
        pid_holding_region_lock = rp->mutex_owner_pid;
        if (pid_holding_region_lock &&
            kill (pid_holding_region_lock, 0) < 0) {
            clib_warning (
                "region %s mutex held by dead pid %d, tag %d, force unlock",
                rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
            /* owner pid is nonexistent */
            rp->mutex.__data.__owner = 0;
            rp->mutex.__data.__lock = 0;
            dead_region_recovery = 1;
        }

        if (dead_region_recovery)
            clib_warning ("recovery: attempt to re-lock region");

        region_lock (rp, 2);
        oldheap = svm_push_pvt_heap (rp);
        vec_add1 (rp->client_pids, getpid ());

        if (dead_region_recovery)
            clib_warning ("recovery: attempt svm_data_region_map");

        rv = svm_data_region_map (a, rp);
        if (rv) {
            clib_warning ("data_region_map: %d", rv);
        }

        if (dead_region_recovery)
            clib_warning ("unlock and continue");

        region_unlock (rp);

        svm_pop_heap (oldheap);

        return ((void *) rp);

    }
    return 0; /* NOTREACHED */
}
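
/*
 * Typical creator-side use, as in svm_region_init_internal below
 * (a minimal sketch):
 *
 *     svm_map_region_args_t _a, *a = &_a;
 *
 *     memset (a, 0, sizeof (*a));
 *     a->name = SVM_GLOBAL_REGION_NAME;
 *     a->baseva = SVM_GLOBAL_REGION_BASEVA;
 *     a->size = SVM_GLOBAL_REGION_SIZE;
 *     a->flags = SVM_FLAGS_NODATA;
 *     rp = svm_map_region (a);
 *
 * Attaching clients pass the same name; whoever loses the O_EXCL
 * shm_open race above simply maps the winner's region.
 */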

static void svm_mutex_cleanup (void)
{
    int i;
    for (i = 0; i < nheld; i++) {
        pthread_mutex_unlock (mutexes_held[i]);
    }
}
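
/*
 * svm_mutex_cleanup is registered via atexit() in
 * svm_region_init_internal below, so shared mutexes still held at
 * process exit are released rather than left locked in the shared
 * segment.
 */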

static void svm_region_init_internal (char *root_path, int uid, int gid)
{
    svm_region_t *rp;
    svm_map_region_args_t _a, *a = &_a;
    u64 ticks = clib_cpu_time_now ();
    uword randomize_baseva;

    /* guard against klutz calls */
    if (root_rp)
        return;

    root_rp_refcount++;

    atexit (svm_mutex_cleanup);

    /* Randomize the shared-VM base at init time */
    if (MMAP_PAGESIZE <= (4 << 10))
        randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
    else
        randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;

    memset (a, 0, sizeof (*a));
    a->root_path = root_path;
    a->name = SVM_GLOBAL_REGION_NAME;
    a->baseva = SVM_GLOBAL_REGION_BASEVA + randomize_baseva;
    a->size = SVM_GLOBAL_REGION_SIZE;
    a->flags = SVM_FLAGS_NODATA;
    a->uid = uid;
    a->gid = gid;

    rp = svm_map_region (a);
    ASSERT (rp);

    region_lock (rp, 3);

    /* Set up the main region data structures */
    if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) {
        svm_main_region_t *mp = 0;
        void *oldheap;

        rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);

        oldheap = svm_push_pvt_heap (rp);
        vec_validate (mp, 0);
        mp->name_hash = hash_create_string (0, sizeof (uword));
        mp->root_path = root_path
            ? format (0, "%s%c", root_path, 0) : 0;
        rp->data_base = mp;
        svm_pop_heap (oldheap);
    }
    region_unlock (rp);
    root_rp = rp;
}

void svm_region_init (void)
{
    svm_region_init_internal (0, 0 /* uid */, 0 /* gid */);
}

void svm_region_init_chroot (char *root_path)
{
    svm_region_init_internal (root_path, 0 /* uid */, 0 /* gid */);
}

void svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid)
{
    svm_region_init_internal (root_path, uid, gid);
}

void *svm_region_find_or_create (svm_map_region_args_t *a)
{
    svm_main_region_t *mp;
    svm_region_t *rp;
    uword need_nbits;
    int index, i;
    void *oldheap;
    uword *p;
    u8 *name;
    svm_subregion_t *subp;

    ASSERT (root_rp);

    a->size += MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE;
    a->size = rnd_pagesize (a->size);

    region_lock (root_rp, 4);
    oldheap = svm_push_pvt_heap (root_rp);
    mp = root_rp->data_base;

    ASSERT (mp);

    /* Map the named region from the correct chroot environment */
    a->root_path = (char *) mp->root_path;

    /*
     * See if this region is already known. If it is, we're
     * almost done...
     */
    p = hash_get_mem (mp->name_hash, a->name);

    if (p) {
        rp = svm_map_region (a);
        region_unlock (root_rp);
        svm_pop_heap (oldheap);
        return rp;
    }

    /* Create the region. */
    ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);

    need_nbits = a->size / MMAP_PAGESIZE;

    index = 1; /* $$$ fixme, figure out how many bits to really skip */

    /*
     * Scan the virtual space allocation bitmap, looking for a large
     * enough chunk
     */
    do {
        if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0) {
            for (i = 0; i < (need_nbits - 1); i++) {
                if (clib_bitmap_get_no_check (root_rp->bitmap,
                                              index + i) == 1) {
                    index = index + i;
                    goto next;
                }
            }
            break;
        }
        index++;
    next:;
    } while (index < root_rp->bitmap_size);

    /* Completely out of VM? */
    if (index >= root_rp->bitmap_size) {
        clib_warning ("region %s: not enough VM to allocate 0x%x",
                      root_rp->region_name, a->size);
        svm_pop_heap (oldheap);
        region_unlock (root_rp);
        return 0;
    }

    /*
     * Mark virtual space allocated
     */
#if CLIB_DEBUG > 1
    clib_warning ("set %d bits at index %d", need_nbits, index);
#endif

    for (i = 0; i < need_nbits; i++) {
        clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
    }

    /* Place this region where it goes... */
    a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;

    rp = svm_map_region (a);

    pool_get (mp->subregions, subp);
    name = format (0, "%s%c", a->name, 0);
    subp->subregion_name = name;

    hash_set_mem (mp->name_hash, name, subp - mp->subregions);

    svm_pop_heap (oldheap);

    region_unlock (root_rp);

    return (rp);
}
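
/*
 * Typical use, mirroring svm_client_scan below (a minimal sketch;
 * "my-subregion" is a made-up name). svm_region_init() must have
 * mapped the root region first:
 *
 *     svm_map_region_args_t *a = 0;
 *
 *     vec_validate (a, 0);
 *     a->name = "my-subregion";
 *     a->size = 64 << 20;          // rounded up internally
 *     a->flags = SVM_FLAGS_MHEAP;
 *     rp = svm_region_find_or_create (a);
 *     ...
 *     svm_region_unmap (rp);
 *     vec_free (a);
 */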

/*
 * svm_region_unmap
 *
 * Let go of the indicated region. If the calling process
 * is the last customer, throw it away completely.
 * The root region mutex guarantees atomicity with respect to
 * a new region client showing up at the wrong moment.
 */
void svm_region_unmap (void *rp_arg)
{
    int i, mypid = getpid ();
    int nclients_left;
    void *oldheap;
    uword virtual_base, virtual_size;
    svm_region_t *rp = rp_arg;
    char *name;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    ASSERT (rp);
    ASSERT (root_rp);

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);

    region_lock (root_rp, 5);
    region_lock (rp, 6);

    oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */

    /* Remove the caller from the list of mappers */
    for (i = 0; i < vec_len (rp->client_pids); i++) {
        if (rp->client_pids[i] == mypid) {
            vec_delete (rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning ("pid %d AWOL", mypid);

found:

    svm_pop_heap (oldheap);

    nclients_left = vec_len (rp->client_pids);
    virtual_base = rp->virtual_base;
    virtual_size = rp->virtual_size;

    if (nclients_left == 0) {
        int index, nbits, i;
        svm_main_region_t *mp;
        uword *p;
        svm_subregion_t *subp;

        /* Kill the region, last guy on his way out */

        oldheap = svm_push_pvt_heap (root_rp);
        name = vec_dup (rp->region_name);

        virtual_base = rp->virtual_base;
        virtual_size = rp->virtual_size;

        /* Figure out which bits to clear in the root region bitmap */
        index = (virtual_base - root_rp->virtual_base)
            / MMAP_PAGESIZE;

        nbits = (virtual_size + MMAP_PAGESIZE - 1)
            / MMAP_PAGESIZE;

#if CLIB_DEBUG > 1
        clib_warning ("clear %d bits at index %d", nbits, index);
#endif
        /* Give back the allocated VM */
        for (i = 0; i < nbits; i++) {
            clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
        }

        mp = root_rp->data_base;

        p = hash_get_mem (mp->name_hash, name);

        /* Better never happen ... */
        if (p == NULL) {
            region_unlock (rp);
            region_unlock (root_rp);
            svm_pop_heap (oldheap);
            clib_warning ("Region name '%s' not found?", name);
            return;
        }

        /* Remove from the root region subregion pool */
        subp = mp->subregions + p[0];
        pool_put (mp->subregions, subp);

        hash_unset_mem (mp->name_hash, name);

        vec_free (name);

        region_unlock (rp);
        shm_unlink (rp->region_name);
        munmap ((void *) virtual_base, virtual_size);
        region_unlock (root_rp);
        svm_pop_heap (oldheap);
        return;
    }

    region_unlock (rp);
    region_unlock (root_rp);

    munmap ((void *) virtual_base, virtual_size);
}

/*
 * svm_region_exit
 * There is no clean way to unlink the
 * root region when all clients go away,
 * so remove the pid entry and call it a day.
 */
void svm_region_exit (void)
{
    void *oldheap;
    int i, mypid = getpid ();
    uword virtual_base, virtual_size;

    /* It felt so nice we did it twice... */
    if (root_rp == 0)
        return;

    if (--root_rp_refcount > 0)
        return;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    region_lock (root_rp, 7);
    oldheap = svm_push_pvt_heap (root_rp);

    virtual_base = root_rp->virtual_base;
    virtual_size = root_rp->virtual_size;

    for (i = 0; i < vec_len (root_rp->client_pids); i++) {
        if (root_rp->client_pids[i] == mypid) {
            vec_delete (root_rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning ("pid %d AWOL", mypid);

found:

    region_unlock (root_rp);
    svm_pop_heap (oldheap);

    root_rp = 0;
    munmap ((void *) virtual_base, virtual_size);
}

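/*
 * Caller must hold rp->mutex (hence the _nolock suffix); see
 * svm_client_scan below, which takes the mutex around this call.
 */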
void svm_client_scan_this_region_nolock (svm_region_t *rp)
{
    int j;
    int mypid = getpid ();
    void *oldheap;

    for (j = 0; j < vec_len (rp->client_pids); j++) {
        if (mypid == rp->client_pids[j])
            continue;
        if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          rp->region_name, rp->client_pids[j]);
            /* nb: client vec in rp->region_heap */
            oldheap = svm_push_pvt_heap (rp);
            vec_delete (rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }
}


/*
 * Scan svm regions for dead clients
 */
void svm_client_scan (char *root_path)
{
    int i, j;
    svm_main_region_t *mp;
    svm_map_region_args_t *a = 0;
    svm_region_t *root_rp;
    svm_region_t *rp;
    svm_subregion_t *subp;
    u8 *name = 0;
    u8 **svm_names = 0;
    void *oldheap;
    int mypid = getpid ();

    vec_validate (a, 0);

    svm_region_init_chroot (root_path);

    root_rp = svm_get_root_rp ();

    pthread_mutex_lock (&root_rp->mutex);

    mp = root_rp->data_base;

    for (j = 0; j < vec_len (root_rp->client_pids); j++) {
        if (mypid == root_rp->client_pids[j])
            continue;
        if (root_rp->client_pids[j]
            && (kill (root_rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          root_rp->region_name, root_rp->client_pids[j]);
            /* nb: client vec in root_rp->region_heap */
            oldheap = svm_push_pvt_heap (root_rp);
            vec_delete (root_rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }

    /*
     * Snapshot names; can't hold the root rp mutex across
     * find_or_create.
     */
    pool_foreach (subp, mp->subregions, ({
        name = vec_dup (subp->subregion_name);
        vec_add1 (svm_names, name);
    }));

    pthread_mutex_unlock (&root_rp->mutex);

    for (i = 0; i < vec_len (svm_names); i++) {
        vec_validate (a, 0);
        a->root_path = root_path;
        a->name = (char *) svm_names[i];
        rp = svm_region_find_or_create (a);
        if (rp) {
            pthread_mutex_lock (&rp->mutex);

            svm_client_scan_this_region_nolock (rp);

            pthread_mutex_unlock (&rp->mutex);
            svm_region_unmap (rp);
            vec_free (svm_names[i]);
        }
        vec_free (a);
    }
    vec_free (svm_names);

    svm_region_exit ();

    vec_free (a);
}