/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>              /* errno, EEXIST checks below */
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/mheap.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"

static svm_region_t *root_rp;
static int root_rp_refcount;

#define MAXLOCK 2
static pthread_mutex_t *mutexes_held [MAXLOCK];
static int nheld;

svm_region_t *svm_get_root_rp (void)
{
    return root_rp;
}

#define MUTEX_DEBUG

static void region_lock(svm_region_t *rp, int tag)
{
    pthread_mutex_lock(&rp->mutex);
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = getpid();
    rp->mutex_owner_tag = tag;
#endif
    ASSERT(nheld < MAXLOCK);
    /*
     * Keep score of held mutexes so we can try to exit
     * cleanly if the world comes to an end at the worst possible
     * moment
     */
    mutexes_held [nheld++] = &rp->mutex;
}

static void region_unlock(svm_region_t *rp)
{
    int i, j;
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = 0;
    rp->mutex_owner_tag = 0;
#endif

    for (i = nheld-1; i >= 0; i--) {
        if (mutexes_held[i] == &rp->mutex) {
            for (j = i; j < MAXLOCK-1; j++)
                mutexes_held[j] = mutexes_held[j+1];
            nheld--;
            goto found;
        }
    }
    ASSERT(0);

found:
    CLIB_MEMORY_BARRIER();
    pthread_mutex_unlock(&rp->mutex);
}


static u8 * format_svm_flags (u8 * s, va_list * args)
{
    uword f = va_arg (*args, uword);

    if (f & SVM_FLAGS_MHEAP)
        s = format (s, "MHEAP ");
    if (f & SVM_FLAGS_FILE)
        s = format (s, "FILE ");
    if (f & SVM_FLAGS_NODATA)
        s = format (s, "NODATA ");
    if (f & SVM_FLAGS_NEED_DATA_INIT)
        s = format (s, "INIT ");

    return (s);
}

static u8 * format_svm_size (u8 * s, va_list * args)
{
    uword size = va_arg (*args, uword);

    /* left shifts: (1<<20) and (1<<10) are the mb/kb thresholds */
    if (size >= (1<<20)) {
        s = format (s, "(%d mb)", size >> 20);
    } else if (size >= (1<<10)) {
        s = format (s, "(%d kb)", size >> 10);
    } else {
        s = format (s, "(%d bytes)", size);
    }
    return (s);
}
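
/*
 * Example (informal): a size of 0x400000 formats as "(4 mb)",
 * 2048 as "(2 kb)", and 100 as "(100 bytes)".
 */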

u8 * format_svm_region (u8 * s, va_list * args)
{
    svm_region_t *rp = va_arg (*args, svm_region_t *);
    int verbose = va_arg (*args, int);
    int i;
    uword lo, hi;

    s = format (s, "%s: base va 0x%x size 0x%x %U\n",
                rp->region_name, rp->virtual_base,
                rp->virtual_size, format_svm_size, rp->virtual_size);
    s = format (s, "  user_ctx 0x%x, bitmap_size %d\n",
                rp->user_ctx, rp->bitmap_size);

    if (verbose) {
        s = format (s, "  flags: 0x%x %U\n", rp->flags,
                    format_svm_flags, rp->flags);
        s = format (s,
                    "  region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
                    rp->region_heap, rp->data_base, rp->data_heap);
    }

    s = format (s, "  %d clients, pids: ",
                vec_len(rp->client_pids));

    for (i = 0; i < vec_len(rp->client_pids); i++)
        s = format (s, "%d ", rp->client_pids[i]);

    s = format (s, "\n");

    if (verbose) {
        lo = hi = ~0;

        s = format (s, "  VM in use: ");

        for (i = 0; i < rp->bitmap_size; i++) {
            if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) {
                if (lo == ~0) {
                    hi = lo = rp->virtual_base + i*MMAP_PAGESIZE;
                } else {
                    hi = rp->virtual_base + i*MMAP_PAGESIZE;
                }
            } else {
                if (lo != ~0) {
                    hi = rp->virtual_base + i*MMAP_PAGESIZE - 1;
                    s = format (s, "   0x%x - 0x%x (%dk)\n", lo, hi,
                                (hi - lo)>>10);
                    lo = hi = ~0;
                }
            }
        }
        s = format (s, "  rgn heap stats: %U", format_mheap,
                    rp->region_heap, 0);
        if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) {
            s = format (s, "\n  data heap stats: %U", format_mheap,
                        rp->data_heap, 1);
        }
        s = format (s, "\n");
    }

    return(s);
}


/*
 * rnd_pagesize
 * Round to a pagesize multiple, presumably 4k works
 */
static unsigned int rnd_pagesize(unsigned int size)
{
    unsigned int rv;

    rv = (size + (MMAP_PAGESIZE-1)) & ~(MMAP_PAGESIZE-1);
    return(rv);
}
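
/*
 * Worked example, assuming MMAP_PAGESIZE is 4096: rnd_pagesize(1) and
 * rnd_pagesize(4096) both return 4096; rnd_pagesize(4097) returns 8192.
 * Adding (pagesize-1) then masking off the low bits is the usual
 * branch-free round-up idiom for power-of-two page sizes.
 */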

/*
 * svm_data_region_create
 */
static int svm_data_region_create (svm_map_region_args_t *a,
                                   svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {
        struct stat statb;

        fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat(fd, &statb) < 0) {
            clib_unix_warning("fstat");
            close(fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek(fd, map_size, SEEK_SET);
                if (write(fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT(map_size <= rp->virtual_size -
               (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning("mmap");
            close(fd);
            return -3;
        }
        close(fd);
        rp->backing_file = (char *) format(0, "%s%c", a->backing_file, 0);
        rp->flags |= SVM_FLAGS_FILE;
    }

    if (a->flags & SVM_FLAGS_MHEAP) {
        rp->data_heap =
            mheap_alloc_with_flags ((void *)(rp->data_base), map_size,
                                    MHEAP_FLAG_DISABLE_VM);
        rp->flags |= SVM_FLAGS_MHEAP;
    }
    return 0;
}
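
/*
 * Note on sizing the backing file above: seeking to map_size and
 * writing a single byte extends the file to map_size+1 bytes without
 * allocating disk blocks for the hole. A sketch of the equivalent
 * single call (not what the code above uses):
 *
 *     if (ftruncate (fd, map_size + 1) < 0)
 *         clib_unix_warning ("set region size");
 */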

static int svm_data_region_map (svm_map_region_args_t *a,
                                svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;
    struct stat statb;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {

        fd = open (a->backing_file, O_RDWR, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat(fd, &statb) < 0) {
            clib_unix_warning("fstat");
            close(fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek(fd, map_size, SEEK_SET);
                if (write(fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT(map_size <= rp->virtual_size
               - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning("mmap");
            close(fd);
            return -3;
        }
        close(fd);
    }
    return 0;
}
313
314u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t *a)
315{
316 u8 *path;
317 u8 *shm_name;
318 u8 *split_point;
319 u8 *mkdir_arg = 0;
320 int root_path_offset = 0;
321 int name_offset = 0;
322
323 if (a->root_path) {
324 /* Tolerate present or absent slashes */
325 if (a->root_path[0] == '/')
326 root_path_offset++;
327
328 /* create the root_path under /dev/shm
329 iterate through path creating directories */
330
331 path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
332 split_point = path+1;
333 vec_add1(mkdir_arg, '-');
334
335 while (*split_point) {
336 while (*split_point && *split_point != '/') {
337 vec_add1 (mkdir_arg, *split_point);
338 split_point++;
339 }
340 vec_add1 (mkdir_arg, 0);
341 if (mkdir ((char *) mkdir_arg, 0777) < 0 && errno != EEXIST)
342 clib_unix_warning ("mkdir %s", mkdir_arg);
343
344 /* ready to descend another level */
345 mkdir_arg[vec_len(mkdir_arg)-1] = '-';
346 split_point++;
347 }
348 vec_free(mkdir_arg);
349 vec_free(path);
350
351 if (a->name[0] == '/')
352 name_offset = 1;
353
354 shm_name = format (0, "/%s-%s%c", a->root_path,
355 &a->name[name_offset], 0);
356 }
357 else
358 shm_name = format (0, "%s%c", a->name, 0);
359 return (shm_name);
360}
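
/*
 * Example (informal): with a->root_path = "vpp" and a->name =
 * "/global_vm", the loop above issues mkdir("/dev"), mkdir("/dev/shm")
 * (both typically EEXIST) and mkdir("/dev/shm/vpp"), then returns the
 * shm_open-style name "/vpp-global_vm". With no root_path, the name is
 * returned unchanged.
 */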

/*
 * svm_map_region
 */
void *svm_map_region (svm_map_region_args_t *a)
{
    int svm_fd;
    svm_region_t *rp;
    pthread_mutexattr_t attr;
    pthread_condattr_t cattr;
    int deadman = 0;
    u8 junk = 0;
    void *oldheap;
    int overhead_space;
    int rv;
    uword data_base;
    int nbits, words, bit;
    int pid_holding_region_lock;
    u8 *shm_name;
    int dead_region_recovery = 0;
    int time_left;
    struct stat statb;
    struct timespec ts, tsrem;

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] map region %s", getpid(), a->name);

    ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);
    ASSERT(a->name);

    shm_name = shm_name_from_svm_map_region_args (a);

    svm_fd = shm_open((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);

    if (svm_fd >= 0) {

        vec_free(shm_name);

        lseek(svm_fd, a->size, SEEK_SET);
        if (write(svm_fd, &junk, 1) != 1)
            clib_warning ("set region size");

        rp = mmap((void *)a->baseva, a->size,
                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            clib_unix_warning ("mmap create");
            close(svm_fd);
            return (0);
        }
        close(svm_fd);
        memset(rp, 0, sizeof(*rp));

        if (pthread_mutexattr_init(&attr))
            clib_unix_warning("mutexattr_init");

        if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning("mutexattr_setpshared");

        if (pthread_mutex_init(&rp->mutex, &attr))
            clib_unix_warning("mutex_init");

        if (pthread_mutexattr_destroy(&attr))
            clib_unix_warning("mutexattr_destroy");

        if (pthread_condattr_init(&cattr))
            clib_unix_warning("condattr_init");

        if (pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning("condattr_setpshared");

        if (pthread_cond_init(&rp->condvar, &cattr))
            clib_unix_warning("cond_init");

        if (pthread_condattr_destroy(&cattr))
            clib_unix_warning("condattr_destroy");

        region_lock (rp, 1);

        rp->virtual_base = a->baseva;
        rp->virtual_size = a->size;

        rp->region_heap =
            mheap_alloc_with_flags ((void *)(a->baseva+MMAP_PAGESIZE),
                                    SVM_PVT_MHEAP_SIZE,
                                    MHEAP_FLAG_DISABLE_VM);
        oldheap = svm_push_pvt_heap(rp);

        rp->region_name = (char *)format (0, "%s%c", a->name, 0);
        vec_add1(rp->client_pids, getpid());

        nbits = rp->virtual_size / MMAP_PAGESIZE;

        ASSERT (nbits > 0);
        rp->bitmap_size = nbits;
        words = (nbits + BITS(uword)-1) / BITS(uword);
        vec_validate (rp->bitmap, words-1);

        overhead_space = MMAP_PAGESIZE /* header */ +
            SVM_PVT_MHEAP_SIZE;

        bit = 0;
        data_base = (uword)rp->virtual_base;

        if (a->flags & SVM_FLAGS_NODATA)
            rp->flags |= SVM_FLAGS_NEED_DATA_INIT;

        /* Mark the header page and private-mheap pages as in use */
        do {
            clib_bitmap_set_no_check (rp->bitmap, bit, 1);
            bit++;
            overhead_space -= MMAP_PAGESIZE;
            data_base += MMAP_PAGESIZE;
        } while (overhead_space > 0);

        rp->data_base = (void *)data_base;

        /*
         * Note: although the POSIX spec guarantees that only one
         * process enters this block, we have to play games
         * to hold off clients until e.g. the mutex is ready
         */
        rp->version = SVM_VERSION;

        /* setup the data portion of the region */

        rv = svm_data_region_create (a, rp);
        if (rv) {
            clib_warning ("data_region_create: %d", rv);
        }

        region_unlock(rp);

        svm_pop_heap(oldheap);

        return ((void *) rp);
    } else {
        svm_fd = shm_open((char *)shm_name, O_RDWR, 0777);

        vec_free(shm_name);

        if (svm_fd < 0) {
            perror("svm_map_region(shm_open)");
            return (0);
        }

        /* Wait up to 20 x 100ms for the creator to size the file */
        time_left = 20;
        while (1) {
            if (0 != fstat(svm_fd, &statb)) {
                clib_warning("fstat failed: %d", errno);
                return (0);
            }
            if (statb.st_size > 0) {
                break;
            }
            if (0 == time_left) {
                clib_warning("waiting for resize of shm file timed out");
                return (0);
            }
            ts.tv_sec = 0;
            ts.tv_nsec = 100000000;
            while (nanosleep(&ts, &tsrem) < 0)
                ts = tsrem;
            time_left--;
        }

        rp = mmap(0, MMAP_PAGESIZE,
                  PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            close(svm_fd);
            clib_warning("mmap");
            return (0);
        }
        /*
         * We lost the footrace to create this region; make sure
         * the winner has crossed the finish line.
         */
        while (rp->version == 0 && deadman++ < 5) {
            sleep(1);
        }

        /*
         * <bleep>-ed?
         */
        if (rp->version == 0) {
            close(svm_fd);
            /* only MMAP_PAGESIZE was mapped just above */
            munmap(rp, MMAP_PAGESIZE);
            clib_warning("rp->version %d not %d", rp->version,
                         SVM_VERSION);
            return (0);
        }
        /* Remap now that the region has been placed */
        a->baseva = rp->virtual_base;
        a->size = rp->virtual_size;
        munmap(rp, MMAP_PAGESIZE);

        rp = (void *) mmap ((void *)a->baseva, a->size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_FIXED, svm_fd, 0);
        if ((uword)rp == (uword)MAP_FAILED) {
            close(svm_fd);
            clib_unix_warning ("mmap");
            return (0);
        }
        close(svm_fd);

        if ((uword) rp != rp->virtual_base) {
            clib_warning("mmap botch");
        }

        /*
         * Try to fix the region mutex if it is held by
         * a dead process
         */
        pid_holding_region_lock = rp->mutex_owner_pid;
        if (pid_holding_region_lock &&
            kill (pid_holding_region_lock, 0) < 0) {
            clib_warning (
                "region %s mutex held by dead pid %d, tag %d, force unlock",
                rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
            /* owner pid is nonexistent */
            rp->mutex.__data.__owner = 0;
            rp->mutex.__data.__lock = 0;
            dead_region_recovery = 1;
        }

        if (dead_region_recovery)
            clib_warning ("recovery: attempt to re-lock region");

        region_lock(rp, 2);
        oldheap = svm_push_pvt_heap (rp);
        vec_add1(rp->client_pids, getpid());

        if (dead_region_recovery)
            clib_warning ("recovery: attempt svm_data_region_map");

        rv = svm_data_region_map (a, rp);
        if (rv) {
            clib_warning ("data_region_map: %d", rv);
        }

        if (dead_region_recovery)
            clib_warning ("unlock and continue");

        region_unlock(rp);

        svm_pop_heap(oldheap);

        return ((void *) rp);

    }
    return 0; /* NOTREACHED */
}
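
/*
 * Usage sketch (hypothetical caller; the name, size and flags below are
 * invented for illustration):
 *
 *     svm_map_region_args_t args;
 *
 *     memset (&args, 0, sizeof (args));
 *     args.name = "/my-region";        // shm_open-style name
 *     args.baseva = SVM_GLOBAL_REGION_BASEVA;
 *     args.size = 4<<20;               // must be a page-size multiple
 *     args.flags = SVM_FLAGS_MHEAP;    // carve an mheap from the data area
 *
 *     svm_region_t *rp = svm_map_region (&args);
 *
 * The first caller wins the shm_open(O_CREAT | O_EXCL) footrace and
 * initializes the region; later callers spin until rp->version is
 * nonzero, then attach at the creator's base address.
 */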

static void svm_mutex_cleanup (void)
{
    int i;
    for (i = 0; i < nheld; i++) {
        pthread_mutex_unlock (mutexes_held[i]);
    }
}

static void svm_region_init_internal (char *root_path)
{
    svm_region_t *rp;
    svm_map_region_args_t *a = 0;
    u64 ticks = clib_cpu_time_now();
    uword randomize_baseva;

    /* guard against klutz calls */
    root_rp_refcount++;
    if (root_rp)
        return;

    atexit(svm_mutex_cleanup);

    /* Randomize the shared-VM base at init time */
    randomize_baseva = (ticks & 15) * 4096;

    vec_validate(a, 0);
    a->root_path = root_path;
    a->name = SVM_GLOBAL_REGION_NAME;
    a->baseva = SVM_GLOBAL_REGION_BASEVA + randomize_baseva;
    a->size = SVM_GLOBAL_REGION_SIZE;
    a->flags = SVM_FLAGS_NODATA;

    rp = svm_map_region (a);
    ASSERT(rp);

    region_lock(rp, 3);

    /* Set up the main region data structures */
    if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) {
        svm_main_region_t *mp = 0;
        void *oldheap;

        rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);

        oldheap = svm_push_pvt_heap (rp);
        vec_validate (mp, 0);
        mp->name_hash = hash_create_string (0, sizeof(uword));
        mp->root_path = root_path
            ? format (0, "%s%c", root_path, 0) : 0;
        rp->data_base = mp;
        svm_pop_heap (oldheap);
    }
    region_unlock(rp);
    vec_free (a);
    root_rp = rp;
}

void svm_region_init (void)
{
    svm_region_init_internal (0);
}

void svm_region_init_chroot (char *root_path)
{
    svm_region_init_internal (root_path);
}

void *svm_region_find_or_create (svm_map_region_args_t *a)
{
    svm_main_region_t *mp;
    svm_region_t *rp;
    uword need_nbits;
    int index, i;
    void *oldheap;
    uword *p;
    u8 *name;
    svm_subregion_t *subp;

    ASSERT(root_rp);

    a->size += MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE;
    a->size = rnd_pagesize(a->size);

    region_lock (root_rp, 4);
    oldheap = svm_push_pvt_heap(root_rp);
    mp = root_rp->data_base;

    ASSERT(mp);

    /* Map the named region from the correct chroot environment */
    a->root_path = (char *) mp->root_path;

    /*
     * See if this region is already known. If it is, we're
     * almost done...
     */
    p = hash_get_mem (mp->name_hash, a->name);

    if (p) {
        rp = svm_map_region (a);
        region_unlock(root_rp);
        svm_pop_heap (oldheap);
        return rp;
    }

    /* Create the region. */
    ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);

    need_nbits = a->size / MMAP_PAGESIZE;

    index = 1; /* $$$ fixme, figure out how many bits to really skip */

    /*
     * Scan the virtual space allocation bitmap, looking for a large
     * enough chunk
     */
    do {
        if (clib_bitmap_get_no_check(root_rp->bitmap, index) == 0) {
            /* bit 'index' is free; verify the rest of the run */
            for (i = 1; i < need_nbits; i++) {
                if (clib_bitmap_get_no_check(root_rp->bitmap,
                                             index+i) == 1) {
                    index = index + i;
                    goto next;
                }
            }
            break;
        }
        index++;
    next:;
    } while (index < root_rp->bitmap_size);

    /* Completely out of VM? */
    if (index >= root_rp->bitmap_size) {
        clib_warning("region %s: not enough VM to allocate 0x%x",
                     root_rp->region_name, a->size);
        svm_pop_heap (oldheap);
        region_unlock (root_rp);
        return 0;
    }

    /*
     * Mark virtual space allocated
     */
#if CLIB_DEBUG > 1
    clib_warning ("set %d bits at index %d", need_nbits, index);
#endif

    for (i = 0; i < need_nbits; i++) {
        clib_bitmap_set_no_check (root_rp->bitmap, index+i, 1);
    }

    /* Place this region where it goes... */
    a->baseva = root_rp->virtual_base + index*MMAP_PAGESIZE;

    rp = svm_map_region (a);

    pool_get (mp->subregions, subp);
    name = format (0, "%s%c", a->name, 0);
    subp->subregion_name = name;

    hash_set_mem (mp->name_hash, name, subp - mp->subregions);

    svm_pop_heap (oldheap);

    region_unlock (root_rp);

    return (rp);
}
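
/*
 * Usage sketch (hypothetical; the subregion name and size are invented
 * for illustration):
 *
 *     svm_map_region_args_t *a = 0;
 *     svm_region_t *rp;
 *
 *     vec_validate (a, 0);
 *     a->name = "/my-subregion";
 *     a->size = 1<<20;                 // overhead pages added internally
 *     a->flags = SVM_FLAGS_MHEAP;
 *
 *     rp = svm_region_find_or_create (a);
 *     ...                              // use rp->data_heap, etc.
 *     svm_region_unmap (rp);           // detach; last client frees the VM
 *     vec_free (a);
 */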

/*
 * svm_region_unmap
 *
 * Let go of the indicated region. If the calling process
 * is the last customer, throw it away completely.
 * The root region mutex guarantees atomicity with respect to
 * a new region client showing up at the wrong moment.
 */
void svm_region_unmap (void *rp_arg)
{
    int i, mypid = getpid();
    int nclients_left;
    void *oldheap;
    uword virtual_base, virtual_size;
    svm_region_t *rp = rp_arg;
    char *name;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    ASSERT(rp);
    ASSERT(root_rp);

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] unmap region %s", getpid(), rp->region_name);

    region_lock (root_rp, 5);
    region_lock (rp, 6);

    oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */

    /* Remove the caller from the list of mappers */
    for (i = 0; i < vec_len(rp->client_pids); i++) {
        if (rp->client_pids[i] == mypid) {
            vec_delete (rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning("pid %d AWOL", mypid);

found:

    svm_pop_heap (oldheap);

    nclients_left = vec_len(rp->client_pids);
    virtual_base = rp->virtual_base;
    virtual_size = rp->virtual_size;

    if (nclients_left == 0) {
        int index, nbits, i;
        svm_main_region_t *mp;
        uword *p;
        svm_subregion_t *subp;

        /* Kill the region, last guy on his way out */

        oldheap = svm_push_pvt_heap (root_rp);
        name = vec_dup (rp->region_name);

        virtual_base = rp->virtual_base;
        virtual_size = rp->virtual_size;

        /* Figure out which bits to clear in the root region bitmap */
        index = (virtual_base - root_rp->virtual_base)
            / MMAP_PAGESIZE;

        nbits = (virtual_size + MMAP_PAGESIZE - 1)
            / MMAP_PAGESIZE;

#if CLIB_DEBUG > 1
        clib_warning ("clear %d bits at index %d", nbits, index);
#endif
        /* Give back the allocated VM */
        for (i = 0; i < nbits; i++) {
            clib_bitmap_set_no_check (root_rp->bitmap, index+i, 0);
        }

        mp = root_rp->data_base;

        p = hash_get_mem (mp->name_hash, name);

        /* Better never happen ... */
        if (p == NULL) {
            region_unlock (rp);
            region_unlock (root_rp);
            svm_pop_heap (oldheap);
            clib_warning ("Region name '%s' not found?", name);
            return;
        }

        /* Remove from the root region subregion pool */
        subp = mp->subregions + p[0];
        pool_put (mp->subregions, subp);

        hash_unset_mem (mp->name_hash, name);

        vec_free(name);

        region_unlock (rp);
        shm_unlink(rp->region_name);
        munmap ((void *)virtual_base, virtual_size);
        region_unlock (root_rp);
        svm_pop_heap (oldheap);
        return;
    }

    region_unlock(rp);
    region_unlock(root_rp);

    munmap ((void *)virtual_base, virtual_size);
}

/*
 * svm_region_exit
 * There is no clean way to unlink the
 * root region when all clients go away,
 * so remove the pid entry and call it a day.
 */
void svm_region_exit ()
{
    void *oldheap;
    int i, mypid = getpid();
    uword virtual_base, virtual_size;

    /* It felt so nice we did it twice... */
    if (root_rp == 0)
        return;

    if (--root_rp_refcount > 0)
        return;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    region_lock(root_rp, 7);
    oldheap = svm_push_pvt_heap (root_rp);

    virtual_base = root_rp->virtual_base;
    virtual_size = root_rp->virtual_size;

    for (i = 0; i < vec_len(root_rp->client_pids); i++) {
        if (root_rp->client_pids[i] == mypid) {
            vec_delete (root_rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning("pid %d AWOL", mypid);

found:

    region_unlock(root_rp);
    svm_pop_heap (oldheap);

    root_rp = 0;
    munmap ((void *)virtual_base, virtual_size);
}

void svm_client_scan_this_region_nolock (svm_region_t *rp)
{
    int j;
    int mypid = getpid();
    void *oldheap;

    for (j = 0; j < vec_len(rp->client_pids); j++) {
        if (mypid == rp->client_pids[j])
            continue;
        if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          rp->region_name, rp->client_pids[j]);
            /* nb: client vec in rp->region_heap */
            oldheap = svm_push_pvt_heap (rp);
            vec_delete (rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }
}

/*
 * Scan svm regions for dead clients
 */
void svm_client_scan(char *root_path)
{
    int i, j;
    svm_main_region_t *mp;
    svm_map_region_args_t *a = 0;
    svm_region_t *root_rp;
    svm_region_t *rp;
    svm_subregion_t *subp;
    u8 *name = 0;
    u8 **svm_names = 0;
    void *oldheap;
    int mypid = getpid();

    vec_validate (a, 0);

    svm_region_init_chroot(root_path);

    root_rp = svm_get_root_rp();

    pthread_mutex_lock (&root_rp->mutex);

    mp = root_rp->data_base;

    for (j = 0; j < vec_len (root_rp->client_pids); j++) {
        if (mypid == root_rp->client_pids[j])
            continue;
        if (root_rp->client_pids[j]
            && (kill (root_rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          root_rp->region_name, root_rp->client_pids[j]);
            /* nb: client vec in root_rp->region_heap */
            oldheap = svm_push_pvt_heap (root_rp);
            vec_delete (root_rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }

    /*
     * Snapshot names; we can't hold the root rp mutex across
     * find_or_create.
     */
    pool_foreach (subp, mp->subregions, ({
        name = vec_dup (subp->subregion_name);
        vec_add1(svm_names, name);
    }));

    pthread_mutex_unlock (&root_rp->mutex);

    for (i = 0; i < vec_len(svm_names); i++) {
        vec_validate(a, 0);
        a->root_path = root_path;
        a->name = (char *) svm_names[i];
        rp = svm_region_find_or_create (a);
        if (rp) {
            pthread_mutex_lock (&rp->mutex);

            svm_client_scan_this_region_nolock (rp);

            pthread_mutex_unlock (&rp->mutex);
            svm_region_unmap (rp);
            vec_free(svm_names[i]);
        }
        vec_free (a);
    }
    vec_free(svm_names);

    svm_region_exit ();

    vec_free (a);
}
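
/*
 * Usage note (informal): svm_client_scan is intended to run from a
 * standalone maintenance process. A hypothetical sweeper, assuming the
 * same chroot prefix the clients used (prefix invented here):
 *
 *     int main (int argc, char **argv)
 *     {
 *         svm_client_scan ("/vpp");
 *         return 0;
 *     }
 *
 * It deletes root-region pid entries whose owners no longer answer
 * kill(pid, 0), attaches to each known subregion to do the same, then
 * detaches and exits.
 */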