/*
 *------------------------------------------------------------------
 * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
 * library
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <netinet/in.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <string.h>
#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/fifo.h>
#include <vppinfra/time.h>
#include <vppinfra/mheap.h>
#include <vppinfra/heap.h>
#include <vppinfra/pool.h>
#include <vppinfra/format.h>

#include "svm.h"

static svm_region_t *root_rp;
static int root_rp_refcount;

#define MAXLOCK 2
static pthread_mutex_t *mutexes_held [MAXLOCK];
static int nheld;

svm_region_t *svm_get_root_rp (void)
{
    return root_rp;
}

#define MUTEX_DEBUG

static void region_lock(svm_region_t *rp, int tag)
{
    pthread_mutex_lock(&rp->mutex);
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = getpid();
    rp->mutex_owner_tag = tag;
#endif
    ASSERT(nheld < MAXLOCK);
    /*
     * Keep score of held mutexes so we can try to exit
     * cleanly if the world comes to an end at the worst possible
     * moment
     */
    mutexes_held [nheld++] = &rp->mutex;
}

static void region_unlock(svm_region_t *rp)
{
    int i,j;
#ifdef MUTEX_DEBUG
    rp->mutex_owner_pid = 0;
    rp->mutex_owner_tag = 0;
#endif

    for (i = nheld-1; i >= 0; i--) {
        if (mutexes_held[i] == &rp->mutex) {
            for (j = i; j < MAXLOCK-1; j++)
                mutexes_held[j] = mutexes_held[j+1];
            nheld--;
            goto found;
        }
    }
    ASSERT(0);

found:
    CLIB_MEMORY_BARRIER();
    pthread_mutex_unlock(&rp->mutex);
}
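
/*
 * A minimal usage sketch (illustrative, not part of the API): every
 * region_lock() must be paired with a region_unlock() on the same
 * region, and at most MAXLOCK regions may be held at once. The tag
 * is an arbitrary integer recorded for post-mortem debugging.
 *
 *   region_lock (rp, 42);     // 42 is a hypothetical debug tag
 *   ... touch shared state in rp ...
 *   region_unlock (rp);
 */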
static u8 * format_svm_flags (u8 * s, va_list * args)
{
    uword f = va_arg (*args, uword);

    if (f & SVM_FLAGS_MHEAP)
        s = format (s, "MHEAP ");
    if (f & SVM_FLAGS_FILE)
        s = format (s, "FILE ");
    if (f & SVM_FLAGS_NODATA)
        s = format (s, "NODATA ");
    if (f & SVM_FLAGS_NEED_DATA_INIT)
        s = format (s, "INIT ");

    return (s);
}

static u8 * format_svm_size (u8 * s, va_list * args)
{
    uword size = va_arg (*args, uword);

    if (size >= (1<<20)) {
        s = format (s, "(%d mb)", size >> 20);
    } else if (size >= (1<<10)) {
        s = format (s, "(%d kb)", size >> 10);
    } else {
        s = format (s, "(%d bytes)", size);
    }
    return (s);
}

u8 * format_svm_region (u8 * s, va_list * args)
{
    svm_region_t *rp = va_arg (*args, svm_region_t *);
    int verbose = va_arg (*args, int);
    int i;
    uword lo, hi;

    s = format (s, "%s: base va 0x%x size 0x%x %U\n",
                rp->region_name, rp->virtual_base,
                rp->virtual_size, format_svm_size, rp->virtual_size);
    s = format (s, "  user_ctx 0x%x, bitmap_size %d\n",
                rp->user_ctx, rp->bitmap_size);

    if (verbose) {
        s = format (s, "  flags: 0x%x %U\n", rp->flags,
                    format_svm_flags, rp->flags);
        s = format (s,
                    "  region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
                    rp->region_heap, rp->data_base, rp->data_heap);
    }

    s = format (s, "  %d clients, pids: ",
                vec_len(rp->client_pids));

    for (i = 0; i < vec_len(rp->client_pids); i++)
        s = format (s, "%d ", rp->client_pids[i]);

    s = format (s, "\n");

    if (verbose) {
        lo = hi = ~0;

        s = format (s, "  VM in use: ");

        for (i = 0; i < rp->bitmap_size; i++) {
            if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) {
                if (lo == ~0) {
                    hi = lo = rp->virtual_base + i*MMAP_PAGESIZE;
                } else {
                    hi = rp->virtual_base + i*MMAP_PAGESIZE;
                }
            } else {
                if (lo != ~0) {
                    hi = rp->virtual_base + i*MMAP_PAGESIZE - 1;
                    s = format (s, "   0x%x - 0x%x (%dk)\n", lo, hi,
                                (hi - lo)>>10);
                    lo = hi = ~0;
                }
            }
        }
        s = format (s, "  rgn heap stats: %U", format_mheap,
                    rp->region_heap, 0);
        if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) {
            s = format (s, "\n  data heap stats: %U", format_mheap,
                        rp->data_heap, 1);
        }
        s = format (s, "\n");
    }

    return(s);
}
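
/*
 * Illustrative only: format_svm_region is a vppinfra %U formatter,
 * so a caller holding a mapped region pointer rp would typically
 * print it like this:
 *
 *   fformat (stdout, "%U", format_svm_region, rp, 1);  // 1 == verbose
 */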

/*
 * rnd_pagesize
 * Round up to a pagesize multiple, presumably 4k works
 */
static unsigned int rnd_pagesize(unsigned int size)
{
    unsigned int rv;

    rv = (size + (MMAP_PAGESIZE-1)) & ~(MMAP_PAGESIZE-1);
    return(rv);
}
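
/*
 * Worked example (assuming MMAP_PAGESIZE == 4<<10): rnd_pagesize(1)
 * and rnd_pagesize(4096) both yield 4096, rnd_pagesize(4097) yields
 * 8192, and rnd_pagesize(0) yields 0.
 */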

/*
 * svm_data_region_create
 */
static int svm_data_region_create (svm_map_region_args_t *a,
                                   svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {
        struct stat statb;

        fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat(fd, &statb) < 0) {
            clib_unix_warning("fstat");
            close(fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek(fd, map_size, SEEK_SET);
                if (write(fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT(map_size <= rp->virtual_size -
               (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning("mmap");
            close(fd);
            return -3;
        }
        close(fd);
        rp->backing_file = (char *) format(0, "%s%c", a->backing_file, 0);
        rp->flags |= SVM_FLAGS_FILE;
    }

    if (a->flags & SVM_FLAGS_MHEAP) {
        rp->data_heap =
            mheap_alloc_with_flags ((void *)(rp->data_base), map_size,
                                    MHEAP_FLAG_DISABLE_VM);
        rp->flags |= SVM_FLAGS_MHEAP;
    }
    return 0;
}

static int svm_data_region_map (svm_map_region_args_t *a,
                                svm_region_t *rp)
{
    int fd;
    u8 junk = 0;
    uword map_size;
    struct stat statb;

    map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);

    if (a->flags & SVM_FLAGS_FILE) {

        fd = open (a->backing_file, O_RDWR, 0777);

        if (fd < 0) {
            clib_unix_warning ("open");
            return -1;
        }

        if (fstat(fd, &statb) < 0) {
            clib_unix_warning("fstat");
            close(fd);
            return -2;
        }

        if (statb.st_mode & S_IFREG) {
            if (statb.st_size == 0) {
                lseek(fd, map_size, SEEK_SET);
                if (write(fd, &junk, 1) != 1)
                    clib_unix_warning ("set region size");
            } else {
                map_size = rnd_pagesize (statb.st_size);
            }
        } else {
            map_size = a->backing_mmap_size;
        }

        ASSERT(map_size <= rp->virtual_size
               - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE));

        if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
            clib_unix_warning("mmap");
            close(fd);
            return -3;
        }
        close(fd);
    }
    return 0;
}
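
/*
 * Region layout note (derived from the code above): each region
 * occupies rp->virtual_size bytes of VM. The first MMAP_PAGESIZE
 * bytes hold the svm_region_t header, the next SVM_PVT_MHEAP_SIZE
 * bytes hold the private region heap, and the remainder (map_size)
 * is the data portion, optionally file-backed and/or carved into an
 * mheap.
 */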

u8 *shm_name_from_svm_map_region_args (svm_map_region_args_t *a)
{
    u8 *path;
    u8 *shm_name;
    u8 *split_point;
    u8 *mkdir_arg = 0;
    int root_path_offset = 0;
    int name_offset = 0;

    if (a->root_path) {
        /* Tolerate present or absent slashes */
        if (a->root_path[0] == '/')
            root_path_offset++;

        /* create the root_path under /dev/shm
           iterate through path creating directories */

        path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
        split_point = path+1;
        vec_add1(mkdir_arg, '-');

        while (*split_point) {
            while (*split_point && *split_point != '/') {
                vec_add1 (mkdir_arg, *split_point);
                split_point++;
            }
            vec_add1 (mkdir_arg, 0);

            /* ready to descend another level */
            mkdir_arg[vec_len(mkdir_arg)-1] = '-';
            split_point++;
        }
        vec_free(mkdir_arg);
        vec_free(path);

        if (a->name[0] == '/')
            name_offset = 1;

        shm_name = format (0, "/%s-%s%c", a->root_path,
                           &a->name[name_offset], 0);
    }
    else
        shm_name = format (0, "%s%c", a->name, 0);
    return (shm_name);
}
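
/*
 * Naming example (illustrative values): with a->root_path =
 * "vpp-test" and a->name = "/global_vm", the shm segment is named
 * "/vpp-test-global_vm"; with no root_path it is simply
 * "/global_vm".
 */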

/*
 * svm_map_region
 */
void *svm_map_region (svm_map_region_args_t *a)
{
    int svm_fd;
    svm_region_t *rp;
    pthread_mutexattr_t attr;
    pthread_condattr_t cattr;
    int deadman=0;
    u8 junk = 0;
    void *oldheap;
    int overhead_space;
    int rv;
    uword data_base;
    int nbits, words, bit;
    int pid_holding_region_lock;
    u8 *shm_name;
    int dead_region_recovery = 0;
    int time_left;
    struct stat stat;
    struct timespec ts, tsrem;

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] map region %s", getpid(), a->name);

    ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);
    ASSERT(a->name);

    shm_name = shm_name_from_svm_map_region_args (a);

    svm_fd = shm_open((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);

    if (svm_fd >= 0) {

        vec_free(shm_name);

        lseek(svm_fd, a->size, SEEK_SET);
        if (write(svm_fd, &junk, 1) != 1)
            clib_warning ("set region size");

        rp = mmap((void *)a->baseva, a->size,
                  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            clib_unix_warning ("mmap create");
            close(svm_fd);
            return (0);
        }
        close(svm_fd);
        memset(rp, 0, sizeof(*rp));

        if (pthread_mutexattr_init(&attr))
            clib_unix_warning("mutexattr_init");

        if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning("mutexattr_setpshared");

        if (pthread_mutex_init(&rp->mutex, &attr))
            clib_unix_warning("mutex_init");

        if (pthread_mutexattr_destroy(&attr))
            clib_unix_warning("mutexattr_destroy");

        if (pthread_condattr_init(&cattr))
            clib_unix_warning("condattr_init");

        if (pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED))
            clib_unix_warning("condattr_setpshared");

        if (pthread_cond_init(&rp->condvar, &cattr))
            clib_unix_warning("cond_init");

        if (pthread_condattr_destroy(&cattr))
            clib_unix_warning("condattr_destroy");

        region_lock (rp, 1);

        rp->virtual_base = a->baseva;
        rp->virtual_size = a->size;

        rp->region_heap =
            mheap_alloc_with_flags ((void *)(a->baseva+MMAP_PAGESIZE),
                                    SVM_PVT_MHEAP_SIZE,
                                    MHEAP_FLAG_DISABLE_VM);
        oldheap = svm_push_pvt_heap(rp);

        rp->region_name = (char *)format (0, "%s%c", a->name, 0);
        vec_add1(rp->client_pids, getpid());

        nbits = rp->virtual_size / MMAP_PAGESIZE;

        ASSERT (nbits > 0);
        rp->bitmap_size = nbits;
        words = (nbits + BITS(uword)-1) / BITS(uword);
        vec_validate (rp->bitmap, words-1);

        overhead_space = MMAP_PAGESIZE /* header */ +
            SVM_PVT_MHEAP_SIZE;

        bit = 0;
        data_base = (uword)rp->virtual_base;

        if (a->flags & SVM_FLAGS_NODATA)
            rp->flags |= SVM_FLAGS_NEED_DATA_INIT;

        /* Mark the overhead pages (header + private heap) in use */
        do {
            clib_bitmap_set_no_check (rp->bitmap, bit, 1);
            bit++;
            overhead_space -= MMAP_PAGESIZE;
            data_base += MMAP_PAGESIZE;
        } while (overhead_space > 0);

        rp->data_base = (void *)data_base;

        /*
         * Note: although the POSIX spec guarantees that only one
         * process enters this block, we have to play games
         * to hold off clients until e.g. the mutex is ready
         */
        rp->version = SVM_VERSION;

        /* setup the data portion of the region */

        rv = svm_data_region_create (a, rp);
        if (rv) {
            clib_warning ("data_region_create: %d", rv);
        }

        region_unlock(rp);

        svm_pop_heap(oldheap);

        return ((void *) rp);
    } else {
        svm_fd = shm_open((char *)shm_name, O_RDWR, 0777);

        vec_free(shm_name);

        if (svm_fd < 0) {
            perror("svm_region_map(mmap open)");
            return (0);
        }

        /* Wait up to 2 seconds for the creator to size the file */
        time_left = 20;
        while (1) {
            if (0 != fstat(svm_fd, &stat)) {
                clib_warning("fstat failed: %d", errno);
                close(svm_fd);
                return (0);
            }
            if (stat.st_size > 0) {
                break;
            }
            if (0 == time_left) {
                clib_warning("waiting for resize of shm file timed out");
                close(svm_fd);
                return (0);
            }
            ts.tv_sec = 0;
            ts.tv_nsec = 100000000;
            while (nanosleep(&ts, &tsrem) < 0)
                ts = tsrem;
            time_left--;
        }

        /* Map just the header page to learn where the region lives */
        rp = mmap(0, MMAP_PAGESIZE,
                  PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);

        if (rp == (svm_region_t *) MAP_FAILED) {
            close(svm_fd);
            clib_warning("mmap");
            return (0);
        }
        /*
         * We lost the footrace to create this region; make sure
         * the winner has crossed the finish line.
         */
        while (rp->version == 0 && deadman++ < 5) {
            sleep(1);
        }

        /*
         * <bleep>-ed?
         */
        if (rp->version == 0) {
            close(svm_fd);
            munmap(rp, MMAP_PAGESIZE);
            clib_warning("rp->version %d not %d", rp->version,
                         SVM_VERSION);
            return (0);
        }
        /* Remap now that the region has been placed */
        a->baseva = rp->virtual_base;
        a->size = rp->virtual_size;
        munmap(rp, MMAP_PAGESIZE);

        rp = (void *) mmap ((void *)a->baseva, a->size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_FIXED, svm_fd, 0);
        if ((uword)rp == (uword)MAP_FAILED) {
            close(svm_fd);
            clib_unix_warning ("mmap");
            return (0);
        }
        close(svm_fd);

        if ((uword) rp != rp->virtual_base) {
            clib_warning("mmap botch");
        }

        /*
         * Try to fix the region mutex if it is held by
         * a dead process
         */
        pid_holding_region_lock = rp->mutex_owner_pid;
        if (pid_holding_region_lock &&
            kill (pid_holding_region_lock, 0) < 0) {
            clib_warning (
                "region %s mutex held by dead pid %d, tag %d, force unlock",
                rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
            /* owner pid is nonexistent */
            rp->mutex.__data.__owner = 0;
            rp->mutex.__data.__lock = 0;
            dead_region_recovery = 1;
        }

        if (dead_region_recovery)
            clib_warning ("recovery: attempt to re-lock region");

        region_lock(rp, 2);
        oldheap = svm_push_pvt_heap (rp);
        vec_add1(rp->client_pids, getpid());

        if (dead_region_recovery)
            clib_warning ("recovery: attempt svm_data_region_map");

        rv = svm_data_region_map (a, rp);
        if (rv) {
            clib_warning ("data_region_map: %d", rv);
        }

        if (dead_region_recovery)
            clib_warning ("unlock and continue");

        region_unlock(rp);

        svm_pop_heap(oldheap);

        return ((void *) rp);

    }
    return 0; /* NOTREACHED */
}
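
/*
 * A minimal usage sketch (illustrative; the region name, base va and
 * size below are hypothetical, not API defaults):
 *
 *   svm_map_region_args_t args;
 *   memset (&args, 0, sizeof (args));
 *   args.root_path = 0;
 *   args.name = "/my-region";
 *   args.baseva = 0x30000000;     // must be MMAP_PAGESIZE aligned
 *   args.size = 64 << 20;         // must be a pagesize multiple
 *   args.flags = SVM_FLAGS_MHEAP;
 *   svm_region_t *rp = svm_map_region (&args);
 *
 * The first caller wins the O_CREAT|O_EXCL footrace and initializes
 * the region; later callers attach at the creator's base address.
 */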

static void svm_mutex_cleanup (void)
{
    int i;
    for (i = 0; i < nheld; i++) {
        pthread_mutex_unlock (mutexes_held[i]);
    }
}

static void svm_region_init_internal (char *root_path)
{
    svm_region_t *rp;
    svm_map_region_args_t *a=0;
    u64 ticks = clib_cpu_time_now();
    uword randomize_baseva;

    /* guard against klutz calls */
    root_rp_refcount++;
    if (root_rp)
        return;

    atexit(svm_mutex_cleanup);

    /* Randomize the shared-VM base at init time */
    if (MMAP_PAGESIZE <= (4<<10))
        randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
    else
        randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;

    vec_validate(a,0);
    a->root_path = root_path;
    a->name = SVM_GLOBAL_REGION_NAME;
    a->baseva = SVM_GLOBAL_REGION_BASEVA + randomize_baseva;
    a->size = SVM_GLOBAL_REGION_SIZE;
    a->flags = SVM_FLAGS_NODATA;

    rp = svm_map_region (a);
    ASSERT(rp);

    region_lock(rp, 3);

    /* Set up the main region data structures */
    if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) {
        svm_main_region_t *mp = 0;
        void *oldheap;

        rp->flags &= ~(SVM_FLAGS_NEED_DATA_INIT);

        oldheap = svm_push_pvt_heap (rp);
        vec_validate (mp, 0);
        mp->name_hash = hash_create_string (0, sizeof(uword));
        mp->root_path = root_path
            ? format (0, "%s%c", root_path, 0) : 0;
        rp->data_base = mp;
        svm_pop_heap (oldheap);
    }
    region_unlock(rp);
    vec_free (a);
    root_rp = rp;
}

void svm_region_init (void)
{
    svm_region_init_internal (0);
}

void svm_region_init_chroot (char *root_path)
{
    svm_region_init_internal (root_path);
}
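
/*
 * Illustrative only: a process joins the global region once at
 * startup, e.g. svm_region_init() for the default namespace, or
 * svm_region_init_chroot("vpp-test") to segregate a test instance
 * ("vpp-test" is a hypothetical root path). Repeat calls only bump
 * root_rp_refcount.
 */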

void *svm_region_find_or_create (svm_map_region_args_t *a)
{
    svm_main_region_t *mp;
    svm_region_t *rp;
    uword need_nbits;
    int index, i;
    void *oldheap;
    uword *p;
    u8 *name;
    svm_subregion_t *subp;

    ASSERT(root_rp);

    a->size += MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE;
    a->size = rnd_pagesize(a->size);

    region_lock (root_rp, 4);
    oldheap = svm_push_pvt_heap(root_rp);
    mp = root_rp->data_base;

    ASSERT(mp);

    /* Map the named region from the correct chroot environment */
    a->root_path = (char *) mp->root_path;

    /*
     * See if this region is already known. If it is, we're
     * almost done...
     */
    p = hash_get_mem (mp->name_hash, a->name);

    if (p) {
        rp = svm_map_region (a);
        region_unlock(root_rp);
        svm_pop_heap (oldheap);
        return rp;
    }

    /* Create the region. */
    ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);

    need_nbits = a->size / MMAP_PAGESIZE;

    index = 1; /* $$$ fixme, figure out how many bits to really skip */

    /*
     * Scan the virtual space allocation bitmap, looking for a large
     * enough chunk
     */
    do {
        if (clib_bitmap_get_no_check(root_rp->bitmap, index) == 0) {
            for (i = 0; i < (need_nbits-1); i++) {
                if (clib_bitmap_get_no_check(root_rp->bitmap,
                                             index+i) == 1) {
                    index = index + i;
                    goto next;
                }
            }
            break;
        }
        index++;
    next:;
    } while (index < root_rp->bitmap_size);

    /* Completely out of VM? */
    if (index >= root_rp->bitmap_size) {
        clib_warning("region %s: not enough VM to allocate 0x%x",
                     root_rp->region_name, a->size);
        svm_pop_heap (oldheap);
        region_unlock (root_rp);
        return 0;
    }

    /*
     * Mark virtual space allocated
     */
#if CLIB_DEBUG > 1
    clib_warning ("set %d bits at index %d", need_nbits, index);
#endif

    for (i = 0; i < need_nbits; i++) {
        clib_bitmap_set_no_check (root_rp->bitmap, index+i, 1);
    }

    /* Place this region where it goes... */
    a->baseva = root_rp->virtual_base + index*MMAP_PAGESIZE;

    rp = svm_map_region (a);

    pool_get (mp->subregions, subp);
    name = format (0, "%s%c", a->name, 0);
    subp->subregion_name = name;

    hash_set_mem (mp->name_hash, name, subp - mp->subregions);

    svm_pop_heap (oldheap);

    region_unlock (root_rp);

    return (rp);
}
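
/*
 * A minimal sketch (illustrative; the region name and size are
 * hypothetical): clients normally go through this wrapper rather
 * than svm_map_region, letting the root region choose the base va
 * from its allocation bitmap.
 *
 *   svm_map_region_args_t args;
 *   memset (&args, 0, sizeof (args));
 *   args.name = "/my-subregion";
 *   args.size = 2 << 20;
 *   args.flags = SVM_FLAGS_MHEAP;
 *   svm_region_t *rp = svm_region_find_or_create (&args);
 */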

/*
 * svm_region_unmap
 *
 * Let go of the indicated region. If the calling process
 * is the last customer, throw it away completely.
 * The root region mutex guarantees atomicity with respect to
 * a new region client showing up at the wrong moment.
 */
void svm_region_unmap (void *rp_arg)
{
    int i, mypid = getpid();
    int nclients_left;
    void *oldheap;
    uword virtual_base, virtual_size;
    svm_region_t *rp = rp_arg;
    char *name;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    ASSERT(rp);
    ASSERT(root_rp);

    if (CLIB_DEBUG > 1)
        clib_warning ("[%d] unmap region %s", getpid(), rp->region_name);

    region_lock (root_rp, 5);
    region_lock (rp, 6);

    oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */

    /* Remove the caller from the list of mappers */
    for (i = 0; i < vec_len(rp->client_pids); i++) {
        if (rp->client_pids[i] == mypid) {
            vec_delete (rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning("pid %d AWOL", mypid);

found:

    svm_pop_heap (oldheap);

    nclients_left = vec_len(rp->client_pids);
    virtual_base = rp->virtual_base;
    virtual_size = rp->virtual_size;

    if (nclients_left == 0) {
        int index, nbits, i;
        svm_main_region_t *mp;
        uword *p;
        svm_subregion_t *subp;

        /* Kill the region, last guy on his way out */

        oldheap = svm_push_pvt_heap (root_rp);
        name = vec_dup (rp->region_name);

        virtual_base = rp->virtual_base;
        virtual_size = rp->virtual_size;

        /* Figure out which bits to clear in the root region bitmap */
        index = (virtual_base - root_rp->virtual_base)
            / MMAP_PAGESIZE;

        nbits = (virtual_size + MMAP_PAGESIZE - 1)
            / MMAP_PAGESIZE;

#if CLIB_DEBUG > 1
        clib_warning ("clear %d bits at index %d", nbits, index);
#endif
        /* Give back the allocated VM */
        for (i = 0; i < nbits; i++) {
            clib_bitmap_set_no_check (root_rp->bitmap, index+i, 0);
        }

        mp = root_rp->data_base;

        p = hash_get_mem (mp->name_hash, name);

        /* Better never happen ... */
        if (p == NULL) {
            region_unlock (rp);
            region_unlock (root_rp);
            svm_pop_heap (oldheap);
            clib_warning ("Region name '%s' not found?", name);
            return;
        }

        /* Remove from the root region subregion pool */
        subp = mp->subregions + p[0];
        pool_put (mp->subregions, subp);

        hash_unset_mem (mp->name_hash, name);

        vec_free(name);

        region_unlock (rp);
        shm_unlink(rp->region_name);
        munmap ((void *)virtual_base, virtual_size);
        region_unlock (root_rp);
        svm_pop_heap (oldheap);
        return;
    }

    region_unlock(rp);
    region_unlock(root_rp);

    munmap ((void *)virtual_base, virtual_size);
}
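
/*
 * Usage note (illustrative): every successful svm_map_region or
 * svm_region_find_or_create should eventually be balanced by
 * svm_region_unmap (rp) in the same process, so the client pid list
 * and the root region's VM bitmap stay accurate.
 */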

/*
 * svm_region_exit
 * There is no clean way to unlink the
 * root region when all clients go away,
 * so remove the pid entry and call it a day.
 */
void svm_region_exit ()
{
    void *oldheap;
    int i, mypid = getpid();
    uword virtual_base, virtual_size;

    /* It felt so nice we did it twice... */
    if (root_rp == 0)
        return;

    if (--root_rp_refcount > 0)
        return;

    /*
     * If we take a signal while holding one or more shared-memory
     * mutexes, we may end up back here from an otherwise
     * benign exit handler. Bail out to avoid a recursive
     * mutex screw-up.
     */
    if (nheld)
        return;

    region_lock(root_rp, 7);
    oldheap = svm_push_pvt_heap (root_rp);

    virtual_base = root_rp->virtual_base;
    virtual_size = root_rp->virtual_size;

    for (i = 0; i < vec_len(root_rp->client_pids); i++) {
        if (root_rp->client_pids[i] == mypid) {
            vec_delete (root_rp->client_pids, 1, i);
            goto found;
        }
    }
    clib_warning("pid %d AWOL", mypid);

found:

    region_unlock(root_rp);
    svm_pop_heap (oldheap);

    root_rp = 0;
    munmap ((void *)virtual_base, virtual_size);
}

void svm_client_scan_this_region_nolock (svm_region_t *rp)
{
    int j;
    int mypid = getpid();
    void *oldheap;

    for (j = 0; j < vec_len(rp->client_pids); j++) {
        if (mypid == rp->client_pids[j])
            continue;
        if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          rp->region_name, rp->client_pids[j]);
            /* nb: client vec in rp->region_heap */
            oldheap = svm_push_pvt_heap (rp);
            vec_delete (rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }
}

/*
 * Scan svm regions for dead clients
 */
void svm_client_scan(char *root_path)
{
    int i, j;
    svm_main_region_t *mp;
    svm_map_region_args_t *a = 0;
    svm_region_t *root_rp;
    svm_region_t *rp;
    svm_subregion_t *subp;
    u8 *name=0;
    u8 ** svm_names=0;
    void *oldheap;
    int mypid = getpid();

    vec_validate (a, 0);

    svm_region_init_chroot(root_path);

    root_rp = svm_get_root_rp();

    pthread_mutex_lock (&root_rp->mutex);

    mp = root_rp->data_base;

    for (j = 0; j < vec_len (root_rp->client_pids); j++) {
        if (mypid == root_rp->client_pids[j])
            continue;
        if (root_rp->client_pids[j]
            && (kill (root_rp->client_pids[j], 0) < 0)) {
            clib_warning ("%s: cleanup ghost pid %d",
                          root_rp->region_name, root_rp->client_pids[j]);
            /* nb: client vec in root_rp->region_heap */
            oldheap = svm_push_pvt_heap (root_rp);
            vec_delete (root_rp->client_pids, 1, j);
            j--;
            svm_pop_heap (oldheap);
        }
    }

    /*
     * Snapshot names; can't hold the root rp mutex across
     * find_or_create.
     */
    pool_foreach (subp, mp->subregions, ({
        name = vec_dup (subp->subregion_name);
        vec_add1(svm_names, name);
    }));

    pthread_mutex_unlock (&root_rp->mutex);

    for (i = 0; i < vec_len(svm_names); i++) {
        vec_validate(a, 0);
        a->root_path = root_path;
        a->name = (char *) svm_names[i];
        rp = svm_region_find_or_create (a);
        if (rp) {
            pthread_mutex_lock (&rp->mutex);

            svm_client_scan_this_region_nolock (rp);

            pthread_mutex_unlock (&rp->mutex);
            svm_region_unmap (rp);
            vec_free(svm_names[i]);
        }
        vec_free (a);
    }
    vec_free(svm_names);

    svm_region_exit ();

    vec_free (a);
}