32/64 shmem bihash interoperability
Move the binary api segment above 4gb
Change-Id: I40e8aa7a97722a32397f5a538b5ff8344c50d408
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index e13ceb7..fa92c8b 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -23,10 +23,10 @@
nbytes += CLIB_CACHE_LINE_BYTES - 1;
nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1);
- rv = h->alloc_arena_next;
- h->alloc_arena_next += nbytes;
+ rv = alloc_arena_next (h);
+ alloc_arena_next (h) += nbytes;
- if (rv >= (h->alloc_arena + h->alloc_arena_size))
+ if (rv >= (alloc_arena (h) + alloc_arena_size (h)))
os_out_of_memory ();
return (void *) rv;
@@ -52,9 +52,9 @@
*/
ASSERT (memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
- h->alloc_arena = (uword) clib_mem_vm_alloc (memory_size);
- h->alloc_arena_next = h->alloc_arena;
- h->alloc_arena_size = memory_size;
+ alloc_arena (h) = (uword) clib_mem_vm_alloc (memory_size);
+ alloc_arena_next (h) = alloc_arena (h);
+ alloc_arena_size (h) = memory_size;
bucket_size = nbuckets * sizeof (h->buckets[0]);
h->buckets = BV (alloc_aligned) (h, bucket_size);
@@ -65,6 +65,129 @@
h->fmt_fn = NULL;
}
+#if BIHASH_32_64_SVM
+#if !defined (MFD_ALLOW_SEALING)
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+void BV (clib_bihash_master_init_svm)
+ (BVT (clib_bihash) * h, char *name, u32 nbuckets,
+ u64 base_address, u64 memory_size)
+{
+ uword bucket_size;
+ u8 *mmap_addr;
+ vec_header_t *freelist_vh;
+ int fd;
+
+ ASSERT (base_address);
+ ASSERT (base_address + memory_size < (1ULL << 32));
+
+ /* Set up for memfd sharing */
+ if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
+ {
+ clib_unix_warning ("memfd_create");
+ return;
+ }
+
+ if (ftruncate (fd, memory_size) < 0)
+ {
+ clib_unix_warning ("ftruncate");
+ return;
+ }
+
+ /* Not mission-critical, complain and continue */
+ if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
+ clib_unix_warning ("fcntl (F_ADD_SEALS)");
+
+ mmap_addr = mmap (u64_to_pointer (base_address), memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd,
+ 0 /* offset */ );
+
+ if (mmap_addr == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap failed");
+ ASSERT (0);
+ }
+
+ h->sh = (void *) mmap_addr;
+ h->memfd = fd;
+ nbuckets = 1 << (max_log2 (nbuckets));
+
+ h->name = (u8 *) name;
+ h->sh->nbuckets = h->nbuckets = nbuckets;
+ h->log2_nbuckets = max_log2 (nbuckets);
+
+ alloc_arena (h) = (u64) (uword) mmap_addr;
+ alloc_arena_next (h) = alloc_arena (h) + CLIB_CACHE_LINE_BYTES;
+ alloc_arena_size (h) = memory_size;
+
+ bucket_size = nbuckets * sizeof (h->buckets[0]);
+ h->buckets = BV (alloc_aligned) (h, bucket_size);
+ h->sh->buckets_as_u64 = (u64) (uword) h->buckets;
+
+ h->alloc_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES);
+ h->alloc_lock[0] = 0;
+
+ h->sh->alloc_lock_as_u64 = (u64) (uword) (h->alloc_lock);
+ freelist_vh = BV (alloc_aligned) (h, sizeof (vec_header_t) +
+ BIHASH_FREELIST_LENGTH * sizeof (u64));
+ freelist_vh->len = BIHASH_FREELIST_LENGTH;
+ freelist_vh->dlmalloc_header_offset = 0xDEADBEEF;
+ h->sh->freelists_as_u64 = (u64) (uword) freelist_vh->vector_data;
+ h->freelists = (void *) (uword) (h->sh->freelists_as_u64);
+
+ h->fmt_fn = NULL;
+}
+
+void BV (clib_bihash_slave_init_svm)
+ (BVT (clib_bihash) * h, char *name, int fd)
+{
+ u8 *mmap_addr;
+ u64 base_address, memory_size;
+ BVT (clib_bihash_shared_header) * sh;
+
+ /* Trial mapping, to place the segment */
+ mmap_addr = mmap (0, 4096, PROT_READ, MAP_SHARED, fd, 0 /* offset */ );
+ if (mmap_addr == MAP_FAILED)
+ {
+ clib_unix_warning ("trial mmap failed");
+ ASSERT (0);
+ }
+
+ sh = (BVT (clib_bihash_shared_header) *) mmap_addr;
+
+ base_address = sh->alloc_arena;
+ memory_size = sh->alloc_arena_size;
+
+ munmap (mmap_addr, 4096);
+
+ /* Actual mapping, at the required address */
+ mmap_addr = mmap (u64_to_pointer (base_address), memory_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd,
+ 0 /* offset */ );
+
+ if (mmap_addr == MAP_FAILED)
+ {
+ clib_unix_warning ("mmap failed");
+ ASSERT (0);
+ }
+
+ (void) close (fd);
+
+ h->sh = (void *) mmap_addr;
+ h->memfd = -1;
+
+ h->name = (u8 *) name;
+ h->buckets = u64_to_pointer (h->sh->buckets_as_u64);
+ h->nbuckets = h->sh->nbuckets;
+ h->log2_nbuckets = max_log2 (h->nbuckets);
+
+ h->alloc_lock = u64_to_pointer (h->sh->alloc_lock_as_u64);
+ h->freelists = u64_to_pointer (h->sh->freelists_as_u64);
+ h->fmt_fn = NULL;
+}
+#endif /* BIHASH_32_64_SVM */
+
void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
format_function_t * fmt_fn)
{
@@ -74,8 +197,13 @@
void BV (clib_bihash_free) (BVT (clib_bihash) * h)
{
vec_free (h->working_copies);
+#if BIHASH_32_64_SVM == 0
vec_free (h->freelists);
- clib_mem_vm_free ((void *) (h->alloc_arena), h->alloc_arena_size);
+#else
+ if (h->memfd > 0)
+ (void) close (h->memfd);
+#endif
+ clib_mem_vm_free ((void *) (uword) (alloc_arena (h)), alloc_arena_size (h));
memset (h, 0, sizeof (*h));
}
@@ -86,14 +214,19 @@
BVT (clib_bihash_value) * rv = 0;
ASSERT (h->alloc_lock[0]);
+
+#if BIHASH_32_64_SVM
+ ASSERT (log2_pages < vec_len (h->freelists));
+#endif
+
if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0)
{
vec_validate_init_empty (h->freelists, log2_pages, 0);
rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages)));
goto initialize;
}
- rv = h->freelists[log2_pages];
- h->freelists[log2_pages] = rv->next_free;
+ rv = (void *) (uword) h->freelists[log2_pages];
+ h->freelists[log2_pages] = rv->next_free_as_u64;
initialize:
ASSERT (rv);
@@ -117,8 +250,8 @@
if (CLIB_DEBUG > 0)
memset (v, 0xFE, sizeof (*v) * (1 << log2_pages));
- v->next_free = h->freelists[log2_pages];
- h->freelists[log2_pages] = v;
+ v->next_free_as_u64 = (u64) h->freelists[log2_pages];
+ h->freelists[log2_pages] = (u64) (uword) v;
}
static inline void
@@ -361,6 +494,7 @@
CLIB_MEMORY_BARRIER (); /* Make sure the value has settled */
clib_memcpy (&(v->kvp[i]), &add_v->key, sizeof (add_v->key));
b->refcnt++;
+ ASSERT (b->refcnt > 0);
BV (clib_bihash_unlock_bucket) (b);
return (0);
}
@@ -490,6 +624,7 @@
tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
tmp_b.linear_search = mark_bucket_linear;
tmp_b.refcnt = h->saved_bucket.refcnt + 1;
+ ASSERT (tmp_b.refcnt > 0);
tmp_b.lock = 0;
CLIB_MEMORY_BARRIER ();
b->as_u64 = tmp_b.as_u64;
@@ -587,7 +722,7 @@
if (verbose)
{
- s = format (s, "[%d]: heap offset %d, len %d, linear %d\n", i,
+ s = format (s, "[%d]: heap offset %lld, len %d, linear %d\n", i,
b->offset, (1 << b->log2_pages), b->linear_search);
}
@@ -633,24 +768,25 @@
u32 nfree = 0;
BVT (clib_bihash_value) * free_elt;
- free_elt = h->freelists[i];
+ free_elt = (void *) (uword) h->freelists[i];
while (free_elt)
{
nfree++;
- free_elt = free_elt->next_free;
+ free_elt = (void *) (uword) free_elt->next_free_as_u64;
}
- s = format (s, " [len %d] %u free elts\n", 1 << i, nfree);
+ if (nfree || verbose)
+ s = format (s, " [len %d] %u free elts\n", 1 << i, nfree);
}
s = format (s, " %lld linear search buckets\n", linear_buckets);
- used_bytes = h->alloc_arena_next - h->alloc_arena;
+ used_bytes = alloc_arena_next (h) - alloc_arena (h);
s = format (s,
" arena: base %llx, next %llx\n"
" used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n",
- h->alloc_arena, h->alloc_arena_next,
+ alloc_arena (h), alloc_arena_next (h),
used_bytes, used_bytes >> 20,
- h->alloc_arena_size, h->alloc_arena_size >> 20);
+ alloc_arena_size (h), alloc_arena_size (h) >> 20);
return s;
}