vppinfra: add clib_mem_vm_ext_alloc function
Change-Id: Iff33694fc42cc3bcc73cf1372339053a6365039c
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 4ce1919..790f168 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -37,10 +37,11 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <vppinfra/linux/sysfs.h>
+
#include <vlib/vlib.h>
#include <vlib/pci/pci.h>
#include <vlib/unix/unix.h>
-#include <vlib/linux/sysfs.h>
#include <sys/types.h>
#include <sys/stat.h>
@@ -104,7 +105,7 @@
format_vlib_pci_addr, &d->bus_address);
s = format (s, "%v/driver%c", dev_dir_name, 0);
- driver_name = vlib_sysfs_link_to_name ((char *) s);
+ driver_name = clib_sysfs_link_to_name ((char *) s);
vec_reset_length (s);
if (driver_name &&
@@ -183,32 +184,32 @@
vec_reset_length (s);
s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
- vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+ clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
vec_reset_length (s);
s = format (s, "%v/driver_override%c", dev_dir_name, 0);
if (access ((char *) s, F_OK) == 0)
{
- vlib_sysfs_write ((char *) s, "%s", uio_driver_name);
+ clib_sysfs_write ((char *) s, "%s", uio_driver_name);
clear_driver_override = 1;
}
else
{
vec_reset_length (s);
s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0);
- vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id,
+ clib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id,
d->device_id);
}
vec_reset_length (s);
s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0);
- vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+ clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
vec_reset_length (s);
if (clear_driver_override)
{
s = format (s, "%v/driver_override%c", dev_dir_name, 0);
- vlib_sysfs_write ((char *) s, "%c", 0);
+ clib_sysfs_write ((char *) s, "%c", 0);
vec_reset_length (s);
}
@@ -602,28 +603,28 @@
dev->numa_node = -1;
vec_reset_length (f);
f = format (f, "%v/numa_node%c", dev_dir_name, 0);
- vlib_sysfs_read ((char *) f, "%u", &dev->numa_node);
+ clib_sysfs_read ((char *) f, "%u", &dev->numa_node);
vec_reset_length (f);
f = format (f, "%v/class%c", dev_dir_name, 0);
- vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
dev->device_class = tmp >> 8;
vec_reset_length (f);
f = format (f, "%v/vendor%c", dev_dir_name, 0);
- vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
dev->vendor_id = tmp;
vec_reset_length (f);
f = format (f, "%v/device%c", dev_dir_name, 0);
- vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ clib_sysfs_read ((char *) f, "0x%x", &tmp);
dev->device_id = tmp;
error = init_device (vm, dev, &pdev);
vec_reset_length (f);
f = format (f, "%v/driver%c", dev_dir_name, 0);
- dev->driver_name = vlib_sysfs_link_to_name ((char *) f);
+ dev->driver_name = clib_sysfs_link_to_name ((char *) f);
done:
vec_free (f);
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
index d8c5dc9..3cc42a0 100644
--- a/src/vlib/linux/physmem.c
+++ b/src/vlib/linux/physmem.c
@@ -43,14 +43,12 @@
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
-#include <numa.h>
-#include <numaif.h>
+#include <vppinfra/linux/syscall.h>
+#include <vppinfra/linux/sysfs.h>
#include <vlib/vlib.h>
#include <vlib/physmem.h>
#include <vlib/unix/unix.h>
-#include <vlib/linux/syscall.h>
-#include <vlib/linux/sysfs.h>
static void *
unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
@@ -111,31 +109,6 @@
mheap_put (pr->heap, x - pr->heap);
}
-static u64
-get_page_paddr (int fd, uword addr)
-{
- int pagesize = sysconf (_SC_PAGESIZE);
- u64 seek, pagemap = 0;
-
- seek = ((u64) addr / pagesize) * sizeof (u64);
- if (lseek (fd, seek, SEEK_SET) != seek)
- {
- clib_unix_warning ("lseek to 0x%llx", seek);
- return 0;
- }
- if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
- {
- clib_unix_warning ("read ptbits");
- return 0;
- }
- if ((pagemap & (1ULL << 63)) == 0)
- return 0;
-
- pagemap &= pow2_mask (55);
-
- return pagemap * pagesize;
-}
-
static clib_error_t *
unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
u8 numa_node, u32 flags,
@@ -144,13 +117,8 @@
vlib_physmem_main_t *vpm = &vm->physmem_main;
vlib_physmem_region_t *pr;
clib_error_t *error = 0;
- int pagemap_fd = -1;
- u8 *mount_dir = 0;
- u8 *filename = 0;
- struct stat st;
- int old_mpol;
- int mmap_flags;
- struct bitmask *old_mask = numa_allocate_nodemask ();
+ clib_mem_vm_alloc_t alloc = { 0 };
+
if (geteuid () != 0 && (flags & VLIB_PHYSMEM_F_FAKE) == 0)
return clib_error_return (0, "not allowed");
@@ -163,113 +131,32 @@
goto error;
}
- pr->index = pr - vpm->regions;
- pr->fd = -1;
- pr->flags = flags;
-
- if (get_mempolicy (&old_mpol, old_mask->maskp, old_mask->size + 1, NULL, 0)
- == -1)
- {
- if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
- {
- error = clib_error_return_unix (0, "get_mempolicy");
- goto error;
- }
- else
- old_mpol = -1;
- }
+ alloc.name = name;
+ alloc.size = size;
+ alloc.numa_node = numa_node;
+ alloc.flags = CLIB_MEM_VM_F_SHARED;
if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
{
- if ((pagemap_fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
- {
- error = clib_error_return_unix (0, "open '/proc/self/pagemap'");
- goto error;
- }
-
- mount_dir = format (0, "%s/physmem_region%d%c",
- vlib_unix_get_runtime_dir (), pr->index, 0);
- filename = format (0, "%s/mem%c", mount_dir, 0);
-
- unlink ((char *) mount_dir);
-
- error = vlib_unix_recursive_mkdir ((char *) mount_dir);
- if (error)
- goto error;
-
- if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL))
- {
- error = clib_error_return_unix (0, "mount hugetlb directory '%s'",
- mount_dir);
- goto error;
- }
-
- if ((pr->fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
- {
- error = clib_error_return_unix (0, "open");
- goto error;
- }
-
- mmap_flags = MAP_SHARED | MAP_HUGETLB | MAP_LOCKED;
+ alloc.flags |= CLIB_MEM_VM_F_HUGETLB;
+ alloc.flags |= CLIB_MEM_VM_F_HUGETLB_PREALLOC;
+ alloc.flags |= CLIB_MEM_VM_F_NUMA_FORCE;
}
else
{
- if ((pr->fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
- return clib_error_return_unix (0, "memfd_create");
-
- if ((fcntl (pr->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
- {
- error =
- clib_error_return_unix (0, "fcntl (F_ADD_SEALS, F_SEAL_SHRINK)");
- goto error;
- }
- mmap_flags = MAP_SHARED;
+ alloc.flags |= CLIB_MEM_VM_F_NUMA_PREFER;
}
- if (fstat (pr->fd, &st))
- {
- error = clib_error_return_unix (0, "fstat");
- goto error;
- }
+ error = clib_mem_vm_ext_alloc (&alloc);
+ if (error)
+ goto error;
- pr->log2_page_size = min_log2 (st.st_blksize);
- pr->n_pages = ((size - 1) >> pr->log2_page_size) + 1;
- size = pr->n_pages * (1 << pr->log2_page_size);
-
- if ((ftruncate (pr->fd, size)) == -1)
- {
- error = clib_error_return_unix (0, "ftruncate length: %d", size);
- goto error;
- }
-
- if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
- {
- error = vlib_sysfs_prealloc_hugepages (numa_node,
- 1 << (pr->log2_page_size - 10),
- pr->n_pages);
- if (error)
- goto error;
- }
-
- if (old_mpol != -1)
- numa_set_preferred (numa_node);
-
- pr->mem = mmap (0, size, (PROT_READ | PROT_WRITE), mmap_flags, pr->fd, 0);
-
- if (pr->mem == MAP_FAILED)
- {
- pr->mem = 0;
- error = clib_error_return_unix (0, "mmap");
- goto error;
- }
-
- if (old_mpol != -1 &&
- set_mempolicy (old_mpol, old_mask->maskp, old_mask->size + 1) == -1)
- {
- error = clib_error_return_unix (0, "set_mempolicy");
- goto error;
- }
-
+ pr->index = pr - vpm->regions;
+ pr->flags = flags;
+ pr->fd = alloc.fd;
+ pr->mem = alloc.addr;
+ pr->log2_page_size = alloc.log2_page_size;
+ pr->n_pages = alloc.n_pages;
pr->size = pr->n_pages << pr->log2_page_size;
pr->page_mask = (1 << pr->log2_page_size) - 1;
pr->numa_node = numa_node;
@@ -285,13 +172,14 @@
move_pages (0, 1, &ptr, 0, &node, 0);
if (numa_node != node)
{
- clib_warning
- ("physmem page for region \'%s\' allocated on the wrong"
- " numa node (requested %u actual %u)", pr->name,
- pr->numa_node, node, i);
+ clib_warning ("physmem page for region \'%s\' allocated on the"
+ " wrong numa node (requested %u actual %u)",
+ pr->name, pr->numa_node, node, i);
break;
}
}
+ pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
+ pr->n_pages);
}
if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)
@@ -309,41 +197,13 @@
*idx = pr->index;
- if ((flags & VLIB_PHYSMEM_F_FAKE) == 0)
- {
- int i;
- for (i = 0; i < pr->n_pages; i++)
- {
- uword vaddr =
- pointer_to_uword (pr->mem) + (((u64) i) << pr->log2_page_size);
- u64 page_paddr = get_page_paddr (pagemap_fd, vaddr);
- vec_add1 (pr->page_table, page_paddr);
- }
- }
-
goto done;
error:
- if (pr->fd > -1)
- close (pr->fd);
-
- if (pr->mem)
- munmap (pr->mem, size);
-
memset (pr, 0, sizeof (*pr));
pool_put (vpm->regions, pr);
done:
- if (mount_dir)
- {
- umount2 ((char *) mount_dir, MNT_DETACH);
- rmdir ((char *) mount_dir);
- vec_free (mount_dir);
- }
- numa_free_cpumask (old_mask);
- vec_free (filename);
- if (pagemap_fd > -1)
- close (pagemap_fd);
return error;
}
diff --git a/src/vlib/linux/syscall.h b/src/vlib/linux/syscall.h
deleted file mode 100644
index 9e37997..0000000
--- a/src/vlib/linux/syscall.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_linux_syscall_h
-#define included_linux_syscall_h
-
-#ifndef __NR_memfd_create
-#if defined __x86_64__
-#define __NR_memfd_create 319
-#elif defined __arm__
-#define __NR_memfd_create 385
-#elif defined __aarch64__
-#define __NR_memfd_create 279
-#else
-#error "__NR_memfd_create unknown for this architecture"
-#endif
-#endif
-
-static inline int
-memfd_create (const char *name, unsigned int flags)
-{
- return syscall (__NR_memfd_create, name, flags);
-}
-
-#ifndef F_LINUX_SPECIFIC_BASE
-#define F_LINUX_SPECIFIC_BASE 1024
-#endif
-#define MFD_ALLOW_SEALING 0x0002U
-#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-
-#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
-#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
-#define F_SEAL_GROW 0x0004 /* prevent file from growing */
-#define F_SEAL_WRITE 0x0008 /* prevent writes */
-
-
-#endif /* included_linux_syscall_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/linux/sysfs.c b/src/vlib/linux/sysfs.c
deleted file mode 100644
index f92f9ef..0000000
--- a/src/vlib/linux/sysfs.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <dirent.h>
-
-clib_error_t *
-vlib_sysfs_write (char *file_name, char *fmt, ...)
-{
- u8 *s;
- int fd;
- clib_error_t *error = 0;
-
- fd = open (file_name, O_WRONLY);
- if (fd < 0)
- return clib_error_return_unix (0, "open `%s'", file_name);
-
- va_list va;
- va_start (va, fmt);
- s = va_format (0, fmt, &va);
- va_end (va);
-
- if (write (fd, s, vec_len (s)) < 0)
- error = clib_error_return_unix (0, "write `%s'", file_name);
-
- vec_free (s);
- close (fd);
- return error;
-}
-
-clib_error_t *
-vlib_sysfs_read (char *file_name, char *fmt, ...)
-{
- unformat_input_t input;
- u8 *s = 0;
- int fd;
- ssize_t sz;
- uword result;
-
- fd = open (file_name, O_RDONLY);
- if (fd < 0)
- return clib_error_return_unix (0, "open `%s'", file_name);
-
- vec_validate (s, 4095);
-
- sz = read (fd, s, vec_len (s));
- if (sz < 0)
- {
- close (fd);
- vec_free (s);
- return clib_error_return_unix (0, "read `%s'", file_name);
- }
-
- _vec_len (s) = sz;
- unformat_init_vector (&input, s);
-
- va_list va;
- va_start (va, fmt);
- result = va_unformat (&input, fmt, &va);
- va_end (va);
-
- vec_free (s);
- close (fd);
-
- if (result == 0)
- return clib_error_return (0, "unformat error");
-
- return 0;
-}
-
-u8 *
-vlib_sysfs_link_to_name (char *link)
-{
- char *p, buffer[64];
- unformat_input_t in;
- u8 *s = 0;
- int r;
-
- r = readlink (link, buffer, sizeof (buffer) - 1);
-
- if (r < 0)
- return 0;
-
- buffer[r] = 0;
- p = strrchr (buffer, '/');
-
- if (!p)
- return 0;
-
- unformat_init_string (&in, p + 1, strlen (p + 1));
- if (unformat (&in, "%s", &s) != 1)
- clib_unix_warning ("no string?");
- unformat_free (&in);
-
- return s;
-}
-
-clib_error_t *
-vlib_sysfs_set_nr_hugepages (unsigned int numa_node, int page_size, int nr)
-{
- clib_error_t *error = 0;
- struct stat sb;
- u8 *p = 0;
-
- p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0);
-
- if (stat ((char *) p, &sb) == 0)
- {
- if (S_ISDIR (sb.st_mode) == 0)
- {
- error = clib_error_return (0, "'%s' is not directory", p);
- goto done;
- }
- }
- else if (numa_node == 0)
- {
- vec_reset_length (p);
- p = format (p, "/sys/kernel/mm%c", 0);
- if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0)
- {
- error = clib_error_return (0, "'%s' does not exist or it is not "
- "directory", p);
- goto done;
- }
- }
- else
- {
- error = clib_error_return (0, "'%s' does not exist", p);
- goto done;
- }
-
- _vec_len (p) -= 1;
- p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0);
- vlib_sysfs_write ((char *) p, "%d", nr);
-
-done:
- vec_free (p);
- return error;
-}
-
-
-static clib_error_t *
-vlib_sysfs_get_xxx_hugepages (char *type, unsigned int numa_node,
- int page_size, int *val)
-{
- clib_error_t *error = 0;
- struct stat sb;
- u8 *p = 0;
-
- p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0);
-
- if (stat ((char *) p, &sb) == 0)
- {
- if (S_ISDIR (sb.st_mode) == 0)
- {
- error = clib_error_return (0, "'%s' is not directory", p);
- goto done;
- }
- }
- else if (numa_node == 0)
- {
- vec_reset_length (p);
- p = format (p, "/sys/kernel/mm%c", 0);
- if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0)
- {
- error = clib_error_return (0, "'%s' does not exist or it is not "
- "directory", p);
- goto done;
- }
- }
- else
- {
- error = clib_error_return (0, "'%s' does not exist", p);
- goto done;
- }
-
- _vec_len (p) -= 1;
- p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size,
- type, 0);
- error = vlib_sysfs_read ((char *) p, "%d", val);
-
-done:
- vec_free (p);
- return error;
-}
-
-clib_error_t *
-vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size, int *v)
-{
- return vlib_sysfs_get_xxx_hugepages ("free", numa_node, page_size, v);
-}
-
-clib_error_t *
-vlib_sysfs_get_nr_hugepages (unsigned int numa_node, int page_size, int *v)
-{
- return vlib_sysfs_get_xxx_hugepages ("nr", numa_node, page_size, v);
-}
-
-clib_error_t *
-vlib_sysfs_get_surplus_hugepages (unsigned int numa_node, int page_size,
- int *v)
-{
- return vlib_sysfs_get_xxx_hugepages ("surplus", numa_node, page_size, v);
-}
-
-clib_error_t *
-vlib_sysfs_prealloc_hugepages (unsigned int numa_node, int page_size, int nr)
-{
- clib_error_t *error = 0;
- int n, needed;
- error = vlib_sysfs_get_free_hugepages (numa_node, page_size, &n);
- if (error)
- return error;
- needed = nr - n;
- if (needed <= 0)
- return 0;
-
- error = vlib_sysfs_get_nr_hugepages (numa_node, page_size, &n);
- if (error)
- return error;
- clib_warning ("pre-allocating %u additional %uK hugepages on numa node %u",
- needed, page_size, numa_node);
- return vlib_sysfs_set_nr_hugepages (numa_node, page_size, n + needed);
-}
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/linux/sysfs.h b/src/vlib/linux/sysfs.h
deleted file mode 100644
index 14b7131..0000000
--- a/src/vlib/linux/sysfs.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_linux_sysfs_h
-#define included_linux_sysfs_h
-
-clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...);
-
-clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...);
-
-u8 *vlib_sysfs_link_to_name (char *link);
-
-clib_error_t *vlib_sysfs_set_nr_hugepages (unsigned int numa_node,
- int page_size, int nr);
-clib_error_t *vlib_sysfs_get_nr_hugepages (unsigned int numa_node,
- int page_size, int *v);
-clib_error_t *vlib_sysfs_get_free_hugepages (unsigned int numa_node,
- int page_size, int *v);
-clib_error_t *vlib_sysfs_get_surplus_hugepages (unsigned int numa_node,
- int page_size, int *v);
-clib_error_t *vlib_sysfs_prealloc_hugepages (unsigned int numa_node,
- int page_size, int nr);
-
-#endif /* included_linux_sysfs_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */