blob: 27a5bacffb046431444f73cfc58efec32750fede [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * physmem.c: Unix physical memory
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vlib/unix/physmem.h>
41
42static physmem_main_t physmem_main;
43
44static void *
Dave Barach9b8ffd92016-07-08 08:13:45 -040045unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes,
46 uword alignment)
Ed Warnickecb9cada2015-12-08 15:45:58 -070047{
Dave Barach9b8ffd92016-07-08 08:13:45 -040048 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049 uword lo_offset, hi_offset;
Dave Barach9b8ffd92016-07-08 08:13:45 -040050 uword *to_free = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070051
Ed Warnickecb9cada2015-12-08 15:45:58 -070052 /* IO memory is always at least cache aligned. */
53 alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
54
55 while (1)
56 {
57 mheap_get_aligned (pm->heap, n_bytes,
58 /* align */ alignment,
59 /* align offset */ 0,
60 &lo_offset);
61
62 /* Allocation failed? */
63 if (lo_offset == ~0)
64 break;
65
66 /* Make sure allocation does not span DMA physical chunk boundary. */
67 hi_offset = lo_offset + n_bytes - 1;
68
69 if ((lo_offset >> vpm->log2_n_bytes_per_page) ==
70 (hi_offset >> vpm->log2_n_bytes_per_page))
71 break;
72
73 /* Allocation would span chunk boundary, queue it to be freed as soon as
Dave Barach9b8ffd92016-07-08 08:13:45 -040074 we find suitable chunk. */
Ed Warnickecb9cada2015-12-08 15:45:58 -070075 vec_add1 (to_free, lo_offset);
76 }
77
78 if (to_free != 0)
79 {
80 uword i;
81 for (i = 0; i < vec_len (to_free); i++)
82 mheap_put (pm->heap, to_free[i]);
83 vec_free (to_free);
84 }
85
86 return lo_offset != ~0 ? pm->heap + lo_offset : 0;
87}
88
Dave Barach9b8ffd92016-07-08 08:13:45 -040089static void
90unix_physmem_free (void *x)
Ed Warnickecb9cada2015-12-08 15:45:58 -070091{
Dave Barach9b8ffd92016-07-08 08:13:45 -040092 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070093
94 /* Return object to region's heap. */
95 mheap_put (pm->heap, x - pm->heap);
96}
97
Dave Barach9b8ffd92016-07-08 08:13:45 -040098static void
99htlb_shutdown (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700100{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400101 physmem_main_t *pm = &physmem_main;
102
103 if (!pm->shmid)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700104 return;
105 shmctl (pm->shmid, IPC_RMID, 0);
106 pm->shmid = 0;
107}
108
109/* try to use huge TLB pgs if possible */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400110static int
111htlb_init (vlib_main_t * vm)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700112{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400113 vlib_physmem_main_t *vpm = &vm->physmem_main;
114 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700115 u64 hugepagesize, pagesize;
116 u64 pfn, seek_loc;
117 u64 cur, physaddr, ptbits;
118 int fd, i;
119
Dave Barach9b8ffd92016-07-08 08:13:45 -0400120 pm->shmid = shmget (11 /* key, my amp goes to 11 */ , pm->mem_size,
121 IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700122 if (pm->shmid < 0)
123 {
124 clib_unix_warning ("shmget");
125 return 0;
126 }
127
Dave Barach9b8ffd92016-07-08 08:13:45 -0400128 pm->mem = shmat (pm->shmid, NULL, 0 /* flags */ );
Ed Warnickecb9cada2015-12-08 15:45:58 -0700129 if (pm->mem == 0)
130 {
131 shmctl (pm->shmid, IPC_RMID, 0);
132 return 0;
133 }
134
135 memset (pm->mem, 0, pm->mem_size);
136
137 /* $$$ get page size info from /proc/meminfo */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400138 hugepagesize = 2 << 20;
139 pagesize = 4 << 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140 vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
141 vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
142
143 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
144 vpm->virtual.start = pointer_to_uword (pm->mem);
145 vpm->virtual.size = pm->mem_size;
146 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
147
Dave Barach9b8ffd92016-07-08 08:13:45 -0400148 fd = open ("/proc/self/pagemap", O_RDONLY);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700149
Dave Barach9b8ffd92016-07-08 08:13:45 -0400150 if (fd < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700151 {
152 (void) shmdt (pm->mem);
153 return 0;
154 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400155
156 pm->heap = mheap_alloc_with_flags (pm->mem, pm->mem_size,
157 /* Don't want mheap mmap/munmap with IO memory. */
Damjan Marion6b0f5892017-07-27 04:01:24 -0400158 MHEAP_FLAG_DISABLE_VM |
159 MHEAP_FLAG_THREAD_SAFE);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400160
161 cur = pointer_to_uword (pm->mem);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700162 i = 0;
163
Dave Barach9b8ffd92016-07-08 08:13:45 -0400164 while (cur < pointer_to_uword (pm->mem) + pm->mem_size)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700165 {
166 pfn = (u64) cur / pagesize;
167 seek_loc = pfn * sizeof (u64);
168 if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400169 {
170 clib_unix_warning ("lseek to 0x%llx", seek_loc);
171 shmctl (pm->shmid, IPC_RMID, 0);
172 close (fd);
173 return 0;
174 }
175 if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof (ptbits)))
176 {
177 clib_unix_warning ("read ptbits");
178 shmctl (pm->shmid, IPC_RMID, 0);
179 close (fd);
180 return 0;
181 }
182
Ed Warnickecb9cada2015-12-08 15:45:58 -0700183 /* bits 0-54 are the physical page number */
184 physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
185 if (CLIB_DEBUG > 1)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400186 fformat (stderr, "pm: virtual 0x%llx physical 0x%llx\n",
187 cur, physaddr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700188 vpm->page_table[i++] = physaddr;
189
190 cur += hugepagesize;
191 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400192 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700193 atexit (htlb_shutdown);
194 return 1;
195}
196
Dave Barach9b8ffd92016-07-08 08:13:45 -0400197int vlib_app_physmem_init (vlib_main_t * vm,
198 physmem_main_t * pm, int) __attribute__ ((weak));
199int
200vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700201{
202 return 0;
203}
204
Dave Barach9b8ffd92016-07-08 08:13:45 -0400205clib_error_t *
206unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700207{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400208 vlib_physmem_main_t *vpm = &vm->physmem_main;
209 physmem_main_t *pm = &physmem_main;
210 clib_error_t *error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700211
212 /* Avoid multiple calls. */
213 if (vm->os_physmem_alloc_aligned)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400214 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700215
216 vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
217 vm->os_physmem_free = unix_physmem_free;
218 pm->mem = MAP_FAILED;
219
220 if (pm->mem_size == 0)
221 pm->mem_size = 16 << 20;
222
223 /* OK, Mr. App, you tell us */
224 if (vlib_app_physmem_init (vm, pm, physical_memory_required))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400225 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700226
Dave Barach9b8ffd92016-07-08 08:13:45 -0400227 if (!pm->no_hugepages && htlb_init (vm))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700228 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400229 fformat (stderr, "%s: use huge pages\n", __FUNCTION__);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200230 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700231 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700232
Dave Barach9b8ffd92016-07-08 08:13:45 -0400233 pm->mem =
234 mmap (0, pm->mem_size, PROT_READ | PROT_WRITE,
235 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200236 if (pm->mem == MAP_FAILED)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700237 {
Damjan Marion5a206ea2016-05-12 22:11:03 +0200238 error = clib_error_return_unix (0, "mmap");
239 goto done;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700240 }
Damjan Marion5a206ea2016-05-12 22:11:03 +0200241
242 pm->heap = mheap_alloc (pm->mem, pm->mem_size);
243
244 /* Identity map with a single page. */
245 vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
246 vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247
Dave Barachbfdedbd2016-01-20 09:11:55 -0500248 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
249 vpm->virtual.start = pointer_to_uword (pm->mem);
250 vpm->virtual.size = pm->mem_size;
251 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
Damjan Marionb4d89272016-05-12 22:14:45 +0200252 vpm->is_fake = 1;
Dave Barachbfdedbd2016-01-20 09:11:55 -0500253
Dave Barach9b8ffd92016-07-08 08:13:45 -0400254 fformat (stderr, "%s: use fake dma pages\n", __FUNCTION__);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700255
Dave Barach9b8ffd92016-07-08 08:13:45 -0400256done:
Ed Warnickecb9cada2015-12-08 15:45:58 -0700257 if (error)
258 {
259 if (pm->mem != MAP_FAILED)
260 munmap (pm->mem, pm->mem_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700261 }
262 return error;
263}
264
265static clib_error_t *
266show_physmem (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400267 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700268{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400269 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700270
271 if (pm->heap)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400272 vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700273 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400274 vlib_cli_output (vm, "No physmem allocated.");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700275 return 0;
276}
277
Dave Barach9b8ffd92016-07-08 08:13:45 -0400278/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700279VLIB_CLI_COMMAND (show_physmem_command, static) = {
280 .path = "show physmem",
281 .short_help = "Show physical memory allocation",
282 .function = show_physmem,
283};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400284/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700285
286static clib_error_t *
287show_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400288 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700289{
290 cpu_set_t set;
291 cpu_set_t *setp = &set;
292 int i, rv;
293 u8 *s = 0;
294 int first_set_bit_in_run = -1;
295 int last_set_bit_in_run = -1;
296 int output_done = 0;
297
Dave Barach9b8ffd92016-07-08 08:13:45 -0400298 rv = sched_getaffinity (0 /* pid, 0 = this proc */ ,
299 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700300 if (rv < 0)
301 {
302 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400303 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700304 return 0;
305 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400306
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307 for (i = 0; i < 64; i++)
308 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400309 if (CPU_ISSET (i, setp))
310 {
311 if (first_set_bit_in_run == -1)
312 {
313 first_set_bit_in_run = i;
314 last_set_bit_in_run = i;
315 if (output_done)
316 s = format (s, ",");
317 s = format (s, "%d-", i);
318 output_done = 1;
319 }
320 else
321 {
322 if (i == (last_set_bit_in_run + 1))
323 last_set_bit_in_run = i;
324 }
325 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700326 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400327 {
328 if (first_set_bit_in_run != -1)
329 {
330 if (first_set_bit_in_run == (i - 1))
331 {
332 _vec_len (s) -= 2 + ((first_set_bit_in_run / 10));
333 }
334 s = format (s, "%d", last_set_bit_in_run);
335 first_set_bit_in_run = -1;
336 last_set_bit_in_run = -1;
337 }
338 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700339 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400340
341 if (first_set_bit_in_run != -1)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700342 s = format (s, "%d", first_set_bit_in_run);
343
344 vlib_cli_output (vm, "Process runs on: %v", s);
345 return 0;
346}
347
Dave Barach9b8ffd92016-07-08 08:13:45 -0400348/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700349VLIB_CLI_COMMAND (show_affinity_command, static) = {
350 .path = "show affinity",
351 .short_help = "Show process cpu affinity",
352 .function = show_affinity,
353};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400354/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700355
356static clib_error_t *
357set_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400358 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700359{
360 cpu_set_t set;
361 cpu_set_t *setp = &set;
362 int i, rv;
363 int another_round;
364 u32 first, last;
365
366 memset (setp, 0, sizeof (*setp));
367
Dave Barach9b8ffd92016-07-08 08:13:45 -0400368 do
369 {
370 another_round = 0;
371 if (unformat (input, "%d-%d,", &first, &last))
372 {
373 if (first > 64 || last > 64)
374 {
375 barf1:
376 vlib_cli_output (vm, "range %d-%d invalid", first, last);
377 return 0;
378 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700379
Dave Barach9b8ffd92016-07-08 08:13:45 -0400380 for (i = first; i <= last; i++)
381 CPU_SET (i, setp);
382 another_round = 1;
383 }
384 else if (unformat (input, "%d-%d", &first, &last))
385 {
386 if (first > 64 || last > 64)
387 goto barf1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700388
Dave Barach9b8ffd92016-07-08 08:13:45 -0400389 for (i = first; i <= last; i++)
390 CPU_SET (i, setp);
391 }
392 else if (unformat (input, "%d,", &first))
393 {
394 if (first > 64)
395 {
396 barf2:
397 vlib_cli_output (vm, "cpu %d invalid", first);
398 return 0;
399 }
400 CPU_SET (first, setp);
401 another_round = 1;
402 }
403 else if (unformat (input, "%d", &first))
404 {
405 if (first > 64)
406 goto barf2;
407
408 CPU_SET (first, setp);
409 }
410 }
411 while (another_round);
412
413 rv = sched_setaffinity (0 /* pid, 0 = this proc */ ,
414 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700415
416 if (rv < 0)
417 {
418 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400419 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700420 return 0;
421 }
422 return show_affinity (vm, input, cmd);
423}
424
Dave Barach9b8ffd92016-07-08 08:13:45 -0400425/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700426VLIB_CLI_COMMAND (set_affinity_command, static) = {
427 .path = "set affinity",
428 .short_help = "Set process cpu affinity",
429 .function = set_affinity,
430};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400431/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700432
433static clib_error_t *
434vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
435{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400436 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700437 u32 size_in_mb;
438
439 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
440 {
441 if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400442 pm->no_hugepages = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700443
Dave Barach9b8ffd92016-07-08 08:13:45 -0400444 else if (unformat (input, "size-in-mb %d", &size_in_mb) ||
445 unformat (input, "size %d", &size_in_mb))
446 pm->mem_size = size_in_mb << 20;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700447 else
448 return unformat_parse_error (input);
449 }
450
451 unformat_free (input);
452 return 0;
453}
454
455VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
Dave Barach9b8ffd92016-07-08 08:13:45 -0400456
457/*
458 * fd.io coding-style-patch-verification: ON
459 *
460 * Local Variables:
461 * eval: (c-set-style "gnu")
462 * End:
463 */