/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * physmem.c: Unix physical memory
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vlib/unix/physmem.h>
41
42static physmem_main_t physmem_main;
43
44static void *
Dave Barach9b8ffd92016-07-08 08:13:45 -040045unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes,
46 uword alignment)
Ed Warnickecb9cada2015-12-08 15:45:58 -070047{
Dave Barach9b8ffd92016-07-08 08:13:45 -040048 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049 uword lo_offset, hi_offset;
Dave Barach9b8ffd92016-07-08 08:13:45 -040050 uword *to_free = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070051
52#if DPDK > 0
53 clib_warning ("unsafe alloc!");
54#endif
55
56 /* IO memory is always at least cache aligned. */
57 alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
58
59 while (1)
60 {
61 mheap_get_aligned (pm->heap, n_bytes,
62 /* align */ alignment,
63 /* align offset */ 0,
64 &lo_offset);
65
66 /* Allocation failed? */
67 if (lo_offset == ~0)
68 break;
69
70 /* Make sure allocation does not span DMA physical chunk boundary. */
71 hi_offset = lo_offset + n_bytes - 1;
72
73 if ((lo_offset >> vpm->log2_n_bytes_per_page) ==
74 (hi_offset >> vpm->log2_n_bytes_per_page))
75 break;
76
77 /* Allocation would span chunk boundary, queue it to be freed as soon as
Dave Barach9b8ffd92016-07-08 08:13:45 -040078 we find suitable chunk. */
Ed Warnickecb9cada2015-12-08 15:45:58 -070079 vec_add1 (to_free, lo_offset);
80 }
81
82 if (to_free != 0)
83 {
84 uword i;
85 for (i = 0; i < vec_len (to_free); i++)
86 mheap_put (pm->heap, to_free[i]);
87 vec_free (to_free);
88 }
89
90 return lo_offset != ~0 ? pm->heap + lo_offset : 0;
91}
92
Dave Barach9b8ffd92016-07-08 08:13:45 -040093static void
94unix_physmem_free (void *x)
Ed Warnickecb9cada2015-12-08 15:45:58 -070095{
Dave Barach9b8ffd92016-07-08 08:13:45 -040096 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070097
98 /* Return object to region's heap. */
99 mheap_put (pm->heap, x - pm->heap);
100}
101
Dave Barach9b8ffd92016-07-08 08:13:45 -0400102static void
103htlb_shutdown (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700104{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400105 physmem_main_t *pm = &physmem_main;
106
107 if (!pm->shmid)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700108 return;
109 shmctl (pm->shmid, IPC_RMID, 0);
110 pm->shmid = 0;
111}
112
113/* try to use huge TLB pgs if possible */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400114static int
115htlb_init (vlib_main_t * vm)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700116{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400117 vlib_physmem_main_t *vpm = &vm->physmem_main;
118 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700119 u64 hugepagesize, pagesize;
120 u64 pfn, seek_loc;
121 u64 cur, physaddr, ptbits;
122 int fd, i;
123
Dave Barach9b8ffd92016-07-08 08:13:45 -0400124 pm->shmid = shmget (11 /* key, my amp goes to 11 */ , pm->mem_size,
125 IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126 if (pm->shmid < 0)
127 {
128 clib_unix_warning ("shmget");
129 return 0;
130 }
131
Dave Barach9b8ffd92016-07-08 08:13:45 -0400132 pm->mem = shmat (pm->shmid, NULL, 0 /* flags */ );
Ed Warnickecb9cada2015-12-08 15:45:58 -0700133 if (pm->mem == 0)
134 {
135 shmctl (pm->shmid, IPC_RMID, 0);
136 return 0;
137 }
138
139 memset (pm->mem, 0, pm->mem_size);
140
141 /* $$$ get page size info from /proc/meminfo */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400142 hugepagesize = 2 << 20;
143 pagesize = 4 << 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700144 vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
145 vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
146
147 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
148 vpm->virtual.start = pointer_to_uword (pm->mem);
149 vpm->virtual.size = pm->mem_size;
150 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
151
Dave Barach9b8ffd92016-07-08 08:13:45 -0400152 fd = open ("/proc/self/pagemap", O_RDONLY);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700153
Dave Barach9b8ffd92016-07-08 08:13:45 -0400154 if (fd < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700155 {
156 (void) shmdt (pm->mem);
157 return 0;
158 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400159
160 pm->heap = mheap_alloc_with_flags (pm->mem, pm->mem_size,
161 /* Don't want mheap mmap/munmap with IO memory. */
162 MHEAP_FLAG_DISABLE_VM);
163
164 cur = pointer_to_uword (pm->mem);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700165 i = 0;
166
Dave Barach9b8ffd92016-07-08 08:13:45 -0400167 while (cur < pointer_to_uword (pm->mem) + pm->mem_size)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700168 {
169 pfn = (u64) cur / pagesize;
170 seek_loc = pfn * sizeof (u64);
171 if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400172 {
173 clib_unix_warning ("lseek to 0x%llx", seek_loc);
174 shmctl (pm->shmid, IPC_RMID, 0);
175 close (fd);
176 return 0;
177 }
178 if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof (ptbits)))
179 {
180 clib_unix_warning ("read ptbits");
181 shmctl (pm->shmid, IPC_RMID, 0);
182 close (fd);
183 return 0;
184 }
185
Ed Warnickecb9cada2015-12-08 15:45:58 -0700186 /* bits 0-54 are the physical page number */
187 physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
188 if (CLIB_DEBUG > 1)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400189 fformat (stderr, "pm: virtual 0x%llx physical 0x%llx\n",
190 cur, physaddr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700191 vpm->page_table[i++] = physaddr;
192
193 cur += hugepagesize;
194 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400195 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700196 atexit (htlb_shutdown);
197 return 1;
198}
199
Dave Barach9b8ffd92016-07-08 08:13:45 -0400200int vlib_app_physmem_init (vlib_main_t * vm,
201 physmem_main_t * pm, int) __attribute__ ((weak));
202int
203vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700204{
205 return 0;
206}
207
Dave Barach9b8ffd92016-07-08 08:13:45 -0400208clib_error_t *
209unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700210{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400211 vlib_physmem_main_t *vpm = &vm->physmem_main;
212 physmem_main_t *pm = &physmem_main;
213 clib_error_t *error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700214
215 /* Avoid multiple calls. */
216 if (vm->os_physmem_alloc_aligned)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400217 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700218
219 vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
220 vm->os_physmem_free = unix_physmem_free;
221 pm->mem = MAP_FAILED;
222
223 if (pm->mem_size == 0)
224 pm->mem_size = 16 << 20;
225
226 /* OK, Mr. App, you tell us */
227 if (vlib_app_physmem_init (vm, pm, physical_memory_required))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400228 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229
Dave Barach9b8ffd92016-07-08 08:13:45 -0400230 if (!pm->no_hugepages && htlb_init (vm))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700231 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400232 fformat (stderr, "%s: use huge pages\n", __FUNCTION__);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200233 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700234 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700235
Dave Barach9b8ffd92016-07-08 08:13:45 -0400236 pm->mem =
237 mmap (0, pm->mem_size, PROT_READ | PROT_WRITE,
238 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200239 if (pm->mem == MAP_FAILED)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700240 {
Damjan Marion5a206ea2016-05-12 22:11:03 +0200241 error = clib_error_return_unix (0, "mmap");
242 goto done;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243 }
Damjan Marion5a206ea2016-05-12 22:11:03 +0200244
245 pm->heap = mheap_alloc (pm->mem, pm->mem_size);
246
247 /* Identity map with a single page. */
248 vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
249 vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700250
Dave Barachbfdedbd2016-01-20 09:11:55 -0500251 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
252 vpm->virtual.start = pointer_to_uword (pm->mem);
253 vpm->virtual.size = pm->mem_size;
254 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
Damjan Marionb4d89272016-05-12 22:14:45 +0200255 vpm->is_fake = 1;
Dave Barachbfdedbd2016-01-20 09:11:55 -0500256
Dave Barach9b8ffd92016-07-08 08:13:45 -0400257 fformat (stderr, "%s: use fake dma pages\n", __FUNCTION__);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700258
Dave Barach9b8ffd92016-07-08 08:13:45 -0400259done:
Ed Warnickecb9cada2015-12-08 15:45:58 -0700260 if (error)
261 {
262 if (pm->mem != MAP_FAILED)
263 munmap (pm->mem, pm->mem_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700264 }
265 return error;
266}
267
268static clib_error_t *
269show_physmem (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400270 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700271{
272#if DPDK > 0
Dave Barach9b8ffd92016-07-08 08:13:45 -0400273 vlib_cli_output (vm, "Not supported with DPDK drivers.");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700274#else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400275 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700276
277 if (pm->heap)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400278 vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700279 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400280 vlib_cli_output (vm, "No physmem allocated.");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700281#endif
282 return 0;
283}
284
Dave Barach9b8ffd92016-07-08 08:13:45 -0400285/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700286VLIB_CLI_COMMAND (show_physmem_command, static) = {
287 .path = "show physmem",
288 .short_help = "Show physical memory allocation",
289 .function = show_physmem,
290};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400291/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700292
293static clib_error_t *
294show_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400295 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700296{
297 cpu_set_t set;
298 cpu_set_t *setp = &set;
299 int i, rv;
300 u8 *s = 0;
301 int first_set_bit_in_run = -1;
302 int last_set_bit_in_run = -1;
303 int output_done = 0;
304
Dave Barach9b8ffd92016-07-08 08:13:45 -0400305 rv = sched_getaffinity (0 /* pid, 0 = this proc */ ,
306 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307 if (rv < 0)
308 {
309 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400310 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700311 return 0;
312 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400313
Ed Warnickecb9cada2015-12-08 15:45:58 -0700314 for (i = 0; i < 64; i++)
315 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400316 if (CPU_ISSET (i, setp))
317 {
318 if (first_set_bit_in_run == -1)
319 {
320 first_set_bit_in_run = i;
321 last_set_bit_in_run = i;
322 if (output_done)
323 s = format (s, ",");
324 s = format (s, "%d-", i);
325 output_done = 1;
326 }
327 else
328 {
329 if (i == (last_set_bit_in_run + 1))
330 last_set_bit_in_run = i;
331 }
332 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700333 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400334 {
335 if (first_set_bit_in_run != -1)
336 {
337 if (first_set_bit_in_run == (i - 1))
338 {
339 _vec_len (s) -= 2 + ((first_set_bit_in_run / 10));
340 }
341 s = format (s, "%d", last_set_bit_in_run);
342 first_set_bit_in_run = -1;
343 last_set_bit_in_run = -1;
344 }
345 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700346 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400347
348 if (first_set_bit_in_run != -1)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700349 s = format (s, "%d", first_set_bit_in_run);
350
351 vlib_cli_output (vm, "Process runs on: %v", s);
352 return 0;
353}
354
Dave Barach9b8ffd92016-07-08 08:13:45 -0400355/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700356VLIB_CLI_COMMAND (show_affinity_command, static) = {
357 .path = "show affinity",
358 .short_help = "Show process cpu affinity",
359 .function = show_affinity,
360};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400361/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700362
363static clib_error_t *
364set_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400365 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700366{
367 cpu_set_t set;
368 cpu_set_t *setp = &set;
369 int i, rv;
370 int another_round;
371 u32 first, last;
372
373 memset (setp, 0, sizeof (*setp));
374
Dave Barach9b8ffd92016-07-08 08:13:45 -0400375 do
376 {
377 another_round = 0;
378 if (unformat (input, "%d-%d,", &first, &last))
379 {
380 if (first > 64 || last > 64)
381 {
382 barf1:
383 vlib_cli_output (vm, "range %d-%d invalid", first, last);
384 return 0;
385 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700386
Dave Barach9b8ffd92016-07-08 08:13:45 -0400387 for (i = first; i <= last; i++)
388 CPU_SET (i, setp);
389 another_round = 1;
390 }
391 else if (unformat (input, "%d-%d", &first, &last))
392 {
393 if (first > 64 || last > 64)
394 goto barf1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700395
Dave Barach9b8ffd92016-07-08 08:13:45 -0400396 for (i = first; i <= last; i++)
397 CPU_SET (i, setp);
398 }
399 else if (unformat (input, "%d,", &first))
400 {
401 if (first > 64)
402 {
403 barf2:
404 vlib_cli_output (vm, "cpu %d invalid", first);
405 return 0;
406 }
407 CPU_SET (first, setp);
408 another_round = 1;
409 }
410 else if (unformat (input, "%d", &first))
411 {
412 if (first > 64)
413 goto barf2;
414
415 CPU_SET (first, setp);
416 }
417 }
418 while (another_round);
419
420 rv = sched_setaffinity (0 /* pid, 0 = this proc */ ,
421 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700422
423 if (rv < 0)
424 {
425 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400426 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700427 return 0;
428 }
429 return show_affinity (vm, input, cmd);
430}
431
Dave Barach9b8ffd92016-07-08 08:13:45 -0400432/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700433VLIB_CLI_COMMAND (set_affinity_command, static) = {
434 .path = "set affinity",
435 .short_help = "Set process cpu affinity",
436 .function = set_affinity,
437};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400438/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700439
440static clib_error_t *
441vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
442{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400443 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700444 u32 size_in_mb;
445
446 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
447 {
448 if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400449 pm->no_hugepages = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700450
Dave Barach9b8ffd92016-07-08 08:13:45 -0400451 else if (unformat (input, "size-in-mb %d", &size_in_mb) ||
452 unformat (input, "size %d", &size_in_mb))
453 pm->mem_size = size_in_mb << 20;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700454 else
455 return unformat_parse_error (input);
456 }
457
458 unformat_free (input);
459 return 0;
460}
461
462VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
Dave Barach9b8ffd92016-07-08 08:13:45 -0400463
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */