blob: 8d10ad2e88dc85272da4c0f82561ab824239be81 [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * physmem.c: Unix physical memory
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vlib/unix/physmem.h>
41
42static physmem_main_t physmem_main;
43
44static void *
Dave Barach9b8ffd92016-07-08 08:13:45 -040045unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes,
46 uword alignment)
Ed Warnickecb9cada2015-12-08 15:45:58 -070047{
Damjan Marion878c6092017-01-04 13:19:27 +010048 vlib_main_t *vm = vlib_get_main ();
Dave Barach9b8ffd92016-07-08 08:13:45 -040049 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070050 uword lo_offset, hi_offset;
Dave Barach9b8ffd92016-07-08 08:13:45 -040051 uword *to_free = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070052
Damjan Marion878c6092017-01-04 13:19:27 +010053 if (vm->buffer_main->extern_buffer_mgmt)
54 clib_warning ("unsafe alloc!");
Ed Warnickecb9cada2015-12-08 15:45:58 -070055
56 /* IO memory is always at least cache aligned. */
57 alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
58
59 while (1)
60 {
61 mheap_get_aligned (pm->heap, n_bytes,
62 /* align */ alignment,
63 /* align offset */ 0,
64 &lo_offset);
65
66 /* Allocation failed? */
67 if (lo_offset == ~0)
68 break;
69
70 /* Make sure allocation does not span DMA physical chunk boundary. */
71 hi_offset = lo_offset + n_bytes - 1;
72
73 if ((lo_offset >> vpm->log2_n_bytes_per_page) ==
74 (hi_offset >> vpm->log2_n_bytes_per_page))
75 break;
76
77 /* Allocation would span chunk boundary, queue it to be freed as soon as
Dave Barach9b8ffd92016-07-08 08:13:45 -040078 we find suitable chunk. */
Ed Warnickecb9cada2015-12-08 15:45:58 -070079 vec_add1 (to_free, lo_offset);
80 }
81
82 if (to_free != 0)
83 {
84 uword i;
85 for (i = 0; i < vec_len (to_free); i++)
86 mheap_put (pm->heap, to_free[i]);
87 vec_free (to_free);
88 }
89
90 return lo_offset != ~0 ? pm->heap + lo_offset : 0;
91}
92
Dave Barach9b8ffd92016-07-08 08:13:45 -040093static void
94unix_physmem_free (void *x)
Ed Warnickecb9cada2015-12-08 15:45:58 -070095{
Dave Barach9b8ffd92016-07-08 08:13:45 -040096 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070097
98 /* Return object to region's heap. */
99 mheap_put (pm->heap, x - pm->heap);
100}
101
Dave Barach9b8ffd92016-07-08 08:13:45 -0400102static void
103htlb_shutdown (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700104{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400105 physmem_main_t *pm = &physmem_main;
106
107 if (!pm->shmid)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700108 return;
109 shmctl (pm->shmid, IPC_RMID, 0);
110 pm->shmid = 0;
111}
112
113/* try to use huge TLB pgs if possible */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400114static int
115htlb_init (vlib_main_t * vm)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700116{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400117 vlib_physmem_main_t *vpm = &vm->physmem_main;
118 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700119 u64 hugepagesize, pagesize;
120 u64 pfn, seek_loc;
121 u64 cur, physaddr, ptbits;
122 int fd, i;
123
Dave Barach9b8ffd92016-07-08 08:13:45 -0400124 pm->shmid = shmget (11 /* key, my amp goes to 11 */ , pm->mem_size,
125 IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126 if (pm->shmid < 0)
127 {
128 clib_unix_warning ("shmget");
129 return 0;
130 }
131
Dave Barach9b8ffd92016-07-08 08:13:45 -0400132 pm->mem = shmat (pm->shmid, NULL, 0 /* flags */ );
Ed Warnickecb9cada2015-12-08 15:45:58 -0700133 if (pm->mem == 0)
134 {
135 shmctl (pm->shmid, IPC_RMID, 0);
136 return 0;
137 }
138
139 memset (pm->mem, 0, pm->mem_size);
140
141 /* $$$ get page size info from /proc/meminfo */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400142 hugepagesize = 2 << 20;
143 pagesize = 4 << 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700144 vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
145 vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
146
147 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
148 vpm->virtual.start = pointer_to_uword (pm->mem);
149 vpm->virtual.size = pm->mem_size;
150 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
151
Dave Barach9b8ffd92016-07-08 08:13:45 -0400152 fd = open ("/proc/self/pagemap", O_RDONLY);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700153
Dave Barach9b8ffd92016-07-08 08:13:45 -0400154 if (fd < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700155 {
156 (void) shmdt (pm->mem);
157 return 0;
158 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400159
160 pm->heap = mheap_alloc_with_flags (pm->mem, pm->mem_size,
161 /* Don't want mheap mmap/munmap with IO memory. */
162 MHEAP_FLAG_DISABLE_VM);
163
164 cur = pointer_to_uword (pm->mem);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700165 i = 0;
166
Dave Barach9b8ffd92016-07-08 08:13:45 -0400167 while (cur < pointer_to_uword (pm->mem) + pm->mem_size)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700168 {
169 pfn = (u64) cur / pagesize;
170 seek_loc = pfn * sizeof (u64);
171 if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400172 {
173 clib_unix_warning ("lseek to 0x%llx", seek_loc);
174 shmctl (pm->shmid, IPC_RMID, 0);
175 close (fd);
176 return 0;
177 }
178 if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof (ptbits)))
179 {
180 clib_unix_warning ("read ptbits");
181 shmctl (pm->shmid, IPC_RMID, 0);
182 close (fd);
183 return 0;
184 }
185
Ed Warnickecb9cada2015-12-08 15:45:58 -0700186 /* bits 0-54 are the physical page number */
187 physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
188 if (CLIB_DEBUG > 1)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400189 fformat (stderr, "pm: virtual 0x%llx physical 0x%llx\n",
190 cur, physaddr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700191 vpm->page_table[i++] = physaddr;
192
193 cur += hugepagesize;
194 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400195 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700196 atexit (htlb_shutdown);
197 return 1;
198}
199
Dave Barach9b8ffd92016-07-08 08:13:45 -0400200int vlib_app_physmem_init (vlib_main_t * vm,
201 physmem_main_t * pm, int) __attribute__ ((weak));
202int
203vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700204{
205 return 0;
206}
207
Dave Barach9b8ffd92016-07-08 08:13:45 -0400208clib_error_t *
209unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700210{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400211 vlib_physmem_main_t *vpm = &vm->physmem_main;
212 physmem_main_t *pm = &physmem_main;
213 clib_error_t *error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700214
215 /* Avoid multiple calls. */
216 if (vm->os_physmem_alloc_aligned)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400217 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700218
219 vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
220 vm->os_physmem_free = unix_physmem_free;
221 pm->mem = MAP_FAILED;
222
223 if (pm->mem_size == 0)
224 pm->mem_size = 16 << 20;
225
226 /* OK, Mr. App, you tell us */
227 if (vlib_app_physmem_init (vm, pm, physical_memory_required))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400228 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229
Dave Barach9b8ffd92016-07-08 08:13:45 -0400230 if (!pm->no_hugepages && htlb_init (vm))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700231 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400232 fformat (stderr, "%s: use huge pages\n", __FUNCTION__);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200233 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700234 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700235
Dave Barach9b8ffd92016-07-08 08:13:45 -0400236 pm->mem =
237 mmap (0, pm->mem_size, PROT_READ | PROT_WRITE,
238 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Damjan Marion5a206ea2016-05-12 22:11:03 +0200239 if (pm->mem == MAP_FAILED)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700240 {
Damjan Marion5a206ea2016-05-12 22:11:03 +0200241 error = clib_error_return_unix (0, "mmap");
242 goto done;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243 }
Damjan Marion5a206ea2016-05-12 22:11:03 +0200244
245 pm->heap = mheap_alloc (pm->mem, pm->mem_size);
246
247 /* Identity map with a single page. */
248 vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
249 vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700250
Dave Barachbfdedbd2016-01-20 09:11:55 -0500251 vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
252 vpm->virtual.start = pointer_to_uword (pm->mem);
253 vpm->virtual.size = pm->mem_size;
254 vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
Damjan Marionb4d89272016-05-12 22:14:45 +0200255 vpm->is_fake = 1;
Dave Barachbfdedbd2016-01-20 09:11:55 -0500256
Dave Barach9b8ffd92016-07-08 08:13:45 -0400257 fformat (stderr, "%s: use fake dma pages\n", __FUNCTION__);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700258
Dave Barach9b8ffd92016-07-08 08:13:45 -0400259done:
Ed Warnickecb9cada2015-12-08 15:45:58 -0700260 if (error)
261 {
262 if (pm->mem != MAP_FAILED)
263 munmap (pm->mem, pm->mem_size);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700264 }
265 return error;
266}
267
268static clib_error_t *
269show_physmem (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400270 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700271{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400272 physmem_main_t *pm = &physmem_main;
Damjan Marion878c6092017-01-04 13:19:27 +0100273 if (vm->buffer_main->extern_buffer_mgmt)
274 {
275 vlib_cli_output (vm, "Not supported with external buffer management.");
276 return 0;
277 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700278
279 if (pm->heap)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400280 vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700281 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400282 vlib_cli_output (vm, "No physmem allocated.");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700283 return 0;
284}
285
Dave Barach9b8ffd92016-07-08 08:13:45 -0400286/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700287VLIB_CLI_COMMAND (show_physmem_command, static) = {
288 .path = "show physmem",
289 .short_help = "Show physical memory allocation",
290 .function = show_physmem,
291};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400292/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700293
294static clib_error_t *
295show_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400296 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700297{
298 cpu_set_t set;
299 cpu_set_t *setp = &set;
300 int i, rv;
301 u8 *s = 0;
302 int first_set_bit_in_run = -1;
303 int last_set_bit_in_run = -1;
304 int output_done = 0;
305
Dave Barach9b8ffd92016-07-08 08:13:45 -0400306 rv = sched_getaffinity (0 /* pid, 0 = this proc */ ,
307 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700308 if (rv < 0)
309 {
310 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400311 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700312 return 0;
313 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400314
Ed Warnickecb9cada2015-12-08 15:45:58 -0700315 for (i = 0; i < 64; i++)
316 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400317 if (CPU_ISSET (i, setp))
318 {
319 if (first_set_bit_in_run == -1)
320 {
321 first_set_bit_in_run = i;
322 last_set_bit_in_run = i;
323 if (output_done)
324 s = format (s, ",");
325 s = format (s, "%d-", i);
326 output_done = 1;
327 }
328 else
329 {
330 if (i == (last_set_bit_in_run + 1))
331 last_set_bit_in_run = i;
332 }
333 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700334 else
Dave Barach9b8ffd92016-07-08 08:13:45 -0400335 {
336 if (first_set_bit_in_run != -1)
337 {
338 if (first_set_bit_in_run == (i - 1))
339 {
340 _vec_len (s) -= 2 + ((first_set_bit_in_run / 10));
341 }
342 s = format (s, "%d", last_set_bit_in_run);
343 first_set_bit_in_run = -1;
344 last_set_bit_in_run = -1;
345 }
346 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700347 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400348
349 if (first_set_bit_in_run != -1)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700350 s = format (s, "%d", first_set_bit_in_run);
351
352 vlib_cli_output (vm, "Process runs on: %v", s);
353 return 0;
354}
355
Dave Barach9b8ffd92016-07-08 08:13:45 -0400356/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700357VLIB_CLI_COMMAND (show_affinity_command, static) = {
358 .path = "show affinity",
359 .short_help = "Show process cpu affinity",
360 .function = show_affinity,
361};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400362/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700363
364static clib_error_t *
365set_affinity (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400366 unformat_input_t * input, vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700367{
368 cpu_set_t set;
369 cpu_set_t *setp = &set;
370 int i, rv;
371 int another_round;
372 u32 first, last;
373
374 memset (setp, 0, sizeof (*setp));
375
Dave Barach9b8ffd92016-07-08 08:13:45 -0400376 do
377 {
378 another_round = 0;
379 if (unformat (input, "%d-%d,", &first, &last))
380 {
381 if (first > 64 || last > 64)
382 {
383 barf1:
384 vlib_cli_output (vm, "range %d-%d invalid", first, last);
385 return 0;
386 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700387
Dave Barach9b8ffd92016-07-08 08:13:45 -0400388 for (i = first; i <= last; i++)
389 CPU_SET (i, setp);
390 another_round = 1;
391 }
392 else if (unformat (input, "%d-%d", &first, &last))
393 {
394 if (first > 64 || last > 64)
395 goto barf1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700396
Dave Barach9b8ffd92016-07-08 08:13:45 -0400397 for (i = first; i <= last; i++)
398 CPU_SET (i, setp);
399 }
400 else if (unformat (input, "%d,", &first))
401 {
402 if (first > 64)
403 {
404 barf2:
405 vlib_cli_output (vm, "cpu %d invalid", first);
406 return 0;
407 }
408 CPU_SET (first, setp);
409 another_round = 1;
410 }
411 else if (unformat (input, "%d", &first))
412 {
413 if (first > 64)
414 goto barf2;
415
416 CPU_SET (first, setp);
417 }
418 }
419 while (another_round);
420
421 rv = sched_setaffinity (0 /* pid, 0 = this proc */ ,
422 sizeof (*setp), setp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700423
424 if (rv < 0)
425 {
426 vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
Dave Barach9b8ffd92016-07-08 08:13:45 -0400427 strerror (errno));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700428 return 0;
429 }
430 return show_affinity (vm, input, cmd);
431}
432
Dave Barach9b8ffd92016-07-08 08:13:45 -0400433/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700434VLIB_CLI_COMMAND (set_affinity_command, static) = {
435 .path = "set affinity",
436 .short_help = "Set process cpu affinity",
437 .function = set_affinity,
438};
Dave Barach9b8ffd92016-07-08 08:13:45 -0400439/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700440
441static clib_error_t *
442vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
443{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400444 physmem_main_t *pm = &physmem_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700445 u32 size_in_mb;
446
447 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
448 {
449 if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
Dave Barach9b8ffd92016-07-08 08:13:45 -0400450 pm->no_hugepages = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700451
Dave Barach9b8ffd92016-07-08 08:13:45 -0400452 else if (unformat (input, "size-in-mb %d", &size_in_mb) ||
453 unformat (input, "size %d", &size_in_mb))
454 pm->mem_size = size_in_mb << 20;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700455 else
456 return unformat_parse_error (input);
457 }
458
459 unformat_free (input);
460 return 0;
461}
462
463VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
Dave Barach9b8ffd92016-07-08 08:13:45 -0400464
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */