blob: d7130629219ab3c77f50c2ed3270bbed15a5a674 [file] [log] [blame]
Klement Sekerade34c352019-06-25 11:19:22 +00001/*
2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16/**
17 * @file
18 * @brief IPv4 Shallow Virtual Reassembly.
19 *
20 * This file contains the source code for IPv4 Shallow Virtual reassembly.
21 */
22
23#include <vppinfra/vec.h>
24#include <vnet/vnet.h>
25#include <vnet/ip/ip.h>
26#include <vnet/ip/ip4_to_ip6.h>
27#include <vppinfra/fifo.h>
28#include <vppinfra/bihash_16_8.h>
29#include <vnet/ip/reass/ip4_sv_reass.h>
30
31#define MSEC_PER_SEC 1000
32#define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
33#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
34#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
35#define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
36#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
37
/* Return codes of ip4_sv_reass_update () */
typedef enum
{
  IP4_SV_REASS_RC_OK,			/* fragment accepted */
  IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,	/* cached fragment count exceeded max_reass_len */
  IP4_SV_REASS_RC_UNSUPP_IP_PROTO,	/* L4 ports could not be extracted from first fragment */
} ip4_sv_reass_rc_t;
44
/* 16-byte flow key identifying one reassembly context; the union lets the
 * key be copied/compared as two u64 words for the bihash_16_8 table. */
typedef struct
{
  union
  {
    struct
    {
      u32 xx_id;		/* fib index of the RX interface (see key construction in ip4_sv_reass_inline) */
      ip4_address_t src;
      ip4_address_t dst;
      u16 frag_id;		/* IP fragment_id, network order */
      u8 proto;			/* IP protocol */
      u8 unused;		/* padding to fill 16 bytes */
    };
    u64 as_u64[2];
  };
} ip4_sv_reass_key_t;
61
/* 8-byte hash value: locates the reassembly pool entry and the thread
 * that owns it (used to decide worker handoff). */
typedef union
{
  struct
  {
    u32 reass_index;		/* index into the owning thread's reassembly pool */
    u32 thread_index;		/* thread that owns this reassembly */
  };
  u64 as_u64;
} ip4_sv_reass_val_t;
71
/* Combined key/value overlay of clib_bihash_kv_16_8_t for convenient
 * access to the typed key (k) and value (v) fields. */
typedef union
{
  struct
  {
    ip4_sv_reass_key_t k;
    ip4_sv_reass_val_t v;
  };
  clib_bihash_kv_16_8_t kv;
} ip4_sv_reass_kv_t;
81
/* One in-progress shallow-virtual reassembly. Unlike full reassembly, no
 * packet is rebuilt - fragments are cached only until the first fragment
 * reveals the L4 ports, then all are forwarded individually. */
typedef struct
{
  // hash table key
  ip4_sv_reass_key_t key;
  // time when last packet was received
  f64 last_heard;
  // internal id of this reassembly
  u64 id;
  // trace operation counter
  u32 trace_op_counter;
  // minimum fragment length for this reassembly - used to estimate MTU
  u16 min_fragment_length;
  // buffer indexes of buffers in this reassembly in chronological order -
  // including overlaps and duplicate fragments
  u32 *cached_buffers;
  // set to true when this reassembly is completed
  bool is_complete;
  // ip protocol
  u8 ip_proto;
  // l4 src port
  u16 l4_src_port;
  // l4 dst port
  u16 l4_dst_port;
  u32 next_index;
  // lru indexes - doubly-linked list used to evict the oldest reassembly
  u32 lru_prev;
  u32 lru_next;
} ip4_sv_reass_t;
110
/* Per-worker reassembly state; all access is serialized by 'lock'. */
typedef struct
{
  ip4_sv_reass_t *pool;		/* pool of active reassemblies */
  u32 reass_n;			/* current number of reassemblies in pool */
  u32 id_counter;		/* source for ip4_sv_reass_t::id */
  clib_spinlock_t lock;
  // lru indexes - head (oldest) and tail (newest) of the LRU list
  u32 lru_first;
  u32 lru_last;

} ip4_sv_reass_per_thread_t;
122
/* Global state of the IPv4 shallow-virtual reassembly feature. */
typedef struct
{
  // IPv4 config
  u32 timeout_ms;		/* reassembly timeout (ms), as configured */
  f64 timeout;			/* same timeout in seconds, used on datapath */
  u32 expire_walk_interval_ms;	/* period of the expire-walk process */
  // maximum number of fragments in one reassembly
  u32 max_reass_len;
  // maximum number of reassemblies
  u32 max_reass_n;

  // IPv4 runtime
  clib_bihash_16_8_t hash;	/* key -> (thread, pool index) lookup */
  // per-thread data
  ip4_sv_reass_per_thread_t *per_thread_data;

  // convenience
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  // node index of ip4-drop node
  u32 ip4_drop_idx;
  u32 ip4_sv_reass_expire_node_idx;

  /** Worker handoff */
  u32 fq_index;			/* frame queue for the non-feature node */
  u32 fq_feature_index;		/* frame queue for the feature node */

  // reference count for enabling/disabling feature - per interface
  u32 *feature_use_refcount_per_intf;

} ip4_sv_reass_main_t;
155
156extern ip4_sv_reass_main_t ip4_sv_reass_main;
157
158#ifndef CLIB_MARCH_VARIANT
159ip4_sv_reass_main_t ip4_sv_reass_main;
160#endif /* CLIB_MARCH_VARIANT */
161
/* Next-node indices shared by all three reassembly graph nodes. */
typedef enum
{
  IP4_SV_REASSEMBLY_NEXT_INPUT,		/* continue normal processing */
  IP4_SV_REASSEMBLY_NEXT_DROP,		/* malformed / limit reached */
  IP4_SV_REASSEMBLY_NEXT_HANDOFF,	/* owned by another worker thread */
  IP4_SV_REASSEMBLY_N_NEXT,
} ip4_sv_reass_next_t;
169
/* Actions recorded in packet traces. */
typedef enum
{
  REASS_FRAGMENT_CACHE,		/* fragment buffered awaiting the first fragment */
  REASS_FINISH,			/* first fragment seen - L4 info now known */
  REASS_FRAGMENT_FORWARD,	/* fragment forwarded with copied L4 info */
} ip4_sv_reass_trace_operation_e;
176
/* Per-packet trace record for the reassembly nodes. */
typedef struct
{
  ip4_sv_reass_trace_operation_e action;
  u32 reass_id;			/* ip4_sv_reass_t::id */
  u32 op_id;			/* sequence number within the reassembly */
  u8 ip_proto;
  u16 l4_src_port;		/* network order (converted when formatted) */
  u16 l4_dst_port;
} ip4_sv_reass_trace_t;
186
187extern vlib_node_registration_t ip4_sv_reass_node;
188extern vlib_node_registration_t ip4_sv_reass_node_feature;
189
190static u8 *
191format_ip4_sv_reass_trace (u8 * s, va_list * args)
192{
193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
195 ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
196 s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
197 switch (t->action)
198 {
199 case REASS_FRAGMENT_CACHE:
200 s = format (s, "[cached]");
201 break;
202 case REASS_FINISH:
203 s =
204 format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
205 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
206 clib_net_to_host_u16 (t->l4_dst_port));
207 break;
208 case REASS_FRAGMENT_FORWARD:
209 s =
210 format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
211 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
212 clib_net_to_host_u16 (t->l4_dst_port));
213 break;
214 }
215 return s;
216}
217
/* Append a trace record to buffer bi describing 'action' on 'reass';
 * bumps the reassembly's per-operation trace counter. */
static void
ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
			ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
			u32 bi, ip4_sv_reass_trace_operation_e action,
			u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
{
  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
  ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
  t->reass_id = reass->id;
  t->action = action;
  t->op_id = reass->trace_op_counter;
  t->ip_proto = ip_proto;
  t->l4_src_port = l4_src_port;
  t->l4_dst_port = l4_dst_port;
  ++reass->trace_op_counter;
#if 0
  /* debug-only: print the trace record to stdout as it is recorded */
  static u8 *s = NULL;
  s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
  printf ("%.*s\n", vec_len (s), s);
  fflush (stdout);
  vec_reset_length (s);
#endif
}
241
242
/* Tear down a reassembly: remove its hash entry, free all cached buffers,
 * unlink it from the per-thread LRU list and return it to the pool.
 * Caller must hold rt->lock. */
always_inline void
ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
		   ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
{
  clib_bihash_kv_16_8_t kv;
  kv.key[0] = reass->key.as_u64[0];
  kv.key[1] = reass->key.as_u64[1];
  clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);	/* 0 => delete */
  vlib_buffer_free (vm, reass->cached_buffers,
		    vec_len (reass->cached_buffers));
  vec_free (reass->cached_buffers);
  reass->cached_buffers = NULL;
  /* unlink from the doubly-linked LRU list (~0 marks list ends) */
  if (~0 != reass->lru_prev)
    {
      ip4_sv_reass_t *lru_prev =
	pool_elt_at_index (rt->pool, reass->lru_prev);
      lru_prev->lru_next = reass->lru_next;
    }
  if (~0 != reass->lru_next)
    {
      ip4_sv_reass_t *lru_next =
	pool_elt_at_index (rt->pool, reass->lru_next);
      lru_next->lru_prev = reass->lru_prev;
    }
  /* fix up list head/tail if this element was first/last */
  if (rt->lru_first == reass - rt->pool)
    {
      rt->lru_first = reass->lru_next;
    }
  if (rt->lru_last == reass - rt->pool)
    {
      rt->lru_last = reass->lru_prev;
    }
  pool_put (rt->pool, reass);
  --rt->reass_n;
}
278
279always_inline void
280ip4_sv_reass_init (ip4_sv_reass_t * reass)
281{
282 reass->cached_buffers = NULL;
283 reass->is_complete = false;
284}
285
/* Look up the reassembly for key kv; create one if none exists (evicting
 * the LRU entry when at capacity). Sets *do_handoff and returns NULL when
 * the reassembly is owned by another thread. Expired entries found during
 * lookup are freed and replaced. Caller must hold rt->lock. */
always_inline ip4_sv_reass_t *
ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
			     ip4_sv_reass_per_thread_t * rt,
			     ip4_sv_reass_kv_t * kv, u8 * do_handoff)
{
  ip4_sv_reass_t *reass = NULL;
  f64 now = vlib_time_now (rm->vlib_main);

  /* search overwrites kv in place with the stored value on hit */
  if (!clib_bihash_search_16_8
      (&rm->hash, (clib_bihash_kv_16_8_t *) kv, (clib_bihash_kv_16_8_t *) kv))
    {
      if (vm->thread_index != kv->v.thread_index)
	{
	  *do_handoff = 1;
	  return NULL;
	}
      reass = pool_elt_at_index (rt->pool, kv->v.reass_index);

      if (now > reass->last_heard + rm->timeout)
	{
	  /* stale entry - drop it and fall through to create a new one */
	  ip4_sv_reass_free (vm, rm, rt, reass);
	  reass = NULL;
	}
    }

  if (reass)
    {
      reass->last_heard = now;
      return reass;
    }

  /* at capacity - evict the least recently used reassembly */
  if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
    {
      reass = pool_elt_at_index (rt->pool, rt->lru_last);
      ip4_sv_reass_free (vm, rm, rt, reass);
    }

  pool_get (rt->pool, reass);
  clib_memset (reass, 0, sizeof (*reass));
  /* ids are unique across threads: thread index in the high decimal digits */
  reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
  ++rt->id_counter;
  ip4_sv_reass_init (reass);
  ++rt->reass_n;
  reass->lru_prev = reass->lru_next = ~0;

  /* append to the tail of the LRU list */
  if (~0 != rt->lru_last)
    {
      ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
      reass->lru_prev = rt->lru_last;
      lru_last->lru_next = rt->lru_last = reass - rt->pool;
    }

  if (~0 == rt->lru_first)
    {
      rt->lru_first = rt->lru_last = reass - rt->pool;
    }

  reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
  reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
  kv->v.reass_index = (reass - rt->pool);
  kv->v.thread_index = vm->thread_index;
  reass->last_heard = now;

  /* 1 => add; on hash insertion failure undo the allocation */
  if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
    {
      ip4_sv_reass_free (vm, rm, rt, reass);
      reass = NULL;
    }

  return reass;
}
357
/* Feed fragment bi0 into the reassembly. If it is the first fragment
 * (offset 0), extract protocol and L4 ports and mark the reassembly
 * complete; otherwise cache the buffer until the first fragment arrives.
 * Returns a non-OK code on unsupported protocol or cache overflow. */
always_inline ip4_sv_reass_rc_t
ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		     ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
		     ip4_sv_reass_t * reass, u32 bi0)
{
  vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
  ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
  ip4_header_t *fip = vlib_buffer_get_current (fb);
  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
  if (0 == fragment_first)
    {
      /* first fragment carries the L4 header - capture what we need */
      reass->ip_proto = fip->protocol;
      reass->l4_src_port = ip4_get_port (fip, 1);
      reass->l4_dst_port = ip4_get_port (fip, 0);
      /* ip4_get_port returns 0 when it cannot parse the protocol */
      if (!reass->l4_src_port || !reass->l4_dst_port)
	return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
      reass->is_complete = true;
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
				  reass->ip_proto, reass->l4_src_port,
				  reass->l4_dst_port);
	}
    }
  /* the first fragment is cached too - caller flushes the whole vector */
  vec_add1 (reass->cached_buffers, bi0);
  if (!reass->is_complete)
    {
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
				  REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
	}
      if (vec_len (reass->cached_buffers) > rm->max_reass_len)
	{
	  rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
	}
    }
  return rc;
}
398
/* Main packet-processing loop shared by the plain, feature and custom
 * reassembly nodes. Non-fragments pass straight through with their L4
 * info copied into the buffer metadata; fragments are matched to a
 * reassembly and either forwarded (L4 info known), cached, handed off to
 * the owning worker, or dropped. */
always_inline uword
ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
		     vlib_frame_t * frame, bool is_feature, bool is_custom)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
  /* the whole frame is processed under the per-thread lock */
  clib_spinlock_lock (&rt->lock);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u32 error0 = IP4_ERROR_NONE;

	  bi0 = from[0];
	  b0 = vlib_get_buffer (vm, bi0);

	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	    {
	      // this is a regular packet - no fragmentation
	      vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
	      vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
	      vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      goto packet_enqueue;
	    }
	  /* sanity-check fragment geometry before touching state */
	  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
	  const u32 fragment_length =
	    clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
	  const u32 fragment_last = fragment_first + fragment_length - 1;
	  if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0)))	// 8 is minimum frag length per RFC 791
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
	      goto packet_enqueue;
	    }
	  ip4_sv_reass_kv_t kv;
	  u8 do_handoff = 0;

	  /* flow key: fib index + src addr | dst addr + frag id + proto */
	  kv.k.as_u64[0] =
	    (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
			   vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
	    (u64) ip0->src_address.as_u32 << 32;
	  kv.k.as_u64[1] =
	    (u64) ip0->dst_address.
	    as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;

	  ip4_sv_reass_t *reass =
	    ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);

	  if (PREDICT_FALSE (do_handoff))
	    {
	      /* reassembly lives on another worker - hand the packet over */
	      next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
	      vnet_buffer (b0)->ip.reass.owner_thread_index =
		kv.v.thread_index;
	      goto packet_enqueue;
	    }

	  if (!reass)
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
	      goto packet_enqueue;
	    }

	  if (reass->is_complete)
	    {
	      /* L4 info already known - forward this fragment immediately */
	      vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
	      vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
	      vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      error0 = IP4_ERROR_NONE;
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		{
		  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					  REASS_FRAGMENT_FORWARD,
					  reass->ip_proto,
					  reass->l4_src_port,
					  reass->l4_dst_port);
		}
	      goto packet_enqueue;
	    }

	  ip4_sv_reass_rc_t rc =
	    ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
	  switch (rc)
	    {
	    case IP4_SV_REASS_RC_OK:
	      /* nothing to do here */
	      break;
	    case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
	      /* NOTE(review): this increments the chain-too-long counter for
	         an unsupported-protocol result - looks copy-pasted from the
	         case above; confirm whether a dedicated error counter should
	         be used here */
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    }
	  if (reass->is_complete)
	    {
	      /* first fragment just arrived - flush all cached buffers */
	      u32 idx;
	      vec_foreach_index (idx, reass->cached_buffers)
	      {
		u32 bi0 = vec_elt (reass->cached_buffers, idx);
		vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
		u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
		if (is_feature)
		  {
		    vnet_feature_next (&next0, b0);
		  }
		if (is_custom)
		  {
		    next0 = vnet_buffer (b0)->ip.reass.next_index;
		  }
		if (0 == n_left_to_next)
		  {
		    vlib_put_next_frame (vm, node, next_index,
					 n_left_to_next);
		    vlib_get_next_frame (vm, node, next_index, to_next,
					 n_left_to_next);
		  }
		to_next[0] = bi0;
		to_next += 1;
		n_left_to_next -= 1;
		vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
		vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
		vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
		if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		  {
		    ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					    REASS_FRAGMENT_FORWARD,
					    reass->ip_proto,
					    reass->l4_src_port,
					    reass->l4_dst_port);
		  }
		vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						 to_next, n_left_to_next, bi0,
						 next0);
	      }
	      _vec_len (reass->cached_buffers) = 0;	// buffers are owned by frame now
	    }
	  goto next_packet;

	packet_enqueue:
	  b0->error = node->errors[error0];

	  to_next[0] = bi0;
	  to_next += 1;
	  n_left_to_next -= 1;
	  if (is_feature && IP4_ERROR_NONE == error0)
	    {
	      b0 = vlib_get_buffer (vm, bi0);
	      vnet_feature_next (&next0, b0);
	    }
	  if (is_custom)
	    {
	      next0 = vnet_buffer (b0)->ip.reass.next_index;
	    }
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);

	next_packet:
	  from += 1;
	  n_left_from -= 1;
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  clib_spinlock_unlock (&rt->lock);
  return frame->n_vectors;
}
590
/* Error strings indexed by the ip4 error enum (shared with ip4 nodes). */
static char *ip4_sv_reass_error_strings[] = {
#define _(sym, string) string,
  foreach_ip4_error
#undef _
};
596
/* Standalone (non-feature) reassembly node entry point. */
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      false /* is_custom */ );
}
604
/* Graph node registration for standalone shallow-virtual reassembly. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
    .name = "ip4-sv-reassembly",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_sv_reass_trace,
    .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
    .error_strings = ip4_sv_reass_error_strings,
    .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    .next_nodes =
	{
		[IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
		[IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
		[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

	},
};
/* *INDENT-ON* */
622
/* Feature-arc variant: next node is chosen via vnet_feature_next. */
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
			      false /* is_custom */ );
}
630
/* Graph node registration for the ip4-unicast feature-arc variant. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
    .name = "ip4-sv-reassembly-feature",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_sv_reass_trace,
    .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
    .error_strings = ip4_sv_reass_error_strings,
    .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    .next_nodes =
	{
		[IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
		[IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
		[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
	},
};
/* *INDENT-ON* */
647
/* Register the feature on the ip4-unicast arc, before ip4-lookup. */
/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
    .arc_name = "ip4-unicast",
    .node_name = "ip4-sv-reassembly-feature",
    .runs_before = VNET_FEATURES ("ip4-lookup"),
    .runs_after = 0,
};
/* *INDENT-ON* */
656
/* Registration for the custom-next variant; callers register their own
 * next node via ip4_sv_reass_custom_register_next_node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
    .name = "ip4-sv-reassembly-custom-next",
    .vector_size = sizeof (u32),
    .format_trace = format_ip4_sv_reass_trace,
    .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
    .error_strings = ip4_sv_reass_error_strings,
    .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
    .next_nodes =
	{
		[IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
		[IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
		[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

	},
};
/* *INDENT-ON* */
674
/* Custom variant: next node taken from buffer metadata (reass.next_index). */
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
					 vlib_node_runtime_t * node,
					 vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      true /* is_custom */ );
}
682
683#ifndef CLIB_MARCH_VARIANT
684always_inline u32
685ip4_sv_reass_get_nbuckets ()
686{
687 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
688 u32 nbuckets;
689 u8 i;
690
691 nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);
692
693 for (i = 0; i < 31; i++)
694 if ((1 << i) >= nbuckets)
695 break;
696 nbuckets = 1 << i;
697
698 return nbuckets;
699}
700#endif /* CLIB_MARCH_VARIANT */
701
/* Events delivered to the expire-walk process node. */
typedef enum
{
  IP4_EVENT_CONFIG_CHANGED = 1,	/* parameters changed via ip4_sv_reass_set */
} ip4_sv_reass_event_t;
706
/* Context for ip4_rehash_cb: target table plus a sticky failure flag. */
typedef struct
{
  int failure;
  clib_bihash_16_8_t *new_hash;
} ip4_rehash_cb_ctx;
712
713#ifndef CLIB_MARCH_VARIANT
/* bihash walk callback: copy each key/value pair into the new (resized)
 * table, recording any insertion failure in the context. */
static int
ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
{
  ip4_rehash_cb_ctx *ctx = _ctx;
  if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
    {
      ctx->failure = 1;
    }
  return (BIHASH_WALK_CONTINUE);
}
724
725static void
726ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
727 u32 max_reassembly_length,
728 u32 expire_walk_interval_ms)
729{
730 ip4_sv_reass_main.timeout_ms = timeout_ms;
731 ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
732 ip4_sv_reass_main.max_reass_n = max_reassemblies;
733 ip4_sv_reass_main.max_reass_len = max_reassembly_length;
734 ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
735}
736
/* API handler: apply new parameters, wake the expire-walk process, and
 * grow the hash table if the new capacity needs more buckets (rehash into
 * a new table; on failure keep the old one and return an error). */
vnet_api_error_t
ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
		  u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
  u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
  ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
			   max_reassembly_length, expire_walk_interval_ms);
  vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
			     ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
			     IP4_EVENT_CONFIG_CHANGED, 0);
  u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
  if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
    {
      clib_bihash_16_8_t new_hash;
      clib_memset (&new_hash, 0, sizeof (new_hash));
      ip4_rehash_cb_ctx ctx;
      ctx.failure = 0;
      ctx.new_hash = &new_hash;
      clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
			     new_nbuckets * 1024);
      /* copy all existing entries into the bigger table */
      clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
					       ip4_rehash_cb, &ctx);
      if (ctx.failure)
	{
	  clib_bihash_free_16_8 (&new_hash);
	  return -1;
	}
      else
	{
	  clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
	  clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
			    sizeof (ip4_sv_reass_main.hash));
	  clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);
	}
    }
  return 0;
}
774
775vnet_api_error_t
776ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
777 u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
778{
779 *timeout_ms = ip4_sv_reass_main.timeout_ms;
780 *max_reassemblies = ip4_sv_reass_main.max_reass_n;
781 *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
782 *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
783 return 0;
784}
785
/* Plugin init: allocate per-thread state, apply default parameters,
 * create the hash table and the worker-handoff frame queues. */
static clib_error_t *
ip4_sv_reass_init_function (vlib_main_t * vm)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  clib_error_t *error = 0;
  u32 nbuckets;
  vlib_node_t *node;

  rm->vlib_main = vm;
  rm->vnet_main = vnet_get_main ();

  /* one entry per worker plus the main thread */
  vec_validate (rm->per_thread_data, vlib_num_workers ());
  ip4_sv_reass_per_thread_t *rt;
  vec_foreach (rt, rm->per_thread_data)
  {
    clib_spinlock_init (&rt->lock);
    pool_alloc (rt->pool, rm->max_reass_n);
    rt->lru_first = rt->lru_last = ~0;
  }

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
  ASSERT (node);
  rm->ip4_sv_reass_expire_node_idx = node->index;

  ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
			   IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
			   IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
			   IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);

  nbuckets = ip4_sv_reass_get_nbuckets ();
  clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
  ASSERT (node);
  rm->ip4_drop_idx = node->index;

  rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
  rm->fq_feature_index =
    vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);

  rm->feature_use_refcount_per_intf = NULL;

  return error;
}

VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
832#endif /* CLIB_MARCH_VARIANT */
833
/* Process node: wake up every expire_walk_interval_ms (or on config
 * change) and free all reassemblies that have exceeded the timeout. */
static uword
ip4_sv_reass_walk_expired (vlib_main_t * vm,
			   vlib_node_runtime_t * node, vlib_frame_t * f)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  uword event_type, *event_data = 0;

  while (true)
    {
      vlib_process_wait_for_event_or_clock (vm,
					    (f64)
					    rm->expire_walk_interval_ms /
					    (f64) MSEC_PER_SEC);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
	{
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	  break;
	case IP4_EVENT_CONFIG_CHANGED:
	  /* interval may have changed - loop re-reads it on next wait */
	  break;
	default:
	  clib_warning ("BUG: event type 0x%wx", event_type);
	  break;
	}
      f64 now = vlib_time_now (vm);

      ip4_sv_reass_t *reass;
      int *pool_indexes_to_free = NULL;

      uword thread_index = 0;
      int index;
      const uword nthreads = vlib_num_workers () + 1;
      for (thread_index = 0; thread_index < nthreads; ++thread_index)
	{
	  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
	  clib_spinlock_lock (&rt->lock);

	  /* collect first, free second - pool_foreach_index must not see
	     elements being removed from under it */
	  vec_reset_length (pool_indexes_to_free);
	  /* *INDENT-OFF* */
	  pool_foreach_index (index, rt->pool, ({
	    reass = pool_elt_at_index (rt->pool, index);
	    if (now > reass->last_heard + rm->timeout)
	      {
		vec_add1 (pool_indexes_to_free, index);
	      }
	  }));
	  /* *INDENT-ON* */
	  int *i;
	  /* *INDENT-OFF* */
	  vec_foreach (i, pool_indexes_to_free)
	  {
	    ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
	    ip4_sv_reass_free (vm, rm, rt, reass);
	  }
	  /* *INDENT-ON* */

	  clib_spinlock_unlock (&rt->lock);
	}

      vec_free (pool_indexes_to_free);
      if (event_data)
	{
	  _vec_len (event_data) = 0;
	}
    }

  return 0;
}
904
/* Registration of the periodic expiry process node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
    .function = ip4_sv_reass_walk_expired,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "ip4-sv-reassembly-expire-walk",
    .format_trace = format_ip4_sv_reass_trace,
    .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
    .error_strings = ip4_sv_reass_error_strings,

};
/* *INDENT-ON* */
916
/* Render a reassembly key for debug CLI output. */
static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
{
  ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
  s =
    format (s,
	    "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
	    key->xx_id, format_ip4_address, &key->src, format_ip4_address,
	    &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
  return s;
}
928
/* Render one reassembly and its cached buffer chains for the CLI. */
static u8 *
format_ip4_sv_reass (u8 * s, va_list * args)
{
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);

  s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
	      reass->id, format_ip4_sv_reass_key, &reass->key,
	      reass->trace_op_counter);

  vlib_buffer_t *b;
  u32 *bip;
  u32 counter = 0;
  vec_foreach (bip, reass->cached_buffers)
  {
    u32 bi = *bip;
    /* walk the chained-buffer list of each cached fragment */
    do
      {
	b = vlib_get_buffer (vm, bi);
	s = format (s, "  #%03u: bi: %u, ", counter, bi);
	++counter;
	bi = b->next_buffer;
      }
    while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
  }
  return s;
}
956
/* CLI handler for "show ip4-sv-reassembly [details]": prints the total
 * reassembly count across threads, and each reassembly when 'details'. */
static clib_error_t *
show_ip4_reass (vlib_main_t * vm,
		unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))
    {
      details = true;
    }

  u32 sum_reass_n = 0;
  ip4_sv_reass_t *reass;
  uword thread_index;
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
    {
      ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      clib_spinlock_lock (&rt->lock);
      if (details)
	{
	  /* *INDENT-OFF* */
	  pool_foreach (reass, rt->pool, {
	    vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
	  });
	  /* *INDENT-ON* */
	}
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
    }
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);
  return 0;
}
1000
/* CLI command registration for the status display above. */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
    .path = "show ip4-sv-reassembly",
    .short_help = "show ip4-sv-reassembly [details]",
    .function = show_ip4_reass,
};
/* *INDENT-ON* */
1008
1009#ifndef CLIB_MARCH_VARIANT
/* Unconditionally enable/disable the feature on an interface (no
 * reference counting - see ip4_sv_reass_enable_disable_with_refcnt). */
vnet_api_error_t
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
  return vnet_feature_enable_disable ("ip4-unicast",
				      "ip4-sv-reassembly-feature",
				      sw_if_index, enable_disable, 0, 0);
}
1017#endif /* CLIB_MARCH_VARIANT */
1018
1019
/* Errors specific to the worker-handoff nodes. */
#define foreach_ip4_sv_reass_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")


typedef enum
{
#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
  foreach_ip4_sv_reass_handoff_error
#undef _
    IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;
1031
/* Error strings indexed by ip4_sv_reass_handoff_error_t. */
static char *ip4_sv_reass_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_sv_reass_handoff_error
#undef _
};
1037
/* Trace record for the handoff nodes: destination worker only. */
typedef struct
{
  u32 next_worker_index;
} ip4_sv_reass_handoff_trace_t;
1042
/* Render a handoff trace record for "show trace" output. */
static u8 *
format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_handoff_trace_t *t =
    va_arg (*args, ip4_sv_reass_handoff_trace_t *);

  s =
    format (s, "ip4-sv-reassembly-handoff: next-worker %d",
	    t->next_worker_index);

  return s;
}
1057
/* Shared body of both handoff nodes: enqueue every buffer to the frame
 * queue of the worker recorded in its reass.owner_thread_index metadata;
 * count any buffers dropped due to queue congestion. */
always_inline uword
ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame, bool is_feature)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  /* feature and non-feature nodes use separate frame queues */
  fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;

  while (n_left_from > 0)
    {
      ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  ip4_sv_reass_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }
  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  return frame->n_vectors;
}
1106
/* Handoff node entry point for the standalone reassembly node. */
VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   false /* is_feature */ );
}
1114
1115
/* Registration of the standalone handoff node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
  .name = "ip4-sv-reassembly-handoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */
1131
1132
/* Handoff node entry point for the feature-arc reassembly node. */
/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
						  vlib_node_runtime_t *
						  node,
						  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   true /* is_feature */ );
}
/* *INDENT-ON* */
1143
1144
/* Registration of the feature-arc handoff node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
  .name = "ip4-sv-reass-feature-hoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */
1160
1161#ifndef CLIB_MARCH_VARIANT
1162int
1163ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1164{
1165 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
Klement Sekera63c73532019-09-30 14:35:36 +00001166 vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
Klement Sekerade34c352019-06-25 11:19:22 +00001167 if (is_enable)
1168 {
Klement Sekera63c73532019-09-30 14:35:36 +00001169 if (!rm->feature_use_refcount_per_intf[sw_if_index])
Klement Sekerade34c352019-06-25 11:19:22 +00001170 {
Klement Sekera63c73532019-09-30 14:35:36 +00001171 ++rm->feature_use_refcount_per_intf[sw_if_index];
Klement Sekerade34c352019-06-25 11:19:22 +00001172 return vnet_feature_enable_disable ("ip4-unicast",
1173 "ip4-sv-reassembly-feature",
1174 sw_if_index, 1, 0, 0);
1175 }
Klement Sekera63c73532019-09-30 14:35:36 +00001176 ++rm->feature_use_refcount_per_intf[sw_if_index];
Klement Sekerade34c352019-06-25 11:19:22 +00001177 }
1178 else
1179 {
Klement Sekera63c73532019-09-30 14:35:36 +00001180 --rm->feature_use_refcount_per_intf[sw_if_index];
1181 if (!rm->feature_use_refcount_per_intf[sw_if_index])
Klement Sekerade34c352019-06-25 11:19:22 +00001182 return vnet_feature_enable_disable ("ip4-unicast",
1183 "ip4-sv-reassembly-feature",
1184 sw_if_index, 0, 0, 0);
1185 }
Klement Sekera407f5932019-12-11 13:06:27 +00001186 return 0;
Klement Sekerade34c352019-06-25 11:19:22 +00001187}
1188
/* Register a caller-supplied next node for the custom reassembly node;
 * returns the next-index to store in buffer metadata (reass.next_index). */
uword
ip4_sv_reass_custom_register_next_node (uword node_index)
{
  return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
			     node_index);
}
1195#endif
1196
1197/*
1198 * fd.io coding-style-patch-verification: ON
1199 *
1200 * Local Variables:
1201 * eval: (c-set-style "gnu")
1202 * End:
1203 */