/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file
 * @brief IPv4 Shallow Virtual Reassembly.
 *
 * This file implements IPv4 shallow virtual reassembly: fragments are not
 * merged back into one packet; instead, the L4 protocol and ports taken
 * from the first fragment are attached to every fragment of the same
 * datagram, so downstream features can classify fragments without paying
 * for full reassembly.
 */

#include <vppinfra/vec.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ip/ip4_to_ip6.h>
#include <vppinfra/fifo.h>
#include <vppinfra/bihash_16_8.h>
#include <vnet/ip/reass/ip4_sv_reass.h>

#define MSEC_PER_SEC 1000
#define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000	// 10 seconds default
#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)

typedef enum
{
  IP4_SV_REASS_RC_OK,
  IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,
  IP4_SV_REASS_RC_UNSUPP_IP_PROTO,
} ip4_sv_reass_rc_t;

typedef struct
{
  union
  {
    struct
    {
      u32 xx_id;
      ip4_address_t src;
      ip4_address_t dst;
      u16 frag_id;
      u8 proto;
      u8 unused;
    };
    u64 as_u64[2];
  };
} ip4_sv_reass_key_t;
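
/*
 * Note: the key above is exactly 16 bytes and the value below is 8 bytes,
 * matching the clib_bihash_16_8_t entry layout, so both can be aliased
 * onto a hash (key, value) pair without any marshalling.
 */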

typedef union
{
  struct
  {
    u32 reass_index;
    u32 thread_index;
  };
  u64 as_u64;
} ip4_sv_reass_val_t;

typedef union
{
  struct
  {
    ip4_sv_reass_key_t k;
    ip4_sv_reass_val_t v;
  };
  clib_bihash_kv_16_8_t kv;
} ip4_sv_reass_kv_t;

typedef struct
{
  // hash table key
  ip4_sv_reass_key_t key;
  // time when last packet was received
  f64 last_heard;
  // internal id of this reassembly
  u64 id;
  // trace operation counter
  u32 trace_op_counter;
  // minimum fragment length for this reassembly - used to estimate MTU
  u16 min_fragment_length;
  // buffer indexes of buffers in this reassembly in chronological order -
  // including overlaps and duplicate fragments
  u32 *cached_buffers;
  // set to true when this reassembly is completed
  bool is_complete;
  // ip protocol
  u8 ip_proto;
  // l4 src port
  u16 l4_src_port;
  // l4 dst port
  u16 l4_dst_port;
  u32 next_index;
  // lru indexes
  u32 lru_prev;
  u32 lru_next;
} ip4_sv_reass_t;

typedef struct
{
  ip4_sv_reass_t *pool;
  u32 reass_n;
  u32 id_counter;
  clib_spinlock_t lock;
  // lru indexes
  u32 lru_first;
  u32 lru_last;

} ip4_sv_reass_per_thread_t;

typedef struct
{
  // IPv4 config
  u32 timeout_ms;
  f64 timeout;
  u32 expire_walk_interval_ms;
  // maximum number of fragments in one reassembly
  u32 max_reass_len;
  // maximum number of reassemblies
  u32 max_reass_n;

  // IPv4 runtime
  clib_bihash_16_8_t hash;
  // per-thread data
  ip4_sv_reass_per_thread_t *per_thread_data;

  // convenience
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  // node index of ip4-drop node
  u32 ip4_drop_idx;
  u32 ip4_sv_reass_expire_node_idx;

  /** Worker handoff */
  u32 fq_index;
  u32 fq_feature_index;

  // reference count for enabling/disabling feature - per interface
  u32 *feature_use_refcount_per_intf;

} ip4_sv_reass_main_t;

extern ip4_sv_reass_main_t ip4_sv_reass_main;

#ifndef CLIB_MARCH_VARIANT
ip4_sv_reass_main_t ip4_sv_reass_main;
#endif /* CLIB_MARCH_VARIANT */

typedef enum
{
  IP4_SV_REASSEMBLY_NEXT_INPUT,
  IP4_SV_REASSEMBLY_NEXT_DROP,
  IP4_SV_REASSEMBLY_NEXT_HANDOFF,
  IP4_SV_REASSEMBLY_N_NEXT,
} ip4_sv_reass_next_t;

typedef enum
{
  REASS_FRAGMENT_CACHE,
  REASS_FINISH,
  REASS_FRAGMENT_FORWARD,
} ip4_sv_reass_trace_operation_e;

typedef struct
{
  ip4_sv_reass_trace_operation_e action;
  u32 reass_id;
  u32 op_id;
  u8 ip_proto;
  u16 l4_src_port;
  u16 l4_dst_port;
} ip4_sv_reass_trace_t;

extern vlib_node_registration_t ip4_sv_reass_node;
extern vlib_node_registration_t ip4_sv_reass_node_feature;

static u8 *
format_ip4_sv_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
  s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
  switch (t->action)
    {
    case REASS_FRAGMENT_CACHE:
      s = format (s, "[cached]");
      break;
    case REASS_FINISH:
      s =
	format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
		t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
		clib_net_to_host_u16 (t->l4_dst_port));
      break;
    case REASS_FRAGMENT_FORWARD:
      s =
	format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
		t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
		clib_net_to_host_u16 (t->l4_dst_port));
      break;
    }
  return s;
}

static void
ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
			ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
			u32 bi, ip4_sv_reass_trace_operation_e action,
			u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
{
  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
  ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
  t->reass_id = reass->id;
  t->action = action;
  t->op_id = reass->trace_op_counter;
  t->ip_proto = ip_proto;
  t->l4_src_port = l4_src_port;
  t->l4_dst_port = l4_dst_port;
  ++reass->trace_op_counter;
#if 0
  static u8 *s = NULL;
  s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
  printf ("%.*s\n", vec_len (s), s);
  fflush (stdout);
  vec_reset_length (s);
#endif
}

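/*
 * Free a reassembly: delete its hash table entry, free all cached buffer
 * chains and unlink the context from the per-thread LRU list.
 */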
always_inline void
ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
		   ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
{
  clib_bihash_kv_16_8_t kv;
  kv.key[0] = reass->key.as_u64[0];
  kv.key[1] = reass->key.as_u64[1];
  clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
  vlib_buffer_free (vm, reass->cached_buffers,
		    vec_len (reass->cached_buffers));
  vec_free (reass->cached_buffers);
  reass->cached_buffers = NULL;
  if (~0 != reass->lru_prev)
    {
      ip4_sv_reass_t *lru_prev =
	pool_elt_at_index (rt->pool, reass->lru_prev);
      lru_prev->lru_next = reass->lru_next;
    }
  if (~0 != reass->lru_next)
    {
      ip4_sv_reass_t *lru_next =
	pool_elt_at_index (rt->pool, reass->lru_next);
      lru_next->lru_prev = reass->lru_prev;
    }
  if (rt->lru_first == reass - rt->pool)
    {
      rt->lru_first = reass->lru_next;
    }
  if (rt->lru_last == reass - rt->pool)
    {
      rt->lru_last = reass->lru_prev;
    }
  pool_put (rt->pool, reass);
  --rt->reass_n;
}

always_inline void
ip4_sv_reass_init (ip4_sv_reass_t * reass)
{
  reass->cached_buffers = NULL;
  reass->is_complete = false;
}

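/*
 * Find an existing reassembly for the given key or create a new one.
 * Sets *do_handoff when the entry is owned by another thread. A stale
 * (timed-out) entry is freed and recreated; when the per-thread pool is
 * full, the least recently used reassembly is evicted to make room.
 */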
always_inline ip4_sv_reass_t *
ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
			     ip4_sv_reass_per_thread_t * rt,
			     ip4_sv_reass_kv_t * kv, u8 * do_handoff)
{
  ip4_sv_reass_t *reass = NULL;
  f64 now = vlib_time_now (rm->vlib_main);

  if (!clib_bihash_search_16_8
      (&rm->hash, (clib_bihash_kv_16_8_t *) kv, (clib_bihash_kv_16_8_t *) kv))
    {
      if (vm->thread_index != kv->v.thread_index)
	{
	  *do_handoff = 1;
	  return NULL;
	}
      reass = pool_elt_at_index (rt->pool, kv->v.reass_index);

      if (now > reass->last_heard + rm->timeout)
	{
	  ip4_sv_reass_free (vm, rm, rt, reass);
	  reass = NULL;
	}
    }

  if (reass)
    {
      reass->last_heard = now;
      return reass;
    }

  if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
    {
      reass = pool_elt_at_index (rt->pool, rt->lru_last);
      ip4_sv_reass_free (vm, rm, rt, reass);
    }

  pool_get (rt->pool, reass);
  clib_memset (reass, 0, sizeof (*reass));
  reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
  ++rt->id_counter;
  ip4_sv_reass_init (reass);
  ++rt->reass_n;
  reass->lru_prev = reass->lru_next = ~0;

  if (~0 != rt->lru_last)
    {
      ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
      reass->lru_prev = rt->lru_last;
      lru_last->lru_next = rt->lru_last = reass - rt->pool;
    }

  if (~0 == rt->lru_first)
    {
      rt->lru_first = rt->lru_last = reass - rt->pool;
    }

  reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
  reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
  kv->v.reass_index = (reass - rt->pool);
  kv->v.thread_index = vm->thread_index;
  reass->last_heard = now;

  if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
    {
      ip4_sv_reass_free (vm, rm, rt, reass);
      reass = NULL;
    }

  return reass;
}

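/*
 * Record one fragment in the reassembly. Only the first fragment
 * (offset 0) carries the L4 header, so ip_proto and the L4 ports are
 * extracted from it and the reassembly is marked complete; from then on
 * fragments can be forwarded immediately with that metadata attached.
 */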
always_inline ip4_sv_reass_rc_t
ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		     ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
		     ip4_sv_reass_t * reass, u32 bi0)
{
  vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
  ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
  ip4_header_t *fip = vlib_buffer_get_current (fb);
  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
  if (0 == fragment_first)
    {
      reass->ip_proto = fip->protocol;
      reass->l4_src_port = ip4_get_port (fip, 1);
      reass->l4_dst_port = ip4_get_port (fip, 0);
      if (!reass->l4_src_port || !reass->l4_dst_port)
	return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
      reass->is_complete = true;
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
				  reass->ip_proto, reass->l4_src_port,
				  reass->l4_dst_port);
	}
    }
  vec_add1 (reass->cached_buffers, bi0);
  if (!reass->is_complete)
    {
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
				  REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
	}
      if (vec_len (reass->cached_buffers) > rm->max_reass_len)
	{
	  rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
	}
    }
  return rc;
}

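/*
 * Main per-frame dispatch. Non-fragments pass straight through with their
 * L4 metadata filled in; fragments are matched to a reassembly context,
 * handed off to the owning thread when necessary, and either cached or
 * forwarded (once the first fragment has been seen).
 */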
always_inline uword
ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
		     vlib_frame_t * frame, bool is_feature, bool is_custom)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
  clib_spinlock_lock (&rt->lock);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u32 error0 = IP4_ERROR_NONE;

	  bi0 = from[0];
	  b0 = vlib_get_buffer (vm, bi0);

	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	    {
	      // this is a regular packet - no fragmentation
	      vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
	      vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
	      vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      goto packet_enqueue;
	    }
	  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
	  const u32 fragment_length =
	    clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
	  const u32 fragment_last = fragment_first + fragment_length - 1;
	  // 8 is the minimum fragment length per RFC 791
	  if (fragment_first > fragment_last ||
	      fragment_first + fragment_length > UINT16_MAX - 20 ||
	      (fragment_length < 8 && ip4_get_fragment_more (ip0)))
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
	      goto packet_enqueue;
	    }
	  ip4_sv_reass_kv_t kv;
	  u8 do_handoff = 0;

	  kv.k.as_u64[0] =
	    (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
			   vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
	    (u64) ip0->src_address.as_u32 << 32;
	  kv.k.as_u64[1] =
	    (u64) ip0->dst_address.as_u32 |
	    (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
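	  /*
	   * The 16-byte key packs the RX fib index (in the xx_id field)
	   * and src address into the first 64-bit word, and dst address,
	   * fragment_id and protocol into the second, mirroring the
	   * ip4_sv_reass_key_t layout above.
	   */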

	  ip4_sv_reass_t *reass =
	    ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);

	  if (PREDICT_FALSE (do_handoff))
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
	      vnet_buffer (b0)->ip.reass.owner_thread_index =
		kv.v.thread_index;
	      goto packet_enqueue;
	    }

	  if (!reass)
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
	      goto packet_enqueue;
	    }

	  if (reass->is_complete)
	    {
	      vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
	      vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
	      vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      error0 = IP4_ERROR_NONE;
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		{
		  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					  REASS_FRAGMENT_FORWARD,
					  reass->ip_proto,
					  reass->l4_src_port,
					  reass->l4_dst_port);
		}
	      goto packet_enqueue;
	    }

	  ip4_sv_reass_rc_t rc =
	    ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
	  switch (rc)
	    {
	    case IP4_SV_REASS_RC_OK:
	      /* nothing to do here */
	      break;
	    case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
	      // note: no dedicated counter here - this reuses the
	      // fragment-chain-too-long counter for unsupported protocols
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    }
	  if (reass->is_complete)
	    {
	      u32 idx;
	      vec_foreach_index (idx, reass->cached_buffers)
	      {
		u32 bi0 = vec_elt (reass->cached_buffers, idx);
		vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
		u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
		if (is_feature)
		  {
		    vnet_feature_next (&next0, b0);
		  }
		if (is_custom)
		  {
		    next0 = vnet_buffer (b0)->ip.reass.next_index;
		  }
		if (0 == n_left_to_next)
		  {
		    vlib_put_next_frame (vm, node, next_index,
					 n_left_to_next);
		    vlib_get_next_frame (vm, node, next_index, to_next,
					 n_left_to_next);
		  }
		to_next[0] = bi0;
		to_next += 1;
		n_left_to_next -= 1;
		vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
		vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
		vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
		if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		  {
		    ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					    REASS_FRAGMENT_FORWARD,
					    reass->ip_proto,
					    reass->l4_src_port,
					    reass->l4_dst_port);
		  }
		vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						 to_next, n_left_to_next,
						 bi0, next0);
	      }
	      _vec_len (reass->cached_buffers) = 0;	// buffers are owned by frame now
	    }
	  goto next_packet;

	packet_enqueue:
	  b0->error = node->errors[error0];

	  to_next[0] = bi0;
	  to_next += 1;
	  n_left_to_next -= 1;
	  if (is_feature && IP4_ERROR_NONE == error0)
	    {
	      b0 = vlib_get_buffer (vm, bi0);
	      vnet_feature_next (&next0, b0);
	    }
	  if (is_custom)
	    {
	      next0 = vnet_buffer (b0)->ip.reass.next_index;
	    }
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);

	next_packet:
	  from += 1;
	  n_left_from -= 1;
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  clib_spinlock_unlock (&rt->lock);
  return frame->n_vectors;
}
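
/*
 * Note: the per-thread spinlock is held for the entire frame; it protects
 * the per-thread pool and LRU list against the expiry walker process,
 * which runs on the main thread and takes the same lock.
 */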

static char *ip4_sv_reass_error_strings[] = {
#define _(sym, string) string,
  foreach_ip4_error
#undef _
};

VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      false /* is_custom */ );
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
  .name = "ip4-sv-reassembly",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
    {
      [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
      [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
      [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
    },
};
/* *INDENT-ON* */

VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
			      false /* is_custom */ );
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
  .name = "ip4-sv-reassembly-feature",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
    {
      [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
      [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
      [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
    },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
  .arc_name = "ip4-unicast",
  .node_name = "ip4-sv-reassembly-feature",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
  .runs_after = 0,
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
  .name = "ip4-sv-reassembly-custom-next",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
    {
      [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
      [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
      [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",
    },
};
/* *INDENT-ON* */

VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
					 vlib_node_runtime_t * node,
					 vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      true /* is_custom */ );
}

#ifndef CLIB_MARCH_VARIANT
always_inline u32
ip4_sv_reass_get_nbuckets ()
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  u32 nbuckets;
  u8 i;

  nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);

  // round up to the next power of two
  for (i = 0; i < 31; i++)
    if ((1 << i) >= nbuckets)
      break;
  nbuckets = 1 << i;

  return nbuckets;
}
#endif /* CLIB_MARCH_VARIANT */

typedef enum
{
  IP4_EVENT_CONFIG_CHANGED = 1,
} ip4_sv_reass_event_t;

typedef struct
{
  int failure;
  clib_bihash_16_8_t *new_hash;
} ip4_rehash_cb_ctx;

#ifndef CLIB_MARCH_VARIANT
static void
ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
{
  ip4_rehash_cb_ctx *ctx = _ctx;
  if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
    {
      ctx->failure = 1;
    }
}

static void
ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
			 u32 max_reassembly_length,
			 u32 expire_walk_interval_ms)
{
  ip4_sv_reass_main.timeout_ms = timeout_ms;
  ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
  ip4_sv_reass_main.max_reass_n = max_reassemblies;
  ip4_sv_reass_main.max_reass_len = max_reassembly_length;
  ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
}

vnet_api_error_t
ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
		  u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
  u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
  ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
			   max_reassembly_length, expire_walk_interval_ms);
  vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
			     ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
			     IP4_EVENT_CONFIG_CHANGED, 0);
  u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
  if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
    {
      clib_bihash_16_8_t new_hash;
      clib_memset (&new_hash, 0, sizeof (new_hash));
      ip4_rehash_cb_ctx ctx;
      ctx.failure = 0;
      ctx.new_hash = &new_hash;
      clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
			     new_nbuckets * 1024);
      clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
					       ip4_rehash_cb, &ctx);
      if (ctx.failure)
	{
	  clib_bihash_free_16_8 (&new_hash);
	  return -1;
	}
      else
	{
	  clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
	  clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
			    sizeof (ip4_sv_reass_main.hash));
	  clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);
	}
    }
  return 0;
}

vnet_api_error_t
ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
		  u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
{
  *timeout_ms = ip4_sv_reass_main.timeout_ms;
  *max_reassemblies = ip4_sv_reass_main.max_reass_n;
  *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
  *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
  return 0;
}
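
/*
 * A minimal usage sketch (hypothetical caller): read the current
 * parameters with ip4_sv_reass_get (), then change only the reassembly
 * timeout to 200 ms:
 *
 *   u32 timeout_ms, max_reass, max_len, walk_ms;
 *   ip4_sv_reass_get (&timeout_ms, &max_reass, &max_len, &walk_ms);
 *   ip4_sv_reass_set (200, max_reass, max_len, walk_ms);
 *
 * Growing max_reassemblies may trigger a rehash of the bihash into a
 * larger table, as implemented in ip4_sv_reass_set () above.
 */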

static clib_error_t *
ip4_sv_reass_init_function (vlib_main_t * vm)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  clib_error_t *error = 0;
  u32 nbuckets;
  vlib_node_t *node;

  rm->vlib_main = vm;
  rm->vnet_main = vnet_get_main ();

  vec_validate (rm->per_thread_data, vlib_num_workers ());
  ip4_sv_reass_per_thread_t *rt;
  vec_foreach (rt, rm->per_thread_data)
  {
    clib_spinlock_init (&rt->lock);
    pool_alloc (rt->pool, rm->max_reass_n);
    rt->lru_first = rt->lru_last = ~0;
  }

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
  ASSERT (node);
  rm->ip4_sv_reass_expire_node_idx = node->index;

  ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
			   IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
			   IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
			   IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);

  nbuckets = ip4_sv_reass_get_nbuckets ();
  clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
  ASSERT (node);
  rm->ip4_drop_idx = node->index;

  rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
  rm->fq_feature_index =
    vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);

  rm->feature_use_refcount_per_intf = NULL;

  return error;
}

VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */

static uword
ip4_sv_reass_walk_expired (vlib_main_t * vm,
			   vlib_node_runtime_t * node, vlib_frame_t * f)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  uword event_type, *event_data = 0;

  while (true)
    {
      vlib_process_wait_for_event_or_clock (vm,
					    (f64)
					    rm->expire_walk_interval_ms /
					    (f64) MSEC_PER_SEC);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
	{
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	  break;
	case IP4_EVENT_CONFIG_CHANGED:
	  break;
	default:
	  clib_warning ("BUG: event type 0x%wx", event_type);
	  break;
	}
      f64 now = vlib_time_now (vm);

      ip4_sv_reass_t *reass;
      int *pool_indexes_to_free = NULL;

      uword thread_index = 0;
      int index;
      const uword nthreads = vlib_num_workers () + 1;
      for (thread_index = 0; thread_index < nthreads; ++thread_index)
	{
	  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
	  clib_spinlock_lock (&rt->lock);

	  vec_reset_length (pool_indexes_to_free);
	  /* *INDENT-OFF* */
	  pool_foreach_index (index, rt->pool, ({
	    reass = pool_elt_at_index (rt->pool, index);
	    if (now > reass->last_heard + rm->timeout)
	      {
		vec_add1 (pool_indexes_to_free, index);
	      }
	  }));
	  /* *INDENT-ON* */
	  int *i;
	  /* *INDENT-OFF* */
	  vec_foreach (i, pool_indexes_to_free)
	  {
	    ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
	    ip4_sv_reass_free (vm, rm, rt, reass);
	  }
	  /* *INDENT-ON* */

	  clib_spinlock_unlock (&rt->lock);
	}

      vec_free (pool_indexes_to_free);
      if (event_data)
	{
	  _vec_len (event_data) = 0;
	}
    }

  return 0;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
  .function = ip4_sv_reass_walk_expired,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip4-sv-reassembly-expire-walk",
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
};
/* *INDENT-ON* */

static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
{
  ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
  s =
    format (s,
	    "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
	    key->xx_id, format_ip4_address, &key->src, format_ip4_address,
	    &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
  return s;
}

static u8 *
format_ip4_sv_reass (u8 * s, va_list * args)
{
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);

  s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
	      reass->id, format_ip4_sv_reass_key, &reass->key,
	      reass->trace_op_counter);

  vlib_buffer_t *b;
  u32 *bip;
  u32 counter = 0;
  vec_foreach (bip, reass->cached_buffers)
  {
    u32 bi = *bip;
    do
      {
	b = vlib_get_buffer (vm, bi);
	s = format (s, "  #%03u: bi: %u, ", counter, bi);
	++counter;
	bi = b->next_buffer;
      }
    while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
  }
  return s;
}

static clib_error_t *
show_ip4_reass (vlib_main_t * vm,
		unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))
    {
      details = true;
    }

  u32 sum_reass_n = 0;
  ip4_sv_reass_t *reass;
  uword thread_index;
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
    {
      ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      clib_spinlock_lock (&rt->lock);
      if (details)
	{
	  /* *INDENT-OFF* */
	  pool_foreach (reass, rt->pool, {
	    vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
	  });
	  /* *INDENT-ON* */
	}
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
    }
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
  .path = "show ip4-sv-reassembly",
  .short_help = "show ip4-sv-reassembly [details]",
  .function = show_ip4_reass,
};
/* *INDENT-ON* */

#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
  return vnet_feature_enable_disable ("ip4-unicast",
				      "ip4-sv-reassembly-feature",
				      sw_if_index, enable_disable, 0, 0);
}
#endif /* CLIB_MARCH_VARIANT */


#define foreach_ip4_sv_reass_handoff_error \
_(CONGESTION_DROP, "congestion drop")


typedef enum
{
#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
  foreach_ip4_sv_reass_handoff_error
#undef _
    IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;

static char *ip4_sv_reass_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_sv_reass_handoff_error
#undef _
};

typedef struct
{
  u32 next_worker_index;
} ip4_sv_reass_handoff_trace_t;

static u8 *
format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip4_sv_reass_handoff_trace_t *t =
    va_arg (*args, ip4_sv_reass_handoff_trace_t *);

  s =
    format (s, "ip4-sv-reassembly-handoff: next-worker %d",
	    t->next_worker_index);

  return s;
}

always_inline uword
ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame, bool is_feature)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;

  while (n_left_from > 0)
    {
      ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  ip4_sv_reass_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }
  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  return frame->n_vectors;
}

VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   false /* is_feature */ );
}


/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
  .name = "ip4-sv-reassembly-handoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */


/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
						  vlib_node_runtime_t * node,
						  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   true /* is_feature */ );
}
/* *INDENT-ON* */


/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
  .name = "ip4-sv-reass-feature-hoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
/* *INDENT-ON* */

#ifndef CLIB_MARCH_VARIANT
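/*
 * Reference-counted per-interface enable/disable: the feature arc is only
 * touched on the 0 -> 1 and 1 -> 0 refcount transitions; other calls just
 * adjust the counter and fall through to the -1 return below.
 */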
int
ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  vec_validate (rm->feature_use_refcount_per_intf, sw_if_index);
  if (is_enable)
    {
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	{
	  ++rm->feature_use_refcount_per_intf[sw_if_index];
	  return vnet_feature_enable_disable ("ip4-unicast",
					      "ip4-sv-reassembly-feature",
					      sw_if_index, 1, 0, 0);
	}
      ++rm->feature_use_refcount_per_intf[sw_if_index];
    }
  else
    {
      --rm->feature_use_refcount_per_intf[sw_if_index];
      if (!rm->feature_use_refcount_per_intf[sw_if_index])
	return vnet_feature_enable_disable ("ip4-unicast",
					    "ip4-sv-reassembly-feature",
					    sw_if_index, 0, 0, 0);
    }
  return -1;
}

uword
ip4_sv_reass_custom_register_next_node (uword node_index)
{
  return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
			     node_index);
}
#endif
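
/*
 * A minimal usage sketch (hypothetical feature using the custom node):
 * register the follow-up node once at init time, then stamp every buffer
 * sent to "ip4-sv-reassembly-custom-next" with the returned next index,
 * which the node reads back from vnet_buffer (b)->ip.reass.next_index:
 *
 *   u32 my_next = ip4_sv_reass_custom_register_next_node (my_node.index);
 *   ...
 *   vnet_buffer (b)->ip.reass.next_index = my_next;
 */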

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */