blob: 25119e8d0a7df2eda6b3db0735163468db211171 [file] [log] [blame]
Klement Sekerade34c352019-06-25 11:19:22 +00001/*
2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16/**
17 * @file
18 * @brief IPv4 Shallow Virtual Reassembly.
19 *
20 * This file contains the source code for IPv4 Shallow Virtual reassembly.
21 */
22
23#include <vppinfra/vec.h>
24#include <vnet/vnet.h>
25#include <vnet/ip/ip.h>
26#include <vnet/ip/ip4_to_ip6.h>
27#include <vppinfra/fifo.h>
28#include <vppinfra/bihash_16_8.h>
29#include <vnet/ip/reass/ip4_sv_reass.h>
30
// milliseconds per second - used for ms <-> f64-seconds conversions
#define MSEC_PER_SEC 1000
// default reassembly timeout before a flow's state is reclaimed
#define IP4_SV_REASS_TIMEOUT_DEFAULT_MS 100
#define IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000	// 10 seconds default
// default cap on concurrent reassemblies per worker thread
#define IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT 1024
// default cap on cached fragments per reassembly
#define IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
// target bihash load factor used when sizing the hash table
#define IP4_SV_REASS_HT_LOAD_FACTOR (0.75)
37
// return codes of ip4_sv_reass_update()
typedef enum
{
  IP4_SV_REASS_RC_OK,
  // more cached fragments than rm->max_reass_len allows
  IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS,
  // L4 ports could not be extracted from the first fragment
  IP4_SV_REASS_RC_UNSUPP_IP_PROTO,
} ip4_sv_reass_rc_t;
44
// bihash key identifying one fragmented flow; the anonymous struct view
// overlays exactly the two u64 words used by the 16_8 bihash
typedef struct
{
  union
  {
    struct
    {
      // fib index of the RX interface (see key construction in the node)
      u32 xx_id;
      ip4_address_t src;
      ip4_address_t dst;
      // IP fragment id, kept in network byte order
      u16 frag_id;
      u8 proto;
      u8 unused;
    };
    u64 as_u64[2];
  };
} ip4_sv_reass_key_t;
61
// bihash value: locates the reassembly in its owner thread's pool
typedef union
{
  struct
  {
    u32 reass_index;
    // thread owning the reassembly - mismatches trigger worker handoff
    u32 thread_index;
  };
  u64 as_u64;
} ip4_sv_reass_val_t;
71
// convenience overlay of key+value onto a clib_bihash_kv_16_8_t
typedef union
{
  struct
  {
    ip4_sv_reass_key_t k;
    ip4_sv_reass_val_t v;
  };
  clib_bihash_kv_16_8_t kv;
} ip4_sv_reass_kv_t;
81
// per-flow shallow-virtual reassembly state; lives in a per-thread pool
// and on that thread's LRU list
typedef struct
{
  // hash table key
  ip4_sv_reass_key_t key;
  // time when last packet was received
  f64 last_heard;
  // internal id of this reassembly
  u64 id;
  // trace operation counter
  u32 trace_op_counter;
  // minimum fragment length for this reassembly - used to estimate MTU
  u16 min_fragment_length;
  // buffer indexes of buffers in this reassembly in chronological order -
  // including overlaps and duplicate fragments
  u32 *cached_buffers;
  // set to true when this reassembly is completed
  bool is_complete;
  // ip protocol
  u8 ip_proto;
  // l4 src port
  u16 l4_src_port;
  // l4 dst port
  u16 l4_dst_port;
  u32 next_index;
  // lru indexes
  u32 lru_prev;
  u32 lru_next;
} ip4_sv_reass_t;
110
// per-thread reassembly context; all fields are protected by `lock`
typedef struct
{
  // pool of reassembly contexts owned by this thread
  ip4_sv_reass_t *pool;
  // number of in-use reassemblies (pool may contain free slots)
  u32 reass_n;
  // monotonically increasing per-thread id source for reass->id
  u32 id_counter;
  clib_spinlock_t lock;
  // lru indexes (head = oldest candidate for eviction is lru_last)
  u32 lru_first;
  u32 lru_last;

} ip4_sv_reass_per_thread_t;
122
// global state of the shallow-virtual reassembly feature
typedef struct
{
  // IPv4 config
  u32 timeout_ms;
  // timeout_ms converted to seconds for comparisons against vlib_time_now()
  f64 timeout;
  u32 expire_walk_interval_ms;
  // maximum number of fragments in one reassembly
  u32 max_reass_len;
  // maximum number of reassemblies
  u32 max_reass_n;

  // IPv4 runtime
  clib_bihash_16_8_t hash;
  // per-thread data
  ip4_sv_reass_per_thread_t *per_thread_data;

  // convenience
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;

  // node index of ip4-drop node
  u32 ip4_drop_idx;
  u32 ip4_sv_reass_expire_node_idx;

  /** Worker handoff */
  u32 fq_index;
  u32 fq_feature_index;

  // reference count for enabling/disabling feature
  u32 feature_use_refcount;

} ip4_sv_reass_main_t;
155
156extern ip4_sv_reass_main_t ip4_sv_reass_main;
157
158#ifndef CLIB_MARCH_VARIANT
159ip4_sv_reass_main_t ip4_sv_reass_main;
160#endif /* CLIB_MARCH_VARIANT */
161
// next-node indices shared by all three reassembly graph nodes
typedef enum
{
  IP4_SV_REASSEMBLY_NEXT_INPUT,
  IP4_SV_REASSEMBLY_NEXT_DROP,
  IP4_SV_REASSEMBLY_NEXT_HANDOFF,
  IP4_SV_REASSEMBLY_N_NEXT,
} ip4_sv_reass_next_t;
169
// what happened to a traced packet inside the reassembly node
typedef enum
{
  REASS_FRAGMENT_CACHE,		// fragment stored until first fragment arrives
  REASS_FINISH,			// first fragment seen; L4 info extracted
  REASS_FRAGMENT_FORWARD,	// fragment forwarded with known L4 info
} ip4_sv_reass_trace_operation_e;
176
// packet-tracer payload emitted by ip4_sv_reass_add_trace()
typedef struct
{
  ip4_sv_reass_trace_operation_e action;
  u32 reass_id;
  // per-reassembly sequence number of this trace entry
  u32 op_id;
  u8 ip_proto;
  u16 l4_src_port;
  u16 l4_dst_port;
} ip4_sv_reass_trace_t;
186
187extern vlib_node_registration_t ip4_sv_reass_node;
188extern vlib_node_registration_t ip4_sv_reass_node_feature;
189
190static u8 *
191format_ip4_sv_reass_trace (u8 * s, va_list * args)
192{
193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
195 ip4_sv_reass_trace_t *t = va_arg (*args, ip4_sv_reass_trace_t *);
196 s = format (s, "reass id: %u, op id: %u ", t->reass_id, t->op_id);
197 switch (t->action)
198 {
199 case REASS_FRAGMENT_CACHE:
200 s = format (s, "[cached]");
201 break;
202 case REASS_FINISH:
203 s =
204 format (s, "[finish, ip proto=%u, src_port=%u, dst_port=%u]",
205 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
206 clib_net_to_host_u16 (t->l4_dst_port));
207 break;
208 case REASS_FRAGMENT_FORWARD:
209 s =
210 format (s, "[forward, ip proto=%u, src_port=%u, dst_port=%u]",
211 t->ip_proto, clib_net_to_host_u16 (t->l4_src_port),
212 clib_net_to_host_u16 (t->l4_dst_port));
213 break;
214 }
215 return s;
216}
217
218static void
219ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
220 ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
221 u32 bi, ip4_sv_reass_trace_operation_e action,
222 u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
223{
224 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
225 ip4_sv_reass_trace_t *t = vlib_add_trace (vm, node, b, sizeof (t[0]));
226 t->reass_id = reass->id;
227 t->action = action;
228 t->op_id = reass->trace_op_counter;
229 t->ip_proto = ip_proto;
230 t->l4_src_port = l4_src_port;
231 t->l4_dst_port = l4_dst_port;
232 ++reass->trace_op_counter;
233#if 0
234 static u8 *s = NULL;
235 s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
236 printf ("%.*s\n", vec_len (s), s);
237 fflush (stdout);
238 vec_reset_length (s);
239#endif
240}
241
242
// Tear down one reassembly: remove its bihash entry, free all cached
// buffers, unlink it from the per-thread LRU list and return it to the
// pool.  Caller must hold rt->lock.
always_inline void
ip4_sv_reass_free (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
		   ip4_sv_reass_per_thread_t * rt, ip4_sv_reass_t * reass)
{
  // delete the hash entry first so no new lookups can find this reassembly
  clib_bihash_kv_16_8_t kv;
  kv.key[0] = reass->key.as_u64[0];
  kv.key[1] = reass->key.as_u64[1];
  clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
  vlib_buffer_free (vm, reass->cached_buffers,
		    vec_len (reass->cached_buffers));
  vec_free (reass->cached_buffers);
  reass->cached_buffers = NULL;
  // unlink from the doubly-linked LRU list (~0 means "no neighbor")
  if (~0 != reass->lru_prev)
    {
      ip4_sv_reass_t *lru_prev =
	pool_elt_at_index (rt->pool, reass->lru_prev);
      lru_prev->lru_next = reass->lru_next;
    }
  if (~0 != reass->lru_next)
    {
      ip4_sv_reass_t *lru_next =
	pool_elt_at_index (rt->pool, reass->lru_next);
      lru_next->lru_prev = reass->lru_prev;
    }
  // fix up list head/tail if this element was first/last
  if (rt->lru_first == reass - rt->pool)
    {
      rt->lru_first = reass->lru_next;
    }
  if (rt->lru_last == reass - rt->pool)
    {
      rt->lru_last = reass->lru_prev;
    }
  pool_put (rt->pool, reass);
  --rt->reass_n;
}
278
279always_inline void
280ip4_sv_reass_init (ip4_sv_reass_t * reass)
281{
282 reass->cached_buffers = NULL;
283 reass->is_complete = false;
284}
285
// Look up the reassembly for key `kv`, or create a new one.
// Sets *do_handoff and returns NULL when the entry belongs to another
// thread.  An existing-but-expired entry is freed and recreated.  When
// the per-thread limit is hit, the least-recently-used entry is evicted.
// Returns NULL (without handoff) only if the bihash insert fails.
// Caller must hold rt->lock.  On success, kv->v is filled in.
always_inline ip4_sv_reass_t *
ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
			     ip4_sv_reass_per_thread_t * rt,
			     ip4_sv_reass_kv_t * kv, u8 * do_handoff)
{
  ip4_sv_reass_t *reass = NULL;
  f64 now = vlib_time_now (rm->vlib_main);

  // search overwrites kv with the stored value on hit
  if (!clib_bihash_search_16_8
      (&rm->hash, (clib_bihash_kv_16_8_t *) kv, (clib_bihash_kv_16_8_t *) kv))
    {
      if (vm->thread_index != kv->v.thread_index)
	{
	  *do_handoff = 1;
	  return NULL;
	}
      reass = pool_elt_at_index (rt->pool, kv->v.reass_index);

      // stale entry - reclaim and fall through to creation below
      if (now > reass->last_heard + rm->timeout)
	{
	  ip4_sv_reass_free (vm, rm, rt, reass);
	  reass = NULL;
	}
    }

  if (reass)
    {
      reass->last_heard = now;
      return reass;
    }

  // at capacity: evict the least recently used reassembly
  if (rt->reass_n >= rm->max_reass_n && rm->max_reass_n)
    {
      reass = pool_elt_at_index (rt->pool, rt->lru_last);
      ip4_sv_reass_free (vm, rm, rt, reass);
    }

  pool_get (rt->pool, reass);
  clib_memset (reass, 0, sizeof (*reass));
  // ids are unique across threads: thread_index * 1e9 + per-thread counter
  reass->id = ((u64) vm->thread_index * 1000000000) + rt->id_counter;
  ++rt->id_counter;
  ip4_sv_reass_init (reass);
  ++rt->reass_n;
  reass->lru_prev = reass->lru_next = ~0;

  // append at the tail of the LRU list
  if (~0 != rt->lru_last)
    {
      ip4_sv_reass_t *lru_last = pool_elt_at_index (rt->pool, rt->lru_last);
      reass->lru_prev = rt->lru_last;
      lru_last->lru_next = rt->lru_last = reass - rt->pool;
    }

  if (~0 == rt->lru_first)
    {
      rt->lru_first = rt->lru_last = reass - rt->pool;
    }

  reass->key.as_u64[0] = ((clib_bihash_kv_16_8_t *) kv)->key[0];
  reass->key.as_u64[1] = ((clib_bihash_kv_16_8_t *) kv)->key[1];
  kv->v.reass_index = (reass - rt->pool);
  kv->v.thread_index = vm->thread_index;
  reass->last_heard = now;

  // roll back on hash insert failure
  if (clib_bihash_add_del_16_8 (&rm->hash, (clib_bihash_kv_16_8_t *) kv, 1))
    {
      ip4_sv_reass_free (vm, rm, rt, reass);
      reass = NULL;
    }

  return reass;
}
357
// Feed one fragment (buffer bi0) into the reassembly.  A zero-offset
// fragment carries the L4 header: extract protocol and ports and mark
// the reassembly complete.  The buffer is always appended to the cache;
// the caller flushes the cache once is_complete becomes true.
always_inline ip4_sv_reass_rc_t
ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
		     ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
		     ip4_sv_reass_t * reass, u32 bi0)
{
  vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
  ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
  ip4_header_t *fip = vlib_buffer_get_current (fb);
  const u32 fragment_first = ip4_get_fragment_offset_bytes (fip);
  if (0 == fragment_first)
    {
      // first fragment - L4 header is here
      reass->ip_proto = fip->protocol;
      reass->l4_src_port = ip4_get_port (fip, 1);
      reass->l4_dst_port = ip4_get_port (fip, 0);
      // ip4_get_port returning 0 means the ports couldn't be extracted
      if (!reass->l4_src_port || !reass->l4_dst_port)
	return IP4_SV_REASS_RC_UNSUPP_IP_PROTO;
      reass->is_complete = true;
      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
				  reass->ip_proto, reass->l4_src_port,
				  reass->l4_dst_port);
	}
    }
  vec_add1 (reass->cached_buffers, bi0);
  if (!reass->is_complete)
    {
      if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
	{
	  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
				  REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
	}
      if (vec_len (reass->cached_buffers) > rm->max_reass_len)
	{
	  rc = IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS;
	}
    }
  return rc;
}
398
// Main dispatch for all three reassembly nodes.  Non-fragments are
// annotated with their own L4 info and passed through; fragments are
// keyed by (fib, src, dst, frag id, proto) and either cached (until the
// first fragment supplies the L4 header), forwarded with the cached L4
// info, handed off to the owning worker, or dropped.  is_feature /
// is_custom select how the next node is chosen (compile-time constants).
always_inline uword
ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
		     vlib_frame_t * frame, bool is_feature, bool is_custom)
{
  u32 *from = vlib_frame_vector_args (frame);
  u32 n_left_from, n_left_to_next, *to_next, next_index;
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  // per-thread state is locked for the duration of the whole frame
  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
  clib_spinlock_lock (&rt->lock);

  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0;
	  u32 error0 = IP4_ERROR_NONE;

	  bi0 = from[0];
	  b0 = vlib_get_buffer (vm, bi0);

	  ip4_header_t *ip0 = vlib_buffer_get_current (b0);
	  if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
	    {
	      // this is a regular packet - no fragmentation
	      vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
	      vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
	      vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      goto packet_enqueue;
	    }
	  // sanity-check fragment geometry before touching any state
	  const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
	  const u32 fragment_length =
	    clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
	  const u32 fragment_last = fragment_first + fragment_length - 1;
	  if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0)))	// 8 is minimum frag length per RFC 791
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
	      goto packet_enqueue;
	    }
	  ip4_sv_reass_kv_t kv;
	  u8 do_handoff = 0;

	  // flow key: word0 = fib index | src addr, word1 = dst addr |
	  // fragment id | protocol
	  kv.k.as_u64[0] =
	    (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
			   vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
	    (u64) ip0->src_address.as_u32 << 32;
	  kv.k.as_u64[1] =
	    (u64) ip0->dst_address.
	    as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;

	  ip4_sv_reass_t *reass =
	    ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);

	  // reassembly owned by a different worker - hand the buffer off
	  if (PREDICT_FALSE (do_handoff))
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
	      vnet_buffer (b0)->ip.reass.owner_thread_index =
		kv.v.thread_index;
	      goto packet_enqueue;
	    }

	  if (!reass)
	    {
	      next0 = IP4_SV_REASSEMBLY_NEXT_DROP;
	      error0 = IP4_ERROR_REASS_LIMIT_REACHED;
	      goto packet_enqueue;
	    }

	  // L4 info already known - annotate and forward immediately
	  if (reass->is_complete)
	    {
	      vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
	      vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
	      vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
	      next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
	      error0 = IP4_ERROR_NONE;
	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		{
		  ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					  REASS_FRAGMENT_FORWARD,
					  reass->ip_proto,
					  reass->l4_src_port,
					  reass->l4_dst_port);
		}
	      goto packet_enqueue;
	    }

	  ip4_sv_reass_rc_t rc =
	    ip4_sv_reass_update (vm, node, rm, rt, reass, bi0);
	  switch (rc)
	    {
	    case IP4_SV_REASS_RC_OK:
	      /* nothing to do here */
	      break;
	    case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
	      // NOTE(review): this also increments FRAGMENT_CHAIN_TOO_LONG -
	      // looks like a copy-paste from the case above; confirm whether
	      // a dedicated unsupported-protocol counter was intended
	      vlib_node_increment_counter (vm, node->node_index,
					   IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
					   1);
	      ip4_sv_reass_free (vm, rm, rt, reass);
	      goto next_packet;
	      break;
	    }
	  // first fragment just arrived - flush all cached fragments
	  if (reass->is_complete)
	    {
	      u32 idx;
	      vec_foreach_index (idx, reass->cached_buffers)
	      {
		u32 bi0 = vec_elt (reass->cached_buffers, idx);
		vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
		u32 next0 = IP4_SV_REASSEMBLY_NEXT_INPUT;
		if (is_feature)
		  {
		    vnet_feature_next (&next0, b0);
		  }
		if (is_custom)
		  {
		    next0 = vnet_buffer (b0)->ip.reass.next_index;
		  }
		// current frame may fill mid-flush; start a fresh one
		if (0 == n_left_to_next)
		  {
		    vlib_put_next_frame (vm, node, next_index,
					 n_left_to_next);
		    vlib_get_next_frame (vm, node, next_index, to_next,
					 n_left_to_next);
		  }
		to_next[0] = bi0;
		to_next += 1;
		n_left_to_next -= 1;
		vnet_buffer (b0)->ip.reass.ip_proto = reass->ip_proto;
		vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
		vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
		if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
		  {
		    ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
					    REASS_FRAGMENT_FORWARD,
					    reass->ip_proto,
					    reass->l4_src_port,
					    reass->l4_dst_port);
		  }
		vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
						 to_next, n_left_to_next, bi0,
						 next0);
	      }
	      _vec_len (reass->cached_buffers) = 0;	// buffers are owned by frame now
	    }
	  goto next_packet;

	packet_enqueue:
	  b0->error = node->errors[error0];

	  to_next[0] = bi0;
	  to_next += 1;
	  n_left_to_next -= 1;
	  if (is_feature && IP4_ERROR_NONE == error0)
	    {
	      b0 = vlib_get_buffer (vm, bi0);
	      vnet_feature_next (&next0, b0);
	    }
	  if (is_custom)
	    {
	      next0 = vnet_buffer (b0)->ip.reass.next_index;
	    }
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);

	next_packet:
	  from += 1;
	  n_left_from -= 1;
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  clib_spinlock_unlock (&rt->lock);
  return frame->n_vectors;
}
590
// error strings indexed by ip4_error_t, expanded from foreach_ip4_error
static char *ip4_sv_reass_error_strings[] = {
#define _(sym, string) string,
  foreach_ip4_error
#undef _
};
596
// non-feature, non-custom variant of the reassembly node
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      false /* is_custom */ );
}
604
605/* *INDENT-OFF* */
// graph node registration for the standalone reassembly node
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
  .name = "ip4-sv-reassembly",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
      {
	  [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
	  [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
	  [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

      },
};
621/* *INDENT-ON* */
622
// feature-arc variant: next node is chosen via vnet_feature_next()
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
			      false /* is_custom */ );
}
630
631/* *INDENT-OFF* */
// graph node registration for the feature-arc variant; note the
// feature-specific handoff next node
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
  .name = "ip4-sv-reassembly-feature",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
      {
	  [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
	  [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
	  [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
      },
};
646/* *INDENT-ON* */
647
648/* *INDENT-OFF* */
// register the feature on the ip4-unicast arc, before ip4-lookup
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
  .arc_name = "ip4-unicast",
  .node_name = "ip4-sv-reassembly-feature",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
  .runs_after = 0,
};
655/* *INDENT-ON* */
656
657/* *INDENT-OFF* */
// graph node registration for the custom-next variant (callers register
// their next node via ip4_sv_reass_custom_register_next_node)
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
  .name = "ip4-sv-reassembly-custom-next",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,
  .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
  .next_nodes =
      {
	  [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
	  [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
	  [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-handoff",

      },
};
674
// custom variant: next node comes from vnet_buffer()->ip.reass.next_index
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
					 vlib_node_runtime_t * node,
					 vlib_frame_t * frame)
{
  return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
			      true /* is_custom */ );
}
682
683#ifndef CLIB_MARCH_VARIANT
684always_inline u32
685ip4_sv_reass_get_nbuckets ()
686{
687 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
688 u32 nbuckets;
689 u8 i;
690
691 nbuckets = (u32) (rm->max_reass_n / IP4_SV_REASS_HT_LOAD_FACTOR);
692
693 for (i = 0; i < 31; i++)
694 if ((1 << i) >= nbuckets)
695 break;
696 nbuckets = 1 << i;
697
698 return nbuckets;
699}
700#endif /* CLIB_MARCH_VARIANT */
701
// process-node events understood by ip4_sv_reass_walk_expired()
typedef enum
{
  IP4_EVENT_CONFIG_CHANGED = 1,
} ip4_sv_reass_event_t;
706
// context passed to ip4_rehash_cb while copying entries to a new hash
typedef struct
{
  // set to 1 if any insert into new_hash failed
  int failure;
  clib_bihash_16_8_t *new_hash;
} ip4_rehash_cb_ctx;
712
713#ifndef CLIB_MARCH_VARIANT
714static void
715ip4_rehash_cb (clib_bihash_kv_16_8_t * kv, void *_ctx)
716{
717 ip4_rehash_cb_ctx *ctx = _ctx;
718 if (clib_bihash_add_del_16_8 (ctx->new_hash, kv, 1))
719 {
720 ctx->failure = 1;
721 }
722}
723
724static void
725ip4_sv_reass_set_params (u32 timeout_ms, u32 max_reassemblies,
726 u32 max_reassembly_length,
727 u32 expire_walk_interval_ms)
728{
729 ip4_sv_reass_main.timeout_ms = timeout_ms;
730 ip4_sv_reass_main.timeout = (f64) timeout_ms / (f64) MSEC_PER_SEC;
731 ip4_sv_reass_main.max_reass_n = max_reassemblies;
732 ip4_sv_reass_main.max_reass_len = max_reassembly_length;
733 ip4_sv_reass_main.expire_walk_interval_ms = expire_walk_interval_ms;
734}
735
// API entry point: apply new reassembly parameters, wake the expiry
// process, and grow the hash table if the new limits require more
// buckets.  Growth copies all entries into a new table and swaps it in;
// on copy failure the old table is kept and -1 is returned.
vnet_api_error_t
ip4_sv_reass_set (u32 timeout_ms, u32 max_reassemblies,
		  u32 max_reassembly_length, u32 expire_walk_interval_ms)
{
  u32 old_nbuckets = ip4_sv_reass_get_nbuckets ();
  ip4_sv_reass_set_params (timeout_ms, max_reassemblies,
			   max_reassembly_length, expire_walk_interval_ms);
  vlib_process_signal_event (ip4_sv_reass_main.vlib_main,
			     ip4_sv_reass_main.ip4_sv_reass_expire_node_idx,
			     IP4_EVENT_CONFIG_CHANGED, 0);
  u32 new_nbuckets = ip4_sv_reass_get_nbuckets ();
  // only ever grow the table - shrinking is never done
  if (ip4_sv_reass_main.max_reass_n > 0 && new_nbuckets > old_nbuckets)
    {
      clib_bihash_16_8_t new_hash;
      clib_memset (&new_hash, 0, sizeof (new_hash));
      ip4_rehash_cb_ctx ctx;
      ctx.failure = 0;
      ctx.new_hash = &new_hash;
      clib_bihash_init_16_8 (&new_hash, "ip4-dr", new_nbuckets,
			     new_nbuckets * 1024);
      // copy every existing entry into the larger table
      clib_bihash_foreach_key_value_pair_16_8 (&ip4_sv_reass_main.hash,
					       ip4_rehash_cb, &ctx);
      if (ctx.failure)
	{
	  clib_bihash_free_16_8 (&new_hash);
	  return -1;
	}
      else
	{
	  clib_bihash_free_16_8 (&ip4_sv_reass_main.hash);
	  clib_memcpy_fast (&ip4_sv_reass_main.hash, &new_hash,
			    sizeof (ip4_sv_reass_main.hash));
	  clib_bihash_copied (&ip4_sv_reass_main.hash, &new_hash);
	}
    }
  return 0;
}
773
774vnet_api_error_t
775ip4_sv_reass_get (u32 * timeout_ms, u32 * max_reassemblies,
776 u32 * max_reassembly_length, u32 * expire_walk_interval_ms)
777{
778 *timeout_ms = ip4_sv_reass_main.timeout_ms;
779 *max_reassemblies = ip4_sv_reass_main.max_reass_n;
780 *max_reassembly_length = ip4_sv_reass_main.max_reass_len;
781 *expire_walk_interval_ms = ip4_sv_reass_main.expire_walk_interval_ms;
782 return 0;
783}
784
// VLIB init: set defaults, allocate per-thread pools/locks, size and
// create the bihash, resolve helper node indices, and create the two
// worker handoff frame queues.
static clib_error_t *
ip4_sv_reass_init_function (vlib_main_t * vm)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  clib_error_t *error = 0;
  u32 nbuckets;
  vlib_node_t *node;

  rm->vlib_main = vm;
  rm->vnet_main = vnet_get_main ();

  // one entry per worker plus the main thread
  vec_validate (rm->per_thread_data, vlib_num_workers ());
  ip4_sv_reass_per_thread_t *rt;
  vec_foreach (rt, rm->per_thread_data)
  {
    clib_spinlock_init (&rt->lock);
    pool_alloc (rt->pool, rm->max_reass_n);
    rt->lru_first = rt->lru_last = ~0;
  }

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-sv-reassembly-expire-walk");
  ASSERT (node);
  rm->ip4_sv_reass_expire_node_idx = node->index;

  // defaults must be set before sizing the hash below
  ip4_sv_reass_set_params (IP4_SV_REASS_TIMEOUT_DEFAULT_MS,
			   IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT,
			   IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT,
			   IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS);

  nbuckets = ip4_sv_reass_get_nbuckets ();
  clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);

  node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
  ASSERT (node);
  rm->ip4_drop_idx = node->index;

  rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
  rm->fq_feature_index =
    vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);

  return error;
}
827
828VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
829#endif /* CLIB_MARCH_VARIANT */
830
// Process node: wakes up every expire_walk_interval_ms (or on a config
// event) and frees every reassembly whose last_heard is older than the
// timeout.  Indices are collected first, then freed, because freeing
// inside pool_foreach_index would invalidate the iteration.
static uword
ip4_sv_reass_walk_expired (vlib_main_t * vm,
			   vlib_node_runtime_t * node, vlib_frame_t * f)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
  uword event_type, *event_data = 0;

  while (true)
    {
      vlib_process_wait_for_event_or_clock (vm,
					    (f64)
					    rm->expire_walk_interval_ms /
					    (f64) MSEC_PER_SEC);
      event_type = vlib_process_get_events (vm, &event_data);

      switch (event_type)
	{
	case ~0:		/* no events => timeout */
	  /* nothing to do here */
	  break;
	case IP4_EVENT_CONFIG_CHANGED:
	  // interval may have changed; next wait picks it up
	  break;
	default:
	  clib_warning ("BUG: event type 0x%wx", event_type);
	  break;
	}
      f64 now = vlib_time_now (vm);

      ip4_sv_reass_t *reass;
      int *pool_indexes_to_free = NULL;

      uword thread_index = 0;
      int index;
      const uword nthreads = vlib_num_workers () + 1;
      for (thread_index = 0; thread_index < nthreads; ++thread_index)
	{
	  ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
	  clib_spinlock_lock (&rt->lock);

	  vec_reset_length (pool_indexes_to_free);
          /* *INDENT-OFF* */
          pool_foreach_index (index, rt->pool, ({
                                reass = pool_elt_at_index (rt->pool, index);
                                if (now > reass->last_heard + rm->timeout)
                                  {
                                    vec_add1 (pool_indexes_to_free, index);
                                  }
                              }));
          /* *INDENT-ON* */
	  int *i;
          /* *INDENT-OFF* */
          vec_foreach (i, pool_indexes_to_free)
          {
            ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
            ip4_sv_reass_free (vm, rm, rt, reass);
          }
          /* *INDENT-ON* */

	  clib_spinlock_unlock (&rt->lock);
	}

      vec_free (pool_indexes_to_free);
      if (event_data)
	{
	  _vec_len (event_data) = 0;
	}
    }

  // never reached - the process loops forever
  return 0;
}
901
902/* *INDENT-OFF* */
// registration of the periodic expiry process node
// NOTE(review): format_trace on a process node is unusual (processes do
// not trace packets) - presumably harmless boilerplate; confirm
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
  .function = ip4_sv_reass_walk_expired,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip4-sv-reassembly-expire-walk",
  .format_trace = format_ip4_sv_reass_trace,
  .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
  .error_strings = ip4_sv_reass_error_strings,

};
912/* *INDENT-ON* */
913
914static u8 *
915format_ip4_sv_reass_key (u8 * s, va_list * args)
916{
917 ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
918 s =
919 format (s,
920 "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
921 key->xx_id, format_ip4_address, &key->src, format_ip4_address,
922 &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
923 return s;
924}
925
// Render one reassembly for "show" output, including every cached
// buffer; each cached buffer's chain (next_buffer links) is walked too.
static u8 *
format_ip4_sv_reass (u8 * s, va_list * args)
{
  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
  ip4_sv_reass_t *reass = va_arg (*args, ip4_sv_reass_t *);

  s = format (s, "ID: %lu, key: %U trace_op_counter: %u\n",
	      reass->id, format_ip4_sv_reass_key, &reass->key,
	      reass->trace_op_counter);

  vlib_buffer_t *b;
  u32 *bip;
  u32 counter = 0;
  vec_foreach (bip, reass->cached_buffers)
  {
    u32 bi = *bip;
    // follow the buffer chain of this cached fragment
    do
      {
	b = vlib_get_buffer (vm, bi);
	s = format (s, "  #%03u: bi: %u, ", counter, bi);
	++counter;
	bi = b->next_buffer;
      }
    while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
  }
  return s;
}
953
// CLI handler for "show ip4-sv-reassembly [details]": prints the active
// reassembly count summed over all threads, and with "details" dumps
// every reassembly.  Takes each per-thread lock while reading.
static clib_error_t *
show_ip4_reass (vlib_main_t * vm,
		unformat_input_t * input,
		CLIB_UNUSED (vlib_cli_command_t * lmd))
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "IP4 reassembly status");
  vlib_cli_output (vm, "---------------------");
  bool details = false;
  if (unformat (input, "details"))
    {
      details = true;
    }

  u32 sum_reass_n = 0;
  ip4_sv_reass_t *reass;
  uword thread_index;
  const uword nthreads = vlib_num_workers () + 1;
  for (thread_index = 0; thread_index < nthreads; ++thread_index)
    {
      ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[thread_index];
      clib_spinlock_lock (&rt->lock);
      if (details)
	{
          /* *INDENT-OFF* */
          pool_foreach (reass, rt->pool, {
            vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
          });
          /* *INDENT-ON* */
	}
      sum_reass_n += rt->reass_n;
      clib_spinlock_unlock (&rt->lock);
    }
  vlib_cli_output (vm, "---------------------");
  vlib_cli_output (vm, "Current IP4 reassemblies count: %lu\n",
		   (long unsigned) sum_reass_n);
  vlib_cli_output (vm,
		   "Maximum configured concurrent IP4 reassemblies per worker-thread: %lu\n",
		   (long unsigned) rm->max_reass_n);
  return 0;
}
997
998/* *INDENT-OFF* */
// CLI registration for the status command above
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
    .path = "show ip4-sv-reassembly",
    .short_help = "show ip4-sv-reassembly [details]",
    .function = show_ip4_reass,
};
1004/* *INDENT-ON* */
1005
1006#ifndef CLIB_MARCH_VARIANT
// Enable/disable the reassembly feature on an interface directly,
// without reference counting (compare with the _with_refcnt variant).
vnet_api_error_t
ip4_sv_reass_enable_disable (u32 sw_if_index, u8 enable_disable)
{
  return vnet_feature_enable_disable ("ip4-unicast",
				      "ip4-sv-reassembly-feature",
				      sw_if_index, enable_disable, 0, 0);
}
1014#endif /* CLIB_MARCH_VARIANT */
1015
1016
// errors that the handoff nodes can report
#define foreach_ip4_sv_reass_handoff_error                       \
_(CONGESTION_DROP, "congestion drop")


typedef enum
{
#define _(sym,str) IP4_SV_REASSEMBLY_HANDOFF_ERROR_##sym,
  foreach_ip4_sv_reass_handoff_error
#undef _
    IP4_SV_REASSEMBLY_HANDOFF_N_ERROR,
} ip4_sv_reass_handoff_error_t;
1028
// error strings matching ip4_sv_reass_handoff_error_t
static char *ip4_sv_reass_handoff_error_strings[] = {
#define _(sym,string) string,
  foreach_ip4_sv_reass_handoff_error
#undef _
};
1034
// packet-tracer payload for the handoff nodes
typedef struct
{
  u32 next_worker_index;
} ip4_sv_reass_handoff_trace_t;
1039
1040static u8 *
1041format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
1042{
1043 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1044 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1045 ip4_sv_reass_handoff_trace_t *t =
1046 va_arg (*args, ip4_sv_reass_handoff_trace_t *);
1047
1048 s =
1049 format (s, "ip4-sv-reassembly-handoff: next-worker %d",
1050 t->next_worker_index);
1051
1052 return s;
1053}
1054
// Shared body of both handoff nodes: read the owner thread index the
// reassembly node stamped into each buffer, trace if requested, and
// enqueue the whole frame to the appropriate per-thread frame queue.
// Buffers that cannot be enqueued are counted as congestion drops.
always_inline uword
ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame, bool is_feature)
{
  ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;

  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u32 n_enq, n_left_from, *from;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti;
  u32 fq_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  vlib_get_buffers (vm, from, bufs, n_left_from);

  b = bufs;
  ti = thread_indices;

  // feature and non-feature paths use distinct frame queues
  fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;

  while (n_left_from > 0)
    {
      ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;

      if (PREDICT_FALSE
	  ((node->flags & VLIB_NODE_FLAG_TRACE)
	   && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  ip4_sv_reass_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b[0], sizeof (*t));
	  t->next_worker_index = ti[0];
	}

      n_left_from -= 1;
      ti += 1;
      b += 1;
    }
  n_enq =
    vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
				   frame->n_vectors, 1);

  if (n_enq < frame->n_vectors)
    vlib_node_increment_counter (vm, node->node_index,
				 IP4_SV_REASSEMBLY_HANDOFF_ERROR_CONGESTION_DROP,
				 frame->n_vectors - n_enq);
  return frame->n_vectors;
}
1103
// handoff node for the standalone/custom reassembly path
VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   false /* is_feature */ );
}
1111
1112
1113/* *INDENT-OFF* */
// registration of the non-feature handoff node
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
  .name = "ip4-sv-reassembly-handoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
1128
1129
1130/* *INDENT-OFF* */
// handoff node for the feature-arc reassembly path
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
						  vlib_node_runtime_t *
						  node,
						  vlib_frame_t * frame)
{
  return ip4_sv_reass_handoff_node_inline (vm, node, frame,
					   true /* is_feature */ );
}
1140
1141
1142/* *INDENT-OFF* */
// registration of the feature-arc handoff node
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
  .name = "ip4-sv-reass-feature-hoff",
  .vector_size = sizeof (u32),
  .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
  .error_strings = ip4_sv_reass_handoff_error_strings,
  .format_trace = format_ip4_sv_reass_handoff_trace,

  .n_next_nodes = 1,

  .next_nodes = {
    [0] = "error-drop",
  },
};
1157
1158#ifndef CLIB_MARCH_VARIANT
1159int
1160ip4_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
1161{
1162 ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
1163 if (is_enable)
1164 {
1165 if (!rm->feature_use_refcount)
1166 {
1167 ++rm->feature_use_refcount;
1168 return vnet_feature_enable_disable ("ip4-unicast",
1169 "ip4-sv-reassembly-feature",
1170 sw_if_index, 1, 0, 0);
1171 }
1172 ++rm->feature_use_refcount;
1173 }
1174 else
1175 {
1176 --rm->feature_use_refcount;
1177 if (!rm->feature_use_refcount)
1178 return vnet_feature_enable_disable ("ip4-unicast",
1179 "ip4-sv-reassembly-feature",
1180 sw_if_index, 0, 0, 0);
1181 }
1182 return -1;
1183}
1184
// Register a caller-supplied next node for the custom reassembly node
// and return the next index to store in vnet_buffer()->ip.reass.next_index.
uword
ip4_sv_reass_custom_register_next_node (uword node_index)
{
  return vlib_node_add_next (vlib_get_main (), ip4_sv_reass_custom_node.index,
			     node_index);
}
1191#endif
1192
1193/*
1194 * fd.io coding-style-patch-verification: ON
1195 *
1196 * Local Variables:
1197 * eval: (c-set-style "gnu")
1198 * End:
1199 */