/*
 * gre.c: gre
 *
 * Copyright (c) 2012 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 17 | |
| 18 | #include <vnet/vnet.h> |
| 19 | #include <vnet/gre/gre.h> |
| 20 | |
| 21 | gre_main_t gre_main; |
| 22 | |
| 23 | typedef CLIB_PACKED (struct { |
| 24 | ip4_header_t ip4; |
| 25 | gre_header_t gre; |
| 26 | }) ip4_and_gre_header_t; |
| 27 | |
| 28 | typedef struct { |
| 29 | union { |
| 30 | ip4_and_gre_header_t ip4_and_gre; |
| 31 | u64 as_u64[3]; |
| 32 | }; |
| 33 | } ip4_and_gre_union_t; |
| 34 | |
| 35 | |
| 36 | /* Packet trace structure */ |
| 37 | typedef struct { |
| 38 | /* Tunnel-id / index in tunnel vector */ |
| 39 | u32 tunnel_id; |
| 40 | |
| 41 | /* pkt length */ |
| 42 | u32 length; |
| 43 | |
| 44 | /* tunnel ip4 addresses */ |
| 45 | ip4_address_t src; |
| 46 | ip4_address_t dst; |
| 47 | } gre_tx_trace_t; |
| 48 | |
| 49 | u8 * format_gre_tx_trace (u8 * s, va_list * args) |
| 50 | { |
| 51 | CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); |
| 52 | CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); |
| 53 | gre_tx_trace_t * t = va_arg (*args, gre_tx_trace_t *); |
| 54 | |
| 55 | s = format (s, "GRE: tunnel %d len %d src %U dst %U", |
| 56 | t->tunnel_id, clib_net_to_host_u16 (t->length), |
| 57 | format_ip4_address, &t->src.as_u8, |
| 58 | format_ip4_address, &t->dst.as_u8); |
| 59 | return s; |
| 60 | } |
| 61 | |
| 62 | u8 * format_gre_protocol (u8 * s, va_list * args) |
| 63 | { |
| 64 | gre_protocol_t p = va_arg (*args, u32); |
| 65 | gre_main_t * gm = &gre_main; |
| 66 | gre_protocol_info_t * pi = gre_get_protocol_info (gm, p); |
| 67 | |
| 68 | if (pi) |
| 69 | s = format (s, "%s", pi->name); |
| 70 | else |
| 71 | s = format (s, "0x%04x", p); |
| 72 | |
| 73 | return s; |
| 74 | } |
| 75 | |
| 76 | u8 * format_gre_header_with_length (u8 * s, va_list * args) |
| 77 | { |
| 78 | gre_main_t * gm = &gre_main; |
| 79 | gre_header_t * h = va_arg (*args, gre_header_t *); |
| 80 | u32 max_header_bytes = va_arg (*args, u32); |
| 81 | gre_protocol_t p = clib_net_to_host_u16 (h->protocol); |
| 82 | uword indent, header_bytes; |
| 83 | |
| 84 | header_bytes = sizeof (h[0]); |
| 85 | if (max_header_bytes != 0 && header_bytes > max_header_bytes) |
| 86 | return format (s, "gre header truncated"); |
| 87 | |
| 88 | indent = format_get_indent (s); |
| 89 | |
| 90 | s = format (s, "GRE %U", format_gre_protocol, p); |
| 91 | |
| 92 | if (max_header_bytes != 0 && header_bytes > max_header_bytes) |
| 93 | { |
| 94 | gre_protocol_info_t * pi = gre_get_protocol_info (gm, p); |
| 95 | vlib_node_t * node = vlib_get_node (gm->vlib_main, pi->node_index); |
| 96 | if (node->format_buffer) |
| 97 | s = format (s, "\n%U%U", |
| 98 | format_white_space, indent, |
| 99 | node->format_buffer, (void *) (h + 1), |
| 100 | max_header_bytes - header_bytes); |
| 101 | } |
| 102 | |
| 103 | return s; |
| 104 | } |
| 105 | |
| 106 | u8 * format_gre_header (u8 * s, va_list * args) |
| 107 | { |
| 108 | gre_header_t * h = va_arg (*args, gre_header_t *); |
| 109 | return format (s, "%U", format_gre_header_with_length, h, 0); |
| 110 | } |
| 111 | |
| 112 | /* Returns gre protocol as an int in host byte order. */ |
| 113 | uword |
| 114 | unformat_gre_protocol_host_byte_order (unformat_input_t * input, |
| 115 | va_list * args) |
| 116 | { |
| 117 | u16 * result = va_arg (*args, u16 *); |
| 118 | gre_main_t * gm = &gre_main; |
| 119 | int i; |
| 120 | |
| 121 | /* Named type. */ |
| 122 | if (unformat_user (input, unformat_vlib_number_by_name, |
| 123 | gm->protocol_info_by_name, &i)) |
| 124 | { |
| 125 | gre_protocol_info_t * pi = vec_elt_at_index (gm->protocol_infos, i); |
| 126 | *result = pi->protocol; |
| 127 | return 1; |
| 128 | } |
| 129 | |
| 130 | return 0; |
| 131 | } |
| 132 | |
| 133 | uword |
| 134 | unformat_gre_protocol_net_byte_order (unformat_input_t * input, |
| 135 | va_list * args) |
| 136 | { |
| 137 | u16 * result = va_arg (*args, u16 *); |
| 138 | if (! unformat_user (input, unformat_gre_protocol_host_byte_order, result)) |
| 139 | return 0; |
| 140 | *result = clib_host_to_net_u16 ((u16) *result); |
| 141 | return 1; |
| 142 | } |
| 143 | |
| 144 | uword |
| 145 | unformat_gre_header (unformat_input_t * input, va_list * args) |
| 146 | { |
| 147 | u8 ** result = va_arg (*args, u8 **); |
| 148 | gre_header_t _h, * h = &_h; |
| 149 | u16 p; |
| 150 | |
| 151 | if (! unformat (input, "%U", |
| 152 | unformat_gre_protocol_host_byte_order, &p)) |
| 153 | return 0; |
| 154 | |
| 155 | h->protocol = clib_host_to_net_u16 (p); |
| 156 | |
| 157 | /* Add header to result. */ |
| 158 | { |
| 159 | void * p; |
| 160 | u32 n_bytes = sizeof (h[0]); |
| 161 | |
| 162 | vec_add2 (*result, p, n_bytes); |
| 163 | memcpy (p, h, n_bytes); |
| 164 | } |
| 165 | |
| 166 | return 1; |
| 167 | } |
| 168 | |
| 169 | static uword gre_set_rewrite (vnet_main_t * vnm, |
| 170 | u32 sw_if_index, |
| 171 | u32 l3_type, |
| 172 | void * dst_address, |
| 173 | void * rewrite, |
| 174 | uword max_rewrite_bytes) |
| 175 | { |
| 176 | /* |
| 177 | * Conundrum: packets from tun/tap destined for the tunnel |
| 178 | * actually have this rewrite applied. Transit packets do not. |
| 179 | * To make the two cases equivalent, don't generate a |
| 180 | * rewrite here, build the entire header in the fast path. |
| 181 | */ |
| 182 | return 0; |
| 183 | |
| 184 | #ifdef THINGS_WORKED_AS_ONE_MIGHT_LIKE |
| 185 | ip4_and_gre_header_t * h = rewrite; |
| 186 | gre_protocol_t protocol; |
| 187 | |
| 188 | if (max_rewrite_bytes < sizeof (h[0])) |
| 189 | return 0; |
| 190 | |
| 191 | switch (l3_type) { |
| 192 | #define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = GRE_PROTOCOL_##b; break |
| 193 | _ (IP4, ip4); |
| 194 | _ (IP6, ip6); |
| 195 | #undef _ |
| 196 | default: |
| 197 | return 0; |
| 198 | } |
| 199 | |
| 200 | memset (h, 0, sizeof (*h)); |
| 201 | h->ip4.ip_version_and_header_length = 0x45; |
| 202 | h->ip4.ttl = 64; |
| 203 | h->ip4.protocol = IP_PROTOCOL_GRE; |
| 204 | h->gre.protocol = clib_host_to_net_u16 (protocol); |
| 205 | |
| 206 | return sizeof (h[0]); |
| 207 | #endif |
| 208 | } |
| 209 | |
| 210 | static uword |
| 211 | gre_interface_tx (vlib_main_t * vm, |
| 212 | vlib_node_runtime_t * node, |
| 213 | vlib_frame_t * frame) |
| 214 | { |
| 215 | gre_main_t * gm = &gre_main; |
| 216 | u32 next_index; |
| 217 | u32 * from, * to_next, n_left_from, n_left_to_next; |
| 218 | vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; |
| 219 | gre_tunnel_t *t = pool_elt_at_index (gm->tunnels, rd->dev_instance); |
| 220 | |
| 221 | /* Vector of buffer / pkt indices we're supposed to process */ |
| 222 | from = vlib_frame_vector_args (frame); |
| 223 | |
| 224 | /* Number of buffers / pkts */ |
| 225 | n_left_from = frame->n_vectors; |
| 226 | |
| 227 | /* Speculatively send the first buffer to the last disposition we used */ |
| 228 | next_index = node->cached_next_index; |
| 229 | |
| 230 | while (n_left_from > 0) |
| 231 | { |
| 232 | /* set up to enqueue to our disposition with index = next_index */ |
| 233 | vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); |
| 234 | |
| 235 | /* |
| 236 | * As long as we have enough pkts left to process two pkts |
| 237 | * and prefetch two pkts... |
| 238 | */ |
| 239 | while (n_left_from >= 4 && n_left_to_next >= 2) |
| 240 | { |
| 241 | vlib_buffer_t * b0, * b1; |
| 242 | ip4_header_t * ip0, * ip1; |
| 243 | ip4_and_gre_union_t * h0, * h1; |
| 244 | u32 bi0, next0, bi1, next1; |
| 245 | __attribute__((unused)) u8 error0, error1; |
| 246 | u16 gre_protocol0, gre_protocol1; |
| 247 | |
| 248 | /* Prefetch the next iteration */ |
| 249 | { |
| 250 | vlib_buffer_t * p2, * p3; |
| 251 | |
| 252 | p2 = vlib_get_buffer (vm, from[2]); |
| 253 | p3 = vlib_get_buffer (vm, from[3]); |
| 254 | |
| 255 | vlib_prefetch_buffer_header (p2, LOAD); |
| 256 | vlib_prefetch_buffer_header (p3, LOAD); |
| 257 | |
| 258 | /* |
| 259 | * Prefetch packet data. We expect to overwrite |
| 260 | * the inbound L2 header with an ip header and a |
| 261 | * gre header. Might want to prefetch the last line |
| 262 | * of rewrite space as well; need profile data |
| 263 | */ |
| 264 | CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); |
| 265 | CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); |
| 266 | } |
| 267 | |
| 268 | /* Pick up the next two buffer indices */ |
| 269 | bi0 = from[0]; |
| 270 | bi1 = from[1]; |
| 271 | |
| 272 | /* Speculatively enqueue them where we sent the last buffer */ |
| 273 | to_next[0] = bi0; |
| 274 | to_next[1] = bi1; |
| 275 | from += 2; |
| 276 | to_next += 2; |
| 277 | n_left_to_next -= 2; |
| 278 | n_left_from -= 2; |
| 279 | |
| 280 | b0 = vlib_get_buffer (vm, bi0); |
| 281 | b1 = vlib_get_buffer (vm, bi1); |
| 282 | |
| 283 | vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; |
| 284 | vnet_buffer (b1)->sw_if_index[VLIB_TX] = t->outer_fib_index; |
| 285 | |
| 286 | ip0 = vlib_buffer_get_current (b0); |
| 287 | gre_protocol0 = clib_net_to_host_u16 (0x800); |
| 288 | gre_protocol0 = |
| 289 | ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? |
| 290 | 0x86DD : gre_protocol0; |
| 291 | |
| 292 | ip1 = vlib_buffer_get_current (b1); |
| 293 | gre_protocol1 = clib_net_to_host_u16 (0x800); |
| 294 | gre_protocol1 = |
| 295 | ((ip1->ip_version_and_header_length & 0xF0) == 0x60) ? |
| 296 | 0x86DD : gre_protocol1; |
| 297 | |
| 298 | vlib_buffer_advance (b0, -sizeof(*h0)); |
| 299 | vlib_buffer_advance (b1, -sizeof(*h1)); |
| 300 | |
| 301 | h0 = vlib_buffer_get_current (b0); |
| 302 | h1 = vlib_buffer_get_current (b1); |
| 303 | h0->as_u64[0] = 0; |
| 304 | h0->as_u64[1] = 0; |
| 305 | h0->as_u64[2] = 0; |
| 306 | |
| 307 | h1->as_u64[0] = 0; |
| 308 | h1->as_u64[1] = 0; |
| 309 | h1->as_u64[2] = 0; |
| 310 | |
| 311 | ip0 = &h0->ip4_and_gre.ip4; |
| 312 | h0->ip4_and_gre.gre.protocol = gre_protocol0; |
| 313 | ip0->ip_version_and_header_length = 0x45; |
| 314 | ip0->ttl = 254; |
| 315 | ip0->protocol = IP_PROTOCOL_GRE; |
| 316 | |
| 317 | ip1 = &h1->ip4_and_gre.ip4; |
| 318 | h1->ip4_and_gre.gre.protocol = gre_protocol1; |
| 319 | ip1->ip_version_and_header_length = 0x45; |
| 320 | ip1->ttl = 254; |
| 321 | ip1->protocol = IP_PROTOCOL_GRE; |
| 322 | |
| 323 | ip0->length = |
| 324 | clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); |
| 325 | ip1->length = |
| 326 | clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); |
| 327 | ip0->src_address.as_u32 = t->tunnel_src.as_u32; |
| 328 | ip1->src_address.as_u32 = t->tunnel_src.as_u32; |
| 329 | ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; |
| 330 | ip1->dst_address.as_u32 = t->tunnel_dst.as_u32; |
| 331 | ip0->checksum = ip4_header_checksum (ip0); |
| 332 | ip1->checksum = ip4_header_checksum (ip1); |
| 333 | |
| 334 | /* ip4_lookup will route to the tunnel partner */ |
| 335 | next0 = GRE_OUTPUT_NEXT_LOOKUP; |
| 336 | next1 = GRE_OUTPUT_NEXT_LOOKUP; |
| 337 | error0 = GRE_ERROR_NONE; |
| 338 | error1 = GRE_ERROR_NONE; |
| 339 | |
| 340 | /* |
| 341 | * Enqueue 2 pkts. This macro deals with next0 != next1, |
| 342 | * acquiring enqueue rights to the indicated next |
| 343 | * node input frame, etc. |
| 344 | */ |
| 345 | vlib_validate_buffer_enqueue_x2 (vm, node, next_index, |
| 346 | to_next, n_left_to_next, |
| 347 | bi0, bi1, next0, next1); |
| 348 | } |
| 349 | |
| 350 | while (n_left_from > 0 && n_left_to_next > 0) |
| 351 | { |
| 352 | vlib_buffer_t * b0; |
| 353 | ip4_header_t * ip0; |
| 354 | ip4_and_gre_union_t * h0; |
| 355 | u32 bi0, next0; |
| 356 | __attribute__((unused)) u8 error0; |
| 357 | u16 gre_protocol0; |
| 358 | |
| 359 | bi0 = to_next[0] = from[0]; |
| 360 | from += 1; |
| 361 | n_left_from -= 1; |
| 362 | to_next += 1; |
| 363 | n_left_to_next -= 1; |
| 364 | |
| 365 | b0 = vlib_get_buffer (vm, bi0); |
| 366 | |
| 367 | vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; |
| 368 | ip0 = vlib_buffer_get_current (b0); |
| 369 | gre_protocol0 = clib_net_to_host_u16 (0x800); |
| 370 | gre_protocol0 = |
| 371 | ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? |
| 372 | 0x86DD : gre_protocol0; |
| 373 | |
| 374 | vlib_buffer_advance (b0, -sizeof(*h0)); |
| 375 | |
| 376 | h0 = vlib_buffer_get_current (b0); |
| 377 | h0->as_u64[0] = 0; |
| 378 | h0->as_u64[1] = 0; |
| 379 | h0->as_u64[2] = 0; |
| 380 | |
| 381 | ip0 = &h0->ip4_and_gre.ip4; |
| 382 | h0->ip4_and_gre.gre.protocol = gre_protocol0; |
| 383 | ip0->ip_version_and_header_length = 0x45; |
| 384 | ip0->ttl = 254; |
| 385 | ip0->protocol = IP_PROTOCOL_GRE; |
| 386 | ip0->length = |
| 387 | clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); |
| 388 | ip0->src_address.as_u32 = t->tunnel_src.as_u32; |
| 389 | ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; |
| 390 | ip0->checksum = ip4_header_checksum (ip0); |
| 391 | |
| 392 | next0 = GRE_OUTPUT_NEXT_LOOKUP; |
| 393 | error0 = GRE_ERROR_NONE; |
| 394 | |
| 395 | if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) |
| 396 | { |
| 397 | gre_tx_trace_t *tr = vlib_add_trace (vm, node, |
| 398 | b0, sizeof (*tr)); |
| 399 | tr->tunnel_id = t - gm->tunnels; |
| 400 | tr->length = ip0->length; |
| 401 | tr->src.as_u32 = ip0->src_address.as_u32; |
| 402 | tr->dst.as_u32 = ip0->dst_address.as_u32; |
| 403 | } |
| 404 | |
| 405 | vlib_validate_buffer_enqueue_x1 (vm, node, next_index, |
| 406 | to_next, n_left_to_next, |
| 407 | bi0, next0); |
| 408 | } |
| 409 | |
| 410 | vlib_put_next_frame (vm, node, next_index, n_left_to_next); |
| 411 | } |
| 412 | |
| 413 | vlib_node_increment_counter (vm, gre_input_node.index, |
| 414 | GRE_ERROR_PKTS_ENCAP, frame->n_vectors); |
| 415 | |
| 416 | return frame->n_vectors; |
| 417 | } |
| 418 | |
| 419 | static u8 * format_gre_tunnel_name (u8 * s, va_list * args) |
| 420 | { |
| 421 | u32 dev_instance = va_arg (*args, u32); |
| 422 | return format (s, "gre%d", dev_instance); |
| 423 | } |
| 424 | |
| 425 | static u8 * format_gre_device (u8 * s, va_list * args) |
| 426 | { |
| 427 | u32 dev_instance = va_arg (*args, u32); |
| 428 | CLIB_UNUSED (int verbose) = va_arg (*args, int); |
| 429 | |
| 430 | s = format (s, "GRE tunnel: id %d\n", dev_instance); |
| 431 | return s; |
| 432 | } |
| 433 | |
| 434 | VNET_DEVICE_CLASS (gre_device_class) = { |
| 435 | .name = "GRE tunnel device", |
| 436 | .format_device_name = format_gre_tunnel_name, |
| 437 | .format_device = format_gre_device, |
| 438 | .format_tx_trace = format_gre_tx_trace, |
| 439 | .tx_function = gre_interface_tx, |
| 440 | #ifdef SOON |
| 441 | .clear counter = 0; |
| 442 | .admin_up_down_function = 0; |
| 443 | #endif |
| 444 | }; |
| 445 | |
| 446 | |
| 447 | VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = { |
| 448 | .name = "GRE", |
| 449 | .format_header = format_gre_header_with_length, |
| 450 | .unformat_header = unformat_gre_header, |
| 451 | .set_rewrite = gre_set_rewrite, |
| 452 | }; |
| 453 | |
| 454 | static void add_protocol (gre_main_t * gm, |
| 455 | gre_protocol_t protocol, |
| 456 | char * protocol_name) |
| 457 | { |
| 458 | gre_protocol_info_t * pi; |
| 459 | u32 i; |
| 460 | |
| 461 | vec_add2 (gm->protocol_infos, pi, 1); |
| 462 | i = pi - gm->protocol_infos; |
| 463 | |
| 464 | pi->name = protocol_name; |
| 465 | pi->protocol = protocol; |
| 466 | pi->next_index = pi->node_index = ~0; |
| 467 | |
| 468 | hash_set (gm->protocol_info_by_protocol, protocol, i); |
| 469 | hash_set_mem (gm->protocol_info_by_name, pi->name, i); |
| 470 | } |
| 471 | |
| 472 | static clib_error_t * gre_init (vlib_main_t * vm) |
| 473 | { |
| 474 | gre_main_t * gm = &gre_main; |
| 475 | clib_error_t * error; |
| 476 | ip_main_t * im = &ip_main; |
| 477 | ip_protocol_info_t * pi; |
| 478 | |
| 479 | memset (gm, 0, sizeof (gm[0])); |
| 480 | gm->vlib_main = vm; |
| 481 | gm->vnet_main = vnet_get_main(); |
| 482 | |
| 483 | if ((error = vlib_call_init_function (vm, ip_main_init))) |
| 484 | return error; |
| 485 | |
| 486 | if ((error = vlib_call_init_function (vm, ip4_lookup_init))) |
| 487 | return error; |
| 488 | |
| 489 | /* Set up the ip packet generator */ |
| 490 | pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE); |
| 491 | pi->format_header = format_gre_header; |
| 492 | pi->unformat_pg_edit = unformat_pg_gre_header; |
| 493 | |
| 494 | gm->protocol_info_by_name = hash_create_string (0, sizeof (uword)); |
| 495 | gm->protocol_info_by_protocol = hash_create (0, sizeof (uword)); |
| 496 | gm->tunnel_by_key = hash_create (0, sizeof (uword)); |
| 497 | |
| 498 | #define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s); |
| 499 | foreach_gre_protocol |
| 500 | #undef _ |
| 501 | |
| 502 | return vlib_call_init_function (vm, gre_input_init); |
| 503 | } |
| 504 | |
| 505 | VLIB_INIT_FUNCTION (gre_init); |
| 506 | |
| 507 | gre_main_t * gre_get_main (vlib_main_t * vm) |
| 508 | { |
| 509 | vlib_call_init_function (vm, gre_init); |
| 510 | return &gre_main; |
| 511 | } |
| 512 | |