Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 1 | /* |
Florin Coras | c5df8c7 | 2019-04-08 07:42:30 -0700 | [diff] [blame] | 2 | * Copyright (c) 2016-2019 Cisco and/or its affiliates. |
Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | * you may not use this file except in compliance with the License. |
| 5 | * You may obtain a copy of the License at: |
| 6 | * |
| 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | * |
| 9 | * Unless required by applicable law or agreed to in writing, software |
| 10 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | * See the License for the specific language governing permissions and |
| 13 | * limitations under the License. |
| 14 | */ |
| 15 | |
| 16 | #ifndef included_tcp_packet_h |
| 17 | #define included_tcp_packet_h |
| 18 | |
| 19 | #include <vnet/vnet.h> |
| 20 | |
/*
 * TCP header flags, listed starting with bit 0 of the flags octet.
 * The list is expanded twice below: once into bit positions
 * (TCP_FLAG_BIT_*) and once into single-bit masks (TCP_FLAG_*).
 */
#define foreach_tcp_flag                                \
  _ (FIN) /**< No more data from sender. */             \
  _ (SYN) /**< Synchronize sequence numbers. */         \
  _ (RST) /**< Reset the connection. */                 \
  _ (PSH) /**< Push function. */                        \
  _ (ACK) /**< Ack field significant. */                \
  _ (URG) /**< Urgent pointer field significant. */     \
  _ (ECE) /**< ECN-echo. Receiver got CE packet */      \
  _ (CWR) /**< Sender reduced congestion window */

/* Bit position of every flag; TCP_N_FLAG_BITS counts the entries. */
enum
{
#define _(f) TCP_FLAG_BIT_##f,
  foreach_tcp_flag
#undef _
  TCP_N_FLAG_BITS,
};

/* Mask of every flag, derived from its bit position. */
enum
{
#define _(f) TCP_FLAG_##f = (1 << TCP_FLAG_BIT_##f),
  foreach_tcp_flag
#undef _
};
| 46 | |
| 47 | typedef struct _tcp_header |
| 48 | { |
| 49 | union |
| 50 | { |
| 51 | struct |
| 52 | { |
| 53 | u16 src_port; /**< Source port. */ |
| 54 | u16 dst_port; /**< Destination port. */ |
| 55 | }; |
| 56 | struct |
| 57 | { |
| 58 | u16 src, dst; |
| 59 | }; |
| 60 | }; |
| 61 | |
| 62 | u32 seq_number; /**< Sequence number of the first data octet in this |
| 63 | * segment, except when SYN is present. If SYN |
| 64 | * is present the seq number is is the ISN and the |
| 65 | * first data octet is ISN+1 */ |
| 66 | u32 ack_number; /**< Acknowledgement number if ACK is set. It contains |
| 67 | * the value of the next sequence number the sender |
| 68 | * of the segment is expecting to receive. */ |
| 69 | u8 data_offset_and_reserved; |
| 70 | u8 flags; /**< Flags: see the macro above */ |
| 71 | u16 window; /**< Number of bytes sender is willing to receive. */ |
| 72 | |
| 73 | u16 checksum; /**< Checksum of TCP pseudo header and data. */ |
| 74 | u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */ |
| 75 | } __attribute__ ((packed)) tcp_header_t; |
| 76 | |
/* Data offset field: upper 4 bits of data_offset_and_reserved */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)

/* Flag tests that return 0 or !0 (the masked flag bit, not normalized) */
#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)

/* Flag tests that return 0 or 1. The whole expansion is parenthesized so
 * the result composes safely inside larger expressions. */
#define tcp_is_syn(_th) (!!tcp_syn (_th))
#define tcp_is_fin(_th) (!!tcp_fin (_th))
| 91 | |
| 92 | always_inline int |
| 93 | tcp_header_bytes (tcp_header_t * t) |
| 94 | { |
| 95 | return tcp_doff (t) * sizeof (u32); |
| 96 | } |
| 97 | |
/*
 * TCP options.
 */

/** Option kind octet values recognised by this header. */
typedef enum tcp_option_type
{
  TCP_OPTION_EOL = 0,		/**< End of options. */
  TCP_OPTION_NOOP = 1,		/**< No operation. */
  TCP_OPTION_MSS = 2,		/**< Limit MSS. */
  TCP_OPTION_WINDOW_SCALE = 3,	/**< Window scale. */
  TCP_OPTION_SACK_PERMITTED = 4,	/**< Selective Ack permitted. */
  TCP_OPTION_SACK_BLOCK = 5,	/**< Selective Ack block. */
  TCP_OPTION_TIMESTAMP = 8,	/**< Timestamps. */
  TCP_OPTION_UTO = 28,		/**< User timeout. */
  TCP_OPTION_AO = 29,		/**< Authentication Option. */
} tcp_option_type_t;
| 114 | |
/*
 * Flags kept in tcp_options_t.flags. The list is expanded into bit
 * positions (TCP_OPTS_FLAG_BIT_*) and into masks (TCP_OPTS_FLAG_*).
 */
#define foreach_tcp_options_flag                                        \
  _ (MSS)            /**< MSS advertised in SYN */                      \
  _ (TSTAMP)         /**< Timestamp capability advertised in SYN */     \
  _ (WSCALE)         /**< Wnd scale capability advertised in SYN */     \
  _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */          \
  _ (SACK)           /**< SACK present */

/* Bit position of every option flag. */
enum
{
#define _(f) TCP_OPTS_FLAG_BIT_##f,
  foreach_tcp_options_flag
#undef _
  TCP_OPTIONS_N_FLAG_BITS,
};

/* Mask of every option flag. */
enum
{
#define _(f) TCP_OPTS_FLAG_##f = (1 << TCP_OPTS_FLAG_BIT_##f),
  foreach_tcp_options_flag
#undef _
};
| 136 | |
| 137 | typedef struct _sack_block |
| 138 | { |
| 139 | u32 start; /**< Start sequence number */ |
Florin Coras | 6792ec0 | 2017-03-13 03:49:51 -0700 | [diff] [blame] | 140 | u32 end; /**< End sequence number (first outside) */ |
Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 141 | } sack_block_t; |
| 142 | |
| 143 | typedef struct |
| 144 | { |
| 145 | u8 flags; /** Option flags, see above */ |
Florin Coras | f6359c8 | 2017-06-19 12:26:09 -0400 | [diff] [blame] | 146 | u8 wscale; /**< Window scale advertised */ |
Florin Coras | 36ebcff | 2019-09-12 18:36:44 -0700 | [diff] [blame] | 147 | u16 mss; /**< Maximum segment size advertised */ |
Florin Coras | f6359c8 | 2017-06-19 12:26:09 -0400 | [diff] [blame] | 148 | u32 tsval; /**< Timestamp value */ |
Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 149 | u32 tsecr; /**< Echoed/reflected time stamp */ |
Florin Coras | f6359c8 | 2017-06-19 12:26:09 -0400 | [diff] [blame] | 150 | sack_block_t *sacks; /**< SACK blocks */ |
Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 151 | u8 n_sack_blocks; /**< Number of SACKs blocks */ |
| 152 | } tcp_options_t; |
| 153 | |
/* Option flag tests that return 0 or !0 (the masked bit, not normalized) */
#define tcp_opts_mss(_to)            ((_to)->flags & TCP_OPTS_FLAG_MSS)
#define tcp_opts_tstamp(_to)         ((_to)->flags & TCP_OPTS_FLAG_TSTAMP)
#define tcp_opts_wscale(_to)         ((_to)->flags & TCP_OPTS_FLAG_WSCALE)
#define tcp_opts_sack(_to)           ((_to)->flags & TCP_OPTS_FLAG_SACK)
#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
| 160 | |
/* TCP option lengths in bytes, kind and length octets included */
#define TCP_OPTION_LEN_EOL              1
#define TCP_OPTION_LEN_NOOP             1
#define TCP_OPTION_LEN_MSS              4
#define TCP_OPTION_LEN_WINDOW_SCALE     3
#define TCP_OPTION_LEN_SACK_PERMITTED   2
#define TCP_OPTION_LEN_TIMESTAMP        10
#define TCP_OPTION_LEN_SACK_BLOCK       8	/* one block, no kind/length */

/* Protocol limits */
#define TCP_HDR_LEN_MAX                 60
#define TCP_WND_MAX                     65535U
#define TCP_MAX_WND_SCALE               14	/* See RFC 1323 */
#define TCP_OPTS_ALIGN                  4
#define TCP_OPTS_MAX_SACK_BLOCKS        3
#define TCP_MAX_GSO_SZ                  65536
Florin Coras | 999840c | 2020-03-18 20:31:34 +0000 | [diff] [blame] | 176 | |
/*
 * Modulo arithmetic for TCP sequence numbers: the difference is
 * reinterpreted as a signed 32-bit value, so comparisons stay correct
 * across wrap-around. seq_max evaluates its arguments more than once.
 */
#define seq_lt(_s1, _s2) ((i32) ((_s1) - (_s2)) < 0)
#define seq_gt(_s1, _s2) ((i32) ((_s1) - (_s2)) > 0)
#define seq_leq(_s1, _s2) (!seq_gt ((_s1), (_s2)))
#define seq_geq(_s1, _s2) (!seq_lt ((_s1), (_s2)))
#define seq_max(_s1, _s2) (seq_leq ((_s1), (_s2)) ? (_s2) : (_s1))

/* Modulo arithmetic for timestamps, same scheme as for sequence numbers */
#define timestamp_lt(_t1, _t2) seq_lt ((_t1), (_t2))
#define timestamp_leq(_t1, _t2) seq_leq ((_t1), (_t2))
| 187 | |
| 188 | /** |
| 189 | * Parse TCP header options. |
| 190 | * |
| 191 | * @param th TCP header |
| 192 | * @param to TCP options data structure to be populated |
| 193 | * @param is_syn set if packet is syn |
| 194 | * @return -1 if parsing failed |
| 195 | */ |
| 196 | always_inline int |
| 197 | tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn) |
| 198 | { |
| 199 | const u8 *data; |
| 200 | u8 opt_len, opts_len, kind; |
| 201 | int j; |
| 202 | sack_block_t b; |
| 203 | |
| 204 | opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); |
| 205 | data = (const u8 *) (th + 1); |
| 206 | |
| 207 | /* Zero out all flags but those set in SYN */ |
| 208 | to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE |
| 209 | | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS); |
| 210 | |
| 211 | for (; opts_len > 0; opts_len -= opt_len, data += opt_len) |
| 212 | { |
| 213 | kind = data[0]; |
| 214 | |
| 215 | /* Get options length */ |
| 216 | if (kind == TCP_OPTION_EOL) |
| 217 | break; |
| 218 | else if (kind == TCP_OPTION_NOOP) |
| 219 | { |
| 220 | opt_len = 1; |
| 221 | continue; |
| 222 | } |
| 223 | else |
| 224 | { |
| 225 | /* broken options */ |
| 226 | if (opts_len < 2) |
| 227 | return -1; |
| 228 | opt_len = data[1]; |
| 229 | |
| 230 | /* weird option length */ |
| 231 | if (opt_len < 2 || opt_len > opts_len) |
| 232 | return -1; |
| 233 | } |
| 234 | |
| 235 | /* Parse options */ |
| 236 | switch (kind) |
| 237 | { |
| 238 | case TCP_OPTION_MSS: |
| 239 | if (!is_syn) |
| 240 | break; |
| 241 | if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) |
| 242 | { |
| 243 | to->flags |= TCP_OPTS_FLAG_MSS; |
| 244 | to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); |
| 245 | } |
| 246 | break; |
| 247 | case TCP_OPTION_WINDOW_SCALE: |
| 248 | if (!is_syn) |
| 249 | break; |
| 250 | if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) |
| 251 | { |
| 252 | to->flags |= TCP_OPTS_FLAG_WSCALE; |
| 253 | to->wscale = data[2]; |
| 254 | if (to->wscale > TCP_MAX_WND_SCALE) |
| 255 | to->wscale = TCP_MAX_WND_SCALE; |
| 256 | } |
| 257 | break; |
| 258 | case TCP_OPTION_TIMESTAMP: |
| 259 | if (is_syn) |
| 260 | to->flags |= TCP_OPTS_FLAG_TSTAMP; |
| 261 | if ((to->flags & TCP_OPTS_FLAG_TSTAMP) |
| 262 | && opt_len == TCP_OPTION_LEN_TIMESTAMP) |
| 263 | { |
| 264 | to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); |
| 265 | to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); |
| 266 | } |
| 267 | break; |
| 268 | case TCP_OPTION_SACK_PERMITTED: |
| 269 | if (!is_syn) |
| 270 | break; |
| 271 | if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) |
| 272 | to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; |
| 273 | break; |
| 274 | case TCP_OPTION_SACK_BLOCK: |
| 275 | /* If SACK permitted was not advertised or a SYN, break */ |
| 276 | if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) |
| 277 | break; |
| 278 | |
| 279 | /* If too short or not correctly formatted, break */ |
| 280 | if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) |
| 281 | break; |
| 282 | |
| 283 | to->flags |= TCP_OPTS_FLAG_SACK; |
| 284 | to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; |
| 285 | vec_reset_length (to->sacks); |
| 286 | for (j = 0; j < to->n_sack_blocks; j++) |
| 287 | { |
| 288 | b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j)); |
| 289 | b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j)); |
| 290 | vec_add1 (to->sacks, b); |
| 291 | } |
| 292 | break; |
| 293 | default: |
| 294 | /* Nothing to see here */ |
| 295 | continue; |
| 296 | } |
| 297 | } |
| 298 | return 0; |
| 299 | } |
| 300 | |
| 301 | /** |
| 302 | * Write TCP options to segment. |
| 303 | * |
| 304 | * @param data buffer where to write the options |
| 305 | * @param opts options to write |
| 306 | * @return length of options written |
| 307 | */ |
| 308 | always_inline u32 |
| 309 | tcp_options_write (u8 * data, tcp_options_t * opts) |
| 310 | { |
| 311 | u32 opts_len = 0; |
| 312 | u32 buf, seq_len = 4; |
| 313 | |
| 314 | if (tcp_opts_mss (opts)) |
| 315 | { |
| 316 | *data++ = TCP_OPTION_MSS; |
| 317 | *data++ = TCP_OPTION_LEN_MSS; |
| 318 | buf = clib_host_to_net_u16 (opts->mss); |
| 319 | clib_memcpy_fast (data, &buf, sizeof (opts->mss)); |
| 320 | data += sizeof (opts->mss); |
| 321 | opts_len += TCP_OPTION_LEN_MSS; |
| 322 | } |
| 323 | |
| 324 | if (tcp_opts_wscale (opts)) |
| 325 | { |
| 326 | *data++ = TCP_OPTION_WINDOW_SCALE; |
| 327 | *data++ = TCP_OPTION_LEN_WINDOW_SCALE; |
| 328 | *data++ = opts->wscale; |
| 329 | opts_len += TCP_OPTION_LEN_WINDOW_SCALE; |
| 330 | } |
| 331 | |
| 332 | if (tcp_opts_sack_permitted (opts)) |
| 333 | { |
| 334 | *data++ = TCP_OPTION_SACK_PERMITTED; |
| 335 | *data++ = TCP_OPTION_LEN_SACK_PERMITTED; |
| 336 | opts_len += TCP_OPTION_LEN_SACK_PERMITTED; |
| 337 | } |
| 338 | |
| 339 | if (tcp_opts_tstamp (opts)) |
| 340 | { |
| 341 | *data++ = TCP_OPTION_TIMESTAMP; |
| 342 | *data++ = TCP_OPTION_LEN_TIMESTAMP; |
| 343 | buf = clib_host_to_net_u32 (opts->tsval); |
| 344 | clib_memcpy_fast (data, &buf, sizeof (opts->tsval)); |
| 345 | data += sizeof (opts->tsval); |
| 346 | buf = clib_host_to_net_u32 (opts->tsecr); |
| 347 | clib_memcpy_fast (data, &buf, sizeof (opts->tsecr)); |
| 348 | data += sizeof (opts->tsecr); |
| 349 | opts_len += TCP_OPTION_LEN_TIMESTAMP; |
| 350 | } |
| 351 | |
| 352 | if (tcp_opts_sack (opts)) |
| 353 | { |
| 354 | int i; |
| 355 | |
| 356 | if (opts->n_sack_blocks != 0) |
| 357 | { |
| 358 | *data++ = TCP_OPTION_SACK_BLOCK; |
| 359 | *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; |
| 360 | for (i = 0; i < opts->n_sack_blocks; i++) |
| 361 | { |
| 362 | buf = clib_host_to_net_u32 (opts->sacks[i].start); |
| 363 | clib_memcpy_fast (data, &buf, seq_len); |
| 364 | data += seq_len; |
| 365 | buf = clib_host_to_net_u32 (opts->sacks[i].end); |
| 366 | clib_memcpy_fast (data, &buf, seq_len); |
| 367 | data += seq_len; |
| 368 | } |
| 369 | opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | /* Terminate TCP options */ |
| 374 | if (opts_len % 4) |
| 375 | { |
| 376 | *data++ = TCP_OPTION_EOL; |
| 377 | opts_len += TCP_OPTION_LEN_EOL; |
| 378 | } |
| 379 | |
| 380 | /* Pad with zeroes to a u32 boundary */ |
| 381 | while (opts_len % 4) |
| 382 | { |
| 383 | *data++ = TCP_OPTION_NOOP; |
| 384 | opts_len += TCP_OPTION_LEN_NOOP; |
| 385 | } |
| 386 | return opts_len; |
| 387 | } |
| 388 | |
Dave Barach | 68b0fb0 | 2017-02-28 15:15:56 -0500 | [diff] [blame] | 389 | #endif /* included_tcp_packet_h */ |
| 390 | |
| 391 | /* |
| 392 | * fd.io coding-style-patch-verification: ON |
| 393 | * |
| 394 | * Local Variables: |
| 395 | * eval: (c-set-style "gnu") |
| 396 | * End: |
| 397 | */ |