| /* |
| * Copyright (c) 2016-2019 Cisco and/or its affiliates. |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef included_tcp_packet_h |
| #define included_tcp_packet_h |
| |
| #include <vnet/ip/ip4_packet.h> |
| #include <vnet/ip/ip6_packet.h> |
| |
/*
 * TCP header flags, listed starting with bit 0:
 *
 *   FIN  No more data from sender.
 *   SYN  Synchronize sequence numbers.
 *   RST  Reset the connection.
 *   PSH  Push function.
 *   ACK  Ack field significant.
 *   URG  Urgent pointer field significant.
 *   ECE  ECN-echo. Receiver got CE packet.
 *   CWR  Sender reduced congestion window.
 */
#define foreach_tcp_flag                                                      \
  _ (FIN)                                                                     \
  _ (SYN)                                                                     \
  _ (RST)                                                                     \
  _ (PSH)                                                                     \
  _ (ACK)                                                                     \
  _ (URG)                                                                     \
  _ (ECE)                                                                     \
  _ (CWR)

/* Bit index of every flag; TCP_N_FLAG_BITS ends up as the flag count. */
enum
{
#define _(flag) TCP_FLAG_BIT_##flag,
  foreach_tcp_flag
#undef _
    TCP_N_FLAG_BITS,
};

/* Single-bit mask of every flag, built from its bit index. */
enum
{
#define _(flag) TCP_FLAG_##flag = (1 << TCP_FLAG_BIT_##flag),
  foreach_tcp_flag
#undef _
};
| |
| typedef struct _tcp_header |
| { |
| union |
| { |
| struct |
| { |
| u16 src_port; /**< Source port. */ |
| u16 dst_port; /**< Destination port. */ |
| }; |
| struct |
| { |
| u16 src, dst; |
| }; |
| }; |
| |
| u32 seq_number; /**< Sequence number of the first data octet in this |
| * segment, except when SYN is present. If SYN |
| * is present the seq number is is the ISN and the |
| * first data octet is ISN+1 */ |
| u32 ack_number; /**< Acknowledgement number if ACK is set. It contains |
| * the value of the next sequence number the sender |
| * of the segment is expecting to receive. */ |
| u8 data_offset_and_reserved; |
| u8 flags; /**< Flags: see the macro above */ |
| u16 window; /**< Number of bytes sender is willing to receive. */ |
| |
| u16 checksum; /**< Checksum of TCP pseudo header and data. */ |
| u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */ |
| } __attribute__ ((packed)) tcp_header_t; |
| |
/* Data offset: the upper nibble of data_offset_and_reserved. The value
 * counts 32-bit words, so multiply by 4 to get the header length in bytes. */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)

/* Flag tests that return 0 or !0. The result is the raw masked bit, not a
 * normalized boolean, so do not compare it against 1. */
#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)

/* Flag tests that return 0 or 1. The whole expansion is parenthesized so
 * that the macros behave as a single operand inside larger expressions. */
#define tcp_is_syn(_th) (!!((_th)->flags & TCP_FLAG_SYN))
#define tcp_is_fin(_th) (!!((_th)->flags & TCP_FLAG_FIN))
| |
| always_inline int |
| tcp_header_bytes (tcp_header_t * t) |
| { |
| return tcp_doff (t) * sizeof (u32); |
| } |
| |
| /* |
| * TCP options. |
| */ |
| |
/** Values of the "kind" octet that starts every TCP option. */
typedef enum tcp_option_type
{
  /** End of options. */
  TCP_OPTION_EOL = 0,
  /** No operation. */
  TCP_OPTION_NOOP = 1,
  /** Limit MSS. */
  TCP_OPTION_MSS = 2,
  /** Window scale. */
  TCP_OPTION_WINDOW_SCALE = 3,
  /** Selective Ack permitted. */
  TCP_OPTION_SACK_PERMITTED = 4,
  /** Selective Ack block. */
  TCP_OPTION_SACK_BLOCK = 5,
  /** Timestamps. */
  TCP_OPTION_TIMESTAMP = 8,
  /** User timeout. */
  TCP_OPTION_UTO = 28,
  /** Authentication Option. */
  TCP_OPTION_AO = 29,
} tcp_option_type_t;
| |
/*
 * Flags kept in tcp_options_t.flags, listed starting with bit 0:
 *
 *   MSS             MSS advertised in SYN
 *   TSTAMP          Timestamp capability advertised in SYN
 *   WSCALE          Wnd scale capability advertised in SYN
 *   SACK_PERMITTED  SACK capability advertised in SYN
 *   SACK            SACK present
 */
#define foreach_tcp_options_flag                                              \
  _ (MSS)                                                                     \
  _ (TSTAMP)                                                                  \
  _ (WSCALE)                                                                  \
  _ (SACK_PERMITTED)                                                          \
  _ (SACK)

/* Bit index of every option flag; the last entry is the flag count. */
enum
{
#define _(flag) TCP_OPTS_FLAG_BIT_##flag,
  foreach_tcp_options_flag
#undef _
    TCP_OPTIONS_N_FLAG_BITS,
};

/* Single-bit mask of every option flag, built from its bit index. */
enum
{
#define _(flag) TCP_OPTS_FLAG_##flag = (1 << TCP_OPTS_FLAG_BIT_##flag),
  foreach_tcp_options_flag
#undef _
};
| |
| typedef struct _sack_block |
| { |
| u32 start; /**< Start sequence number */ |
| u32 end; /**< End sequence number (first outside) */ |
| } sack_block_t; |
| |
| typedef struct |
| { |
| sack_block_t *sacks; /**< SACK blocks */ |
| u32 tsval; /**< Timestamp value */ |
| u32 tsecr; /**< Echoed/reflected time stamp */ |
| u16 mss; /**< Maximum segment size advertised */ |
| u8 flags; /**< Option flags, see above */ |
| u8 wscale; /**< Window scale advertised */ |
| u8 n_sack_blocks; /**< Number of SACKs blocks */ |
| } tcp_options_t; |
| |
/* Option flag tests. Each one yields 0 or the (non-zero) mask of the flag,
 * not a normalized boolean. */
#define tcp_opts_mss(_opts)	((_opts)->flags & TCP_OPTS_FLAG_MSS)
#define tcp_opts_tstamp(_opts)	((_opts)->flags & TCP_OPTS_FLAG_TSTAMP)
#define tcp_opts_wscale(_opts)	((_opts)->flags & TCP_OPTS_FLAG_WSCALE)
#define tcp_opts_sack(_opts)	((_opts)->flags & TCP_OPTS_FLAG_SACK)
#define tcp_opts_sack_permitted(_opts)                                        \
  ((_opts)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
| |
/* Length in bytes of each TCP option, kind and length octets included.
 * TCP_OPTION_LEN_SACK_BLOCK is the exception: it is the size of one SACK
 * block, not of the whole SACK option. */
#define TCP_OPTION_LEN_EOL		1
#define TCP_OPTION_LEN_NOOP		1
#define TCP_OPTION_LEN_MSS		4
#define TCP_OPTION_LEN_WINDOW_SCALE	3
#define TCP_OPTION_LEN_SACK_PERMITTED	2
#define TCP_OPTION_LEN_TIMESTAMP	10
#define TCP_OPTION_LEN_SACK_BLOCK	8

/* Protocol limits */
#define TCP_HDR_LEN_MAX			60	/* 4-bit data offset: 15 words */
#define TCP_WND_MAX			65535U	/* largest 16-bit window */
#define TCP_MAX_WND_SCALE		14	/* See RFC 1323 */
#define TCP_OPTS_ALIGN			4	/* options end on a u32 boundary */
#define TCP_OPTS_MAX_SACK_BLOCKS	3
#define TCP_MAX_GSO_SZ			65536
| |
/* Modulo arithmetic for TCP sequence numbers. Two values are ordered by the
 * sign of their difference reinterpreted as i32. Arguments may be evaluated
 * more than once (seq_max), so they must be free of side effects. */
#define seq_lt(_a, _b)	((i32) ((_a) - (_b)) < 0)
#define seq_leq(_a, _b) ((i32) ((_a) - (_b)) <= 0)
#define seq_gt(_a, _b)	((i32) ((_a) - (_b)) > 0)
#define seq_geq(_a, _b) ((i32) ((_a) - (_b)) >= 0)
#define seq_max(_a, _b) (seq_gt ((_a), (_b)) ? (_a) : (_b))

/* Modulo arithmetic for timestamps, same scheme as for sequence numbers */
#define timestamp_lt(_a, _b)  ((i32) ((_a) - (_b)) < 0)
#define timestamp_leq(_a, _b) ((i32) ((_a) - (_b)) <= 0)
| |
| always_inline void |
| ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0) |
| { |
| u32 src0, dst0; |
| |
| src0 = ip0->src_address.data_u32; |
| dst0 = ip0->dst_address.data_u32; |
| ip0->src_address.data_u32 = dst0; |
| ip0->dst_address.data_u32 = src0; |
| |
| src0 = tcp0->src; |
| dst0 = tcp0->dst; |
| tcp0->src = dst0; |
| tcp0->dst = src0; |
| } |
| |
| always_inline void |
| ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0, |
| tcp_header_t *tcp1) |
| { |
| u32 src0, dst0, src1, dst1; |
| |
| src0 = ip0->src_address.data_u32; |
| src1 = ip1->src_address.data_u32; |
| dst0 = ip0->dst_address.data_u32; |
| dst1 = ip1->dst_address.data_u32; |
| ip0->src_address.data_u32 = dst0; |
| ip1->src_address.data_u32 = dst1; |
| ip0->dst_address.data_u32 = src0; |
| ip1->dst_address.data_u32 = src1; |
| |
| src0 = tcp0->src; |
| src1 = tcp1->src; |
| dst0 = tcp0->dst; |
| dst1 = tcp1->dst; |
| tcp0->src = dst0; |
| tcp1->src = dst1; |
| tcp0->dst = src0; |
| tcp1->dst = src1; |
| } |
| |
| always_inline void |
| ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0) |
| { |
| { |
| ip6_address_t src0, dst0; |
| |
| src0 = ip0->src_address; |
| dst0 = ip0->dst_address; |
| ip0->src_address = dst0; |
| ip0->dst_address = src0; |
| } |
| |
| { |
| u16 src0, dst0; |
| |
| src0 = tcp0->src; |
| dst0 = tcp0->dst; |
| tcp0->src = dst0; |
| tcp0->dst = src0; |
| } |
| } |
| |
| always_inline void |
| ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0, |
| tcp_header_t *tcp1) |
| { |
| { |
| ip6_address_t src0, dst0, src1, dst1; |
| |
| src0 = ip0->src_address; |
| src1 = ip1->src_address; |
| dst0 = ip0->dst_address; |
| dst1 = ip1->dst_address; |
| ip0->src_address = dst0; |
| ip1->src_address = dst1; |
| ip0->dst_address = src0; |
| ip1->dst_address = src1; |
| } |
| |
| { |
| u16 src0, dst0, src1, dst1; |
| |
| src0 = tcp0->src; |
| src1 = tcp1->src; |
| dst0 = tcp0->dst; |
| dst1 = tcp1->dst; |
| tcp0->src = dst0; |
| tcp1->src = dst1; |
| tcp0->dst = src0; |
| tcp1->dst = src1; |
| } |
| } |
| |
| /** |
| * Parse TCP header options. |
| * |
| * @param th TCP header |
| * @param to TCP options data structure to be populated |
| * @param is_syn set if packet is syn |
| * @return -1 if parsing failed |
| */ |
| always_inline int |
| tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn) |
| { |
| const u8 *data; |
| u8 opt_len, opts_len, kind; |
| int j; |
| sack_block_t b; |
| |
| opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t); |
| data = (const u8 *) (th + 1); |
| |
| /* Zero out all flags but those set in SYN */ |
| to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE |
| | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS); |
| |
| for (; opts_len > 0; opts_len -= opt_len, data += opt_len) |
| { |
| kind = data[0]; |
| |
| /* Get options length */ |
| if (kind == TCP_OPTION_EOL) |
| break; |
| else if (kind == TCP_OPTION_NOOP) |
| { |
| opt_len = 1; |
| continue; |
| } |
| else |
| { |
| /* broken options */ |
| if (opts_len < 2) |
| return -1; |
| opt_len = data[1]; |
| |
| /* weird option length */ |
| if (opt_len < 2 || opt_len > opts_len) |
| return -1; |
| } |
| |
| /* Parse options */ |
| switch (kind) |
| { |
| case TCP_OPTION_MSS: |
| if (!is_syn) |
| break; |
| if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th)) |
| { |
| to->flags |= TCP_OPTS_FLAG_MSS; |
| to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2)); |
| } |
| break; |
| case TCP_OPTION_WINDOW_SCALE: |
| if (!is_syn) |
| break; |
| if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th)) |
| { |
| to->flags |= TCP_OPTS_FLAG_WSCALE; |
| to->wscale = data[2]; |
| if (to->wscale > TCP_MAX_WND_SCALE) |
| to->wscale = TCP_MAX_WND_SCALE; |
| } |
| break; |
| case TCP_OPTION_TIMESTAMP: |
| if (is_syn) |
| to->flags |= TCP_OPTS_FLAG_TSTAMP; |
| if ((to->flags & TCP_OPTS_FLAG_TSTAMP) |
| && opt_len == TCP_OPTION_LEN_TIMESTAMP) |
| { |
| to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2)); |
| to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6)); |
| } |
| break; |
| case TCP_OPTION_SACK_PERMITTED: |
| if (!is_syn) |
| break; |
| if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th)) |
| to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED; |
| break; |
| case TCP_OPTION_SACK_BLOCK: |
| /* If SACK permitted was not advertised or a SYN, break */ |
| if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th)) |
| break; |
| |
| /* If too short or not correctly formatted, break */ |
| if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK)) |
| break; |
| |
| to->flags |= TCP_OPTS_FLAG_SACK; |
| to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK; |
| vec_reset_length (to->sacks); |
| for (j = 0; j < to->n_sack_blocks; j++) |
| { |
| b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j)); |
| b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j)); |
| vec_add1 (to->sacks, b); |
| } |
| break; |
| default: |
| /* Nothing to see here */ |
| continue; |
| } |
| } |
| return 0; |
| } |
| |
| /** |
| * Write TCP options to segment. |
| * |
| * @param data buffer where to write the options |
| * @param opts options to write |
| * @return length of options written |
| */ |
| always_inline u32 |
| tcp_options_write (u8 * data, tcp_options_t * opts) |
| { |
| u32 opts_len = 0; |
| u32 buf, seq_len = 4; |
| |
| if (tcp_opts_mss (opts)) |
| { |
| *data++ = TCP_OPTION_MSS; |
| *data++ = TCP_OPTION_LEN_MSS; |
| buf = clib_host_to_net_u16 (opts->mss); |
| clib_memcpy_fast (data, &buf, sizeof (opts->mss)); |
| data += sizeof (opts->mss); |
| opts_len += TCP_OPTION_LEN_MSS; |
| } |
| |
| if (tcp_opts_wscale (opts)) |
| { |
| *data++ = TCP_OPTION_WINDOW_SCALE; |
| *data++ = TCP_OPTION_LEN_WINDOW_SCALE; |
| *data++ = opts->wscale; |
| opts_len += TCP_OPTION_LEN_WINDOW_SCALE; |
| } |
| |
| if (tcp_opts_sack_permitted (opts)) |
| { |
| *data++ = TCP_OPTION_SACK_PERMITTED; |
| *data++ = TCP_OPTION_LEN_SACK_PERMITTED; |
| opts_len += TCP_OPTION_LEN_SACK_PERMITTED; |
| } |
| |
| if (tcp_opts_tstamp (opts)) |
| { |
| *data++ = TCP_OPTION_TIMESTAMP; |
| *data++ = TCP_OPTION_LEN_TIMESTAMP; |
| buf = clib_host_to_net_u32 (opts->tsval); |
| clib_memcpy_fast (data, &buf, sizeof (opts->tsval)); |
| data += sizeof (opts->tsval); |
| buf = clib_host_to_net_u32 (opts->tsecr); |
| clib_memcpy_fast (data, &buf, sizeof (opts->tsecr)); |
| data += sizeof (opts->tsecr); |
| opts_len += TCP_OPTION_LEN_TIMESTAMP; |
| } |
| |
| if (tcp_opts_sack (opts)) |
| { |
| int i; |
| |
| if (opts->n_sack_blocks != 0) |
| { |
| *data++ = TCP_OPTION_SACK_BLOCK; |
| *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; |
| for (i = 0; i < opts->n_sack_blocks; i++) |
| { |
| buf = clib_host_to_net_u32 (opts->sacks[i].start); |
| clib_memcpy_fast (data, &buf, seq_len); |
| data += seq_len; |
| buf = clib_host_to_net_u32 (opts->sacks[i].end); |
| clib_memcpy_fast (data, &buf, seq_len); |
| data += seq_len; |
| } |
| opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK; |
| } |
| } |
| |
| /* Terminate TCP options by padding with NOPs to a u32 boundary. Avoid using |
| * EOL because, it seems, it can break peers with broken option parsers that |
| * rely on options ending on a u32 boundary. |
| */ |
| while (opts_len % 4) |
| { |
| *data++ = TCP_OPTION_NOOP; |
| opts_len += TCP_OPTION_LEN_NOOP; |
| } |
| return opts_len; |
| } |
| |
| #endif /* included_tcp_packet_h */ |
| |
| /* |
| * fd.io coding-style-patch-verification: ON |
| * |
| * Local Variables: |
| * eval: (c-set-style "gnu") |
| * End: |
| */ |