blob: c137ea68108962aaab137b20f00dc8d90c2ea88c [file] [log] [blame]
/*
* Copyright (c) 2016-2019 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef included_tcp_packet_h
#define included_tcp_packet_h
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
/*
 * TCP header flags, listed starting with bit 0.
 *
 * The list is expanded twice below with different definitions of _():
 * once to number the bit positions and once to build the matching masks.
 */
#define foreach_tcp_flag \
_ (FIN) /**< No more data from sender. */ \
_ (SYN) /**< Synchronize sequence numbers. */ \
_ (RST) /**< Reset the connection. */ \
_ (PSH) /**< Push function. */ \
_ (ACK) /**< Ack field significant. */ \
_ (URG) /**< Urgent pointer field significant. */ \
_ (ECE) /**< ECN-echo. Receiver got CE packet */ \
_ (CWR) /**< Sender reduced congestion window */
/* Bit positions: TCP_FLAG_BIT_FIN is 0, TCP_FLAG_BIT_CWR is 7.
 * TCP_N_FLAG_BITS ends up equal to the number of flags in the list. */
enum
{
#define _(f) TCP_FLAG_BIT_##f,
foreach_tcp_flag
#undef _
TCP_N_FLAG_BITS,
};
/* Bit masks: TCP_FLAG_<f> is 1 << TCP_FLAG_BIT_<f>. */
enum
{
#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
foreach_tcp_flag
#undef _
};
/**
 * Fixed part of the TCP header (no options).
 *
 * The two anonymous structs inside the union overlay the same bytes, so the
 * ports can be accessed either as src_port/dst_port or as src/dst.
 * The struct is declared packed, so no padding is inserted between fields.
 */
typedef struct _tcp_header
{
union
{
struct
{
u16 src_port; /**< Source port. */
u16 dst_port; /**< Destination port. */
};
struct
{
u16 src, dst; /**< Short aliases for src_port and dst_port. */
};
};
u32 seq_number; /**< Sequence number of the first data octet in this
* segment, except when SYN is present. If SYN
* is present the seq number is the ISN and the
* first data octet is ISN+1 */
u32 ack_number; /**< Acknowledgement number if ACK is set. It contains
* the value of the next sequence number the sender
* of the segment is expecting to receive. */
u8 data_offset_and_reserved; /**< Upper 4 bits: header length in 32-bit
* words (see tcp_doff). The remaining low
* bits are, per the field name, reserved. */
u8 flags; /**< Flags: TCP_FLAG_* masks built from foreach_tcp_flag */
u16 window; /**< Number of bytes sender is willing to receive. */
u16 checksum; /**< Checksum of TCP pseudo header and data. */
u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */
} __attribute__ ((packed)) tcp_header_t;
/*
 * Header accessors. In all of them (_th) is a pointer to a TCP header and
 * is evaluated exactly once.
 */
/* Data offset: the upper 4 bits of data_offset_and_reserved, i.e. the header
 * length expressed in 32-bit words. */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)
/* Flag tests that return 0 or !0 (the flag's mask when it is set) */
#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)
/* Flag tests that return 0 or 1. The whole expansion is parenthesized so the
 * result behaves as a single operand wherever the macro is used. */
#define tcp_is_syn(_th) (!!((_th)->flags & TCP_FLAG_SYN))
#define tcp_is_fin(_th) (!!((_th)->flags & TCP_FLAG_FIN))
/**
 * Length of the TCP header, options included, in bytes.
 *
 * @param t TCP header
 * @return data offset field converted from 32-bit words to bytes
 */
always_inline int
tcp_header_bytes (tcp_header_t * t)
{
  const int n_words = tcp_doff (t);

  return n_words * sizeof (u32);
}
/*
 * TCP options.
 *
 * Option kind values, i.e. the first byte of each option as read by
 * tcp_options_parse and written by tcp_options_write.
 */
typedef enum tcp_option_type
{
TCP_OPTION_EOL = 0, /**< End of options. */
TCP_OPTION_NOOP = 1, /**< No operation. */
TCP_OPTION_MSS = 2, /**< Limit MSS. */
TCP_OPTION_WINDOW_SCALE = 3, /**< Window scale. */
TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */
TCP_OPTION_SACK_BLOCK = 5, /**< Selective Ack block. */
TCP_OPTION_TIMESTAMP = 8, /**< Timestamps. */
TCP_OPTION_UTO = 28, /**< User timeout. */
TCP_OPTION_AO = 29, /**< Authentication Option. */
} tcp_option_type_t;
/*
 * Flags recording which options are present in a tcp_options_t.
 *
 * As with the header flags, the list is expanded twice: once for bit
 * positions and once for bit masks.
 */
#define foreach_tcp_options_flag \
_ (MSS) /**< MSS advertised in SYN */ \
_ (TSTAMP) /**< Timestamp capability advertised in SYN */ \
_ (WSCALE) /**< Wnd scale capability advertised in SYN */ \
_ (SACK_PERMITTED) /**< SACK capability advertised in SYN */ \
_ (SACK) /**< SACK present */
/* Bit positions, starting at 0 for MSS. TCP_OPTIONS_N_FLAG_BITS ends up
 * equal to the number of flags in the list. */
enum
{
#define _(f) TCP_OPTS_FLAG_BIT_##f,
foreach_tcp_options_flag
#undef _
TCP_OPTIONS_N_FLAG_BITS,
};
/* Bit masks: TCP_OPTS_FLAG_<f> is 1 << TCP_OPTS_FLAG_BIT_<f>. */
enum
{
#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f,
foreach_tcp_options_flag
#undef _
};
/** One selective acknowledgement block: the sequence range [start, end). */
typedef struct _sack_block
{
u32 start; /**< Start sequence number */
u32 end; /**< End sequence number (first outside) */
} sack_block_t;
/**
 * Parsed (or to-be-written) TCP options.
 *
 * Which members are meaningful is indicated by the TCP_OPTS_FLAG_* bits in
 * flags; use the tcp_opts_*() macros to test them.
 */
typedef struct
{
sack_block_t *sacks; /**< SACK blocks; tcp_options_parse manages this with
* the vec_reset_length/vec_add1 macros */
u32 tsval; /**< Timestamp value */
u32 tsecr; /**< Echoed/reflected time stamp */
u16 mss; /**< Maximum segment size advertised */
u8 flags; /**< Option flags, TCP_OPTS_FLAG_* masks */
u8 wscale; /**< Window scale advertised */
u8 n_sack_blocks; /**< Number of SACKs blocks */
} tcp_options_t;
/* Option flag tests that return 0 or !0 (the flag's mask when it is set).
 * (_to) is a pointer to a tcp_options_t and is evaluated once. */
#define tcp_opts_mss(_to) ((_to)->flags & TCP_OPTS_FLAG_MSS)
#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP)
#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE)
#define tcp_opts_sack(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK)
#define tcp_opts_sack_permitted(_to) ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
/* TCP option lengths in bytes. Except for SACK_BLOCK these are full option
 * lengths, i.e. they include the kind byte and (where present) the length
 * byte. */
#define TCP_OPTION_LEN_EOL 1
#define TCP_OPTION_LEN_NOOP 1
#define TCP_OPTION_LEN_MSS 4
#define TCP_OPTION_LEN_WINDOW_SCALE 3
#define TCP_OPTION_LEN_SACK_PERMITTED 2
#define TCP_OPTION_LEN_TIMESTAMP 10
/* Size of one SACK block only (start + end); a SACK option carrying n blocks
 * is 2 + n * TCP_OPTION_LEN_SACK_BLOCK bytes long. */
#define TCP_OPTION_LEN_SACK_BLOCK 8
/* Largest header the 4-bit data offset can describe: 15 words of 4 bytes. */
#define TCP_HDR_LEN_MAX 60
/* Largest value representable in the 16-bit window field. */
#define TCP_WND_MAX 65535U
#define TCP_MAX_WND_SCALE 14 /* See RFC 1323 */
/* tcp_options_write pads the options to a multiple of 4 bytes. */
#define TCP_OPTS_ALIGN 4
/* NOTE(review): presumably the limit on SACK blocks sent per segment; it is
 * not used in this header - confirm against callers. */
#define TCP_OPTS_MAX_SACK_BLOCKS 3
/* NOTE(review): presumably the maximum GSO segment size in bytes; it is not
 * used in this header - confirm against callers. */
#define TCP_MAX_GSO_SZ 65536
/* Modulo arithmetic for TCP sequence numbers.
 *
 * The difference of the two operands is reinterpreted as a signed 32-bit
 * value, so the comparisons remain meaningful when the sequence space wraps
 * (assuming u32 operands that are less than 2^31 apart).
 */
#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0)
#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0)
#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0)
#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0)
/* Note: evaluates its arguments more than once; avoid side effects. */
#define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2))
/* Modulo arithmetic for timestamps, same technique as for sequence numbers */
#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0)
/**
 * Turn a received IPv4/TCP packet around in place.
 *
 * Swaps the IPv4 source and destination addresses and the TCP source and
 * destination ports. No other header field is touched.
 *
 * @param ip0 IPv4 header to update
 * @param tcp0 TCP header to update
 */
always_inline void
ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0)
{
  /* Exchange the IPv4 endpoints */
  const u32 rx_src_addr = ip0->src_address.data_u32;
  const u32 rx_dst_addr = ip0->dst_address.data_u32;

  ip0->src_address.data_u32 = rx_dst_addr;
  ip0->dst_address.data_u32 = rx_src_addr;

  /* Exchange the TCP ports */
  const u16 rx_src_port = tcp0->src;
  const u16 rx_dst_port = tcp0->dst;

  tcp0->src = rx_dst_port;
  tcp0->dst = rx_src_port;
}
/**
 * Two-packet variant of ip4_tcp_reply_x1.
 *
 * For each (ip, tcp) pair, swaps the IPv4 source/destination addresses and
 * the TCP source/destination ports in place. All values of a given kind are
 * read before any of them is written back.
 *
 * @param ip0 IPv4 header of the first packet
 * @param ip1 IPv4 header of the second packet
 * @param tcp0 TCP header of the first packet
 * @param tcp1 TCP header of the second packet
 */
always_inline void
ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0,
		  tcp_header_t *tcp1)
{
  /* Read all four addresses first, then write them back swapped */
  const u32 a_src = ip0->src_address.data_u32;
  const u32 b_src = ip1->src_address.data_u32;
  const u32 a_dst = ip0->dst_address.data_u32;
  const u32 b_dst = ip1->dst_address.data_u32;

  ip0->src_address.data_u32 = a_dst;
  ip1->src_address.data_u32 = b_dst;
  ip0->dst_address.data_u32 = a_src;
  ip1->dst_address.data_u32 = b_src;

  /* Same for the ports */
  const u16 a_sport = tcp0->src;
  const u16 b_sport = tcp1->src;
  const u16 a_dport = tcp0->dst;
  const u16 b_dport = tcp1->dst;

  tcp0->src = a_dport;
  tcp1->src = b_dport;
  tcp0->dst = a_sport;
  tcp1->dst = b_sport;
}
/**
 * Turn a received IPv6/TCP packet around in place.
 *
 * Swaps the IPv6 source and destination addresses and the TCP source and
 * destination ports. No other header field is touched.
 *
 * @param ip0 IPv6 header to update
 * @param tcp0 TCP header to update
 */
always_inline void
ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0)
{
  ip6_address_t saved_addr;
  u16 saved_port;

  /* Swap addresses through a single temporary */
  saved_addr = ip0->src_address;
  ip0->src_address = ip0->dst_address;
  ip0->dst_address = saved_addr;

  /* Swap ports the same way */
  saved_port = tcp0->src;
  tcp0->src = tcp0->dst;
  tcp0->dst = saved_port;
}
/**
 * Two-packet variant of ip6_tcp_reply_x1.
 *
 * For each (ip, tcp) pair, swaps the IPv6 source/destination addresses and
 * the TCP source/destination ports in place. All values of a given kind are
 * read before any of them is written back.
 *
 * @param ip0 IPv6 header of the first packet
 * @param ip1 IPv6 header of the second packet
 * @param tcp0 TCP header of the first packet
 * @param tcp1 TCP header of the second packet
 */
always_inline void
ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0,
		  tcp_header_t *tcp1)
{
  /* Read all four addresses first, then write them back swapped */
  const ip6_address_t a_src = ip0->src_address;
  const ip6_address_t b_src = ip1->src_address;
  const ip6_address_t a_dst = ip0->dst_address;
  const ip6_address_t b_dst = ip1->dst_address;

  ip0->src_address = a_dst;
  ip1->src_address = b_dst;
  ip0->dst_address = a_src;
  ip1->dst_address = b_src;

  /* Same for the ports */
  const u16 a_sport = tcp0->src;
  const u16 b_sport = tcp1->src;
  const u16 a_dport = tcp0->dst;
  const u16 b_dport = tcp1->dst;

  tcp0->src = a_dport;
  tcp1->src = b_dport;
  tcp0->dst = a_sport;
  tcp1->dst = b_sport;
}
/**
 * Parse TCP header options.
 *
 * Walks the option bytes that follow the fixed header, up to the length
 * given by the header's data offset, and records the recognized options in
 * @c to. Option flags negotiated on the SYN (MSS, window scale, timestamp,
 * SACK permitted) are preserved across calls; all other flags are cleared
 * before parsing. Unknown options are skipped. When a SACK option is
 * accepted, @c to->sacks is reset and refilled with the received blocks.
 *
 * @param th TCP header
 * @param to TCP options data structure to be populated
 * @param is_syn set if packet is syn
 * @return 0 on success, -1 if parsing failed (data offset smaller than the
 *         fixed header, truncated option or invalid option length)
 */
always_inline int
tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn)
{
  const u8 *data;
  u8 opt_len, opts_len, kind;
  int j;
  sack_block_t b;

  /* A data offset that does not even cover the fixed header is invalid.
   * Without this check the subtraction below would wrap around in the u8
   * and the loop would read far beyond the header. */
  if ((tcp_doff (th) << 2) < (int) sizeof (tcp_header_t))
    return -1;

  opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
  data = (const u8 *) (th + 1);

  /* Zero out all flags but those set in SYN */
  to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE
		| TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS);

  for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
    {
      kind = data[0];

      /* Get options length */
      if (kind == TCP_OPTION_EOL)
	break;
      else if (kind == TCP_OPTION_NOOP)
	{
	  /* Single byte option, no length field */
	  opt_len = 1;
	  continue;
	}
      else
	{
	  /* broken options */
	  if (opts_len < 2)
	    return -1;
	  opt_len = data[1];

	  /* weird option length */
	  if (opt_len < 2 || opt_len > opts_len)
	    return -1;
	}

      /* Parse options */
      switch (kind)
	{
	case TCP_OPTION_MSS:
	  if (!is_syn)
	    break;
	  if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
	    {
	      to->flags |= TCP_OPTS_FLAG_MSS;
	      to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
	    }
	  break;
	case TCP_OPTION_WINDOW_SCALE:
	  if (!is_syn)
	    break;
	  if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
	    {
	      to->flags |= TCP_OPTS_FLAG_WSCALE;
	      to->wscale = data[2];
	      /* Clamp to the largest allowed shift count */
	      if (to->wscale > TCP_MAX_WND_SCALE)
		to->wscale = TCP_MAX_WND_SCALE;
	    }
	  break;
	case TCP_OPTION_TIMESTAMP:
	  /* Ignore a malformed option entirely. In particular do not mark
	   * timestamps as negotiated on a SYN whose option length is wrong,
	   * since tsval/tsecr would then hold stale values. */
	  if (opt_len != TCP_OPTION_LEN_TIMESTAMP)
	    break;
	  if (is_syn)
	    to->flags |= TCP_OPTS_FLAG_TSTAMP;
	  if (to->flags & TCP_OPTS_FLAG_TSTAMP)
	    {
	      to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
	      to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
	    }
	  break;
	case TCP_OPTION_SACK_PERMITTED:
	  if (!is_syn)
	    break;
	  if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
	    to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
	  break;
	case TCP_OPTION_SACK_BLOCK:
	  /* If SACK permitted was not advertised or a SYN, break */
	  if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
	    break;

	  /* If too short or not correctly formatted, break */
	  if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
	    break;

	  to->flags |= TCP_OPTS_FLAG_SACK;
	  to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
	  vec_reset_length (to->sacks);
	  for (j = 0; j < to->n_sack_blocks; j++)
	    {
	      b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
	      b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
	      vec_add1 (to->sacks, b);
	    }
	  break;
	default:
	  /* Nothing to see here */
	  continue;
	}
    }
  return 0;
}
/**
 * Write TCP options to segment.
 *
 * Options are emitted in the order MSS, window scale, SACK permitted,
 * timestamp, SACK blocks, each only if the corresponding flag is set in
 * @c opts. The result is padded with NOOPs to a multiple of 4 bytes.
 * The caller must provide a buffer large enough for the selected options;
 * no bounds checking is done here.
 *
 * @param data buffer where to write the options
 * @param opts options to write
 * @return length of options written, padding included
 */
always_inline u32
tcp_options_write (u8 * data, tcp_options_t * opts)
{
  u32 opts_len = 0;
  u32 buf, seq_len = 4;
  u16 mss;

  if (tcp_opts_mss (opts))
    {
      *data++ = TCP_OPTION_MSS;
      *data++ = TCP_OPTION_LEN_MSS;
      /* Use a 16-bit temporary: copying the first two bytes of a 32-bit
       * one only yields the MSS on little-endian hosts. */
      mss = clib_host_to_net_u16 (opts->mss);
      clib_memcpy_fast (data, &mss, sizeof (mss));
      data += sizeof (mss);
      opts_len += TCP_OPTION_LEN_MSS;
    }

  if (tcp_opts_wscale (opts))
    {
      *data++ = TCP_OPTION_WINDOW_SCALE;
      *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
      *data++ = opts->wscale;
      opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
    }

  if (tcp_opts_sack_permitted (opts))
    {
      *data++ = TCP_OPTION_SACK_PERMITTED;
      *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
      opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
    }

  if (tcp_opts_tstamp (opts))
    {
      *data++ = TCP_OPTION_TIMESTAMP;
      *data++ = TCP_OPTION_LEN_TIMESTAMP;
      buf = clib_host_to_net_u32 (opts->tsval);
      clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
      data += sizeof (opts->tsval);
      buf = clib_host_to_net_u32 (opts->tsecr);
      clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
      data += sizeof (opts->tsecr);
      opts_len += TCP_OPTION_LEN_TIMESTAMP;
    }

  if (tcp_opts_sack (opts))
    {
      int i;

      if (opts->n_sack_blocks != 0)
	{
	  /* Kind and length bytes plus 8 bytes per block */
	  u32 sack_len = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;

	  *data++ = TCP_OPTION_SACK_BLOCK;
	  *data++ = sack_len;
	  for (i = 0; i < opts->n_sack_blocks; i++)
	    {
	      buf = clib_host_to_net_u32 (opts->sacks[i].start);
	      clib_memcpy_fast (data, &buf, seq_len);
	      data += seq_len;
	      buf = clib_host_to_net_u32 (opts->sacks[i].end);
	      clib_memcpy_fast (data, &buf, seq_len);
	      data += seq_len;
	    }
	  opts_len += sack_len;
	}
    }

  /* Terminate TCP options by padding with NOPs to a u32 boundary. Avoid using
   * EOL because, it seems, it can break peers with broken option parsers that
   * rely on options ending on a u32 boundary.
   */
  while (opts_len % 4)
    {
      *data++ = TCP_OPTION_NOOP;
      opts_len += TCP_OPTION_LEN_NOOP;
    }
  return opts_len;
}
#endif /* included_tcp_packet_h */
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/