blob: 9ccd8e0ceb3ac4e4ab45327969751906275b9848 [file] [log] [blame]
Dave Barach68b0fb02017-02-28 15:15:56 -05001/*
Florin Corasc5df8c72019-04-08 07:42:30 -07002 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
Dave Barach68b0fb02017-02-28 15:15:56 -05003 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef included_tcp_packet_h
17#define included_tcp_packet_h
18
19#include <vnet/vnet.h>
20
/*
 * TCP header flags, listed from bit 0 (least significant) upwards.
 * The list is expanded below with two different definitions of _() to
 * generate first the bit positions and then the bit masks.
 */
#define foreach_tcp_flag                                        \
  _ (FIN) /**< Sender has no more data to send. */              \
  _ (SYN) /**< Synchronize sequence numbers. */                 \
  _ (RST) /**< Reset the connection. */                         \
  _ (PSH) /**< Push function. */                                \
  _ (ACK) /**< Acknowledgement field is significant. */         \
  _ (URG) /**< Urgent pointer field is significant. */          \
  _ (ECE) /**< ECN-echo: receiver got a CE packet. */           \
  _ (CWR) /**< Sender reduced its congestion window. */

/* Bit position of every flag; TCP_N_FLAG_BITS is the number of flags */
enum
{
#define _(f) TCP_FLAG_BIT_##f,
  foreach_tcp_flag
#undef _
  TCP_N_FLAG_BITS,
};

/* Bit mask of every flag, derived from its bit position */
enum
{
#define _(f) TCP_FLAG_##f = (1 << TCP_FLAG_BIT_##f),
  foreach_tcp_flag
#undef _
};
46
47typedef struct _tcp_header
48{
49 union
50 {
51 struct
52 {
53 u16 src_port; /**< Source port. */
54 u16 dst_port; /**< Destination port. */
55 };
56 struct
57 {
58 u16 src, dst;
59 };
60 };
61
62 u32 seq_number; /**< Sequence number of the first data octet in this
63 * segment, except when SYN is present. If SYN
64 * is present the seq number is is the ISN and the
65 * first data octet is ISN+1 */
66 u32 ack_number; /**< Acknowledgement number if ACK is set. It contains
67 * the value of the next sequence number the sender
68 * of the segment is expecting to receive. */
69 u8 data_offset_and_reserved;
70 u8 flags; /**< Flags: see the macro above */
71 u16 window; /**< Number of bytes sender is willing to receive. */
72
73 u16 checksum; /**< Checksum of TCP pseudo header and data. */
74 u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */
75} __attribute__ ((packed)) tcp_header_t;
76
/* Data offset field: upper 4 bits of the data offset/reserved byte */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)

/* Flag tests that evaluate to 0 or to the flag's bit mask (i.e. !0) */
#define tcp_fin(_th) (TCP_FLAG_FIN & (_th)->flags)
#define tcp_syn(_th) (TCP_FLAG_SYN & (_th)->flags)
#define tcp_rst(_th) (TCP_FLAG_RST & (_th)->flags)
#define tcp_psh(_th) (TCP_FLAG_PSH & (_th)->flags)
#define tcp_ack(_th) (TCP_FLAG_ACK & (_th)->flags)
#define tcp_urg(_th) (TCP_FLAG_URG & (_th)->flags)
#define tcp_ece(_th) (TCP_FLAG_ECE & (_th)->flags)
#define tcp_cwr(_th) (TCP_FLAG_CWR & (_th)->flags)

/* Flag tests normalized to exactly 0 or 1 */
#define tcp_is_syn(_th) (tcp_syn (_th) != 0)
#define tcp_is_fin(_th) (tcp_fin (_th) != 0)
91
92always_inline int
93tcp_header_bytes (tcp_header_t * t)
94{
95 return tcp_doff (t) * sizeof (u32);
96}
97
98/*
99 * TCP options.
100 */
101
/** Option kind values, as found in the first byte of each option */
typedef enum tcp_option_type
{
  TCP_OPTION_EOL = 0,			/**< End of option list. */
  TCP_OPTION_NOOP = 1,			/**< No operation. */
  TCP_OPTION_MSS = 2,			/**< Maximum segment size limit. */
  TCP_OPTION_WINDOW_SCALE = 3,		/**< Window scale. */
  TCP_OPTION_SACK_PERMITTED = 4,	/**< Selective Ack permitted. */
  TCP_OPTION_SACK_BLOCK = 5,		/**< Selective Ack block. */
  TCP_OPTION_TIMESTAMP = 8,		/**< Timestamps. */
  TCP_OPTION_UTO = 28,			/**< User timeout. */
  TCP_OPTION_AO = 29,			/**< Authentication Option. */
} tcp_option_type_t;
114
/*
 * Flags recording which options were seen or negotiated. The list is
 * expanded below, once for the bit positions and once for the bit masks.
 */
#define foreach_tcp_options_flag                                        \
  _ (MSS)            /**< MSS was advertised in SYN */                  \
  _ (TSTAMP)         /**< Timestamp capability advertised in SYN */     \
  _ (WSCALE)         /**< Window scale capability advertised in SYN */  \
  _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */          \
  _ (SACK)           /**< SACK blocks are present */

/* Bit position of every option flag, followed by the number of flags */
enum
{
#define _(f) TCP_OPTS_FLAG_BIT_##f,
  foreach_tcp_options_flag
#undef _
  TCP_OPTIONS_N_FLAG_BITS,
};

/* Bit mask of every option flag */
enum
{
#define _(f) TCP_OPTS_FLAG_##f = (1 << TCP_OPTS_FLAG_BIT_##f),
  foreach_tcp_options_flag
#undef _
};
136
137typedef struct _sack_block
138{
139 u32 start; /**< Start sequence number */
Florin Coras6792ec02017-03-13 03:49:51 -0700140 u32 end; /**< End sequence number (first outside) */
Dave Barach68b0fb02017-02-28 15:15:56 -0500141} sack_block_t;
142
143typedef struct
144{
145 u8 flags; /** Option flags, see above */
Florin Corasf6359c82017-06-19 12:26:09 -0400146 u8 wscale; /**< Window scale advertised */
Florin Coras36ebcff2019-09-12 18:36:44 -0700147 u16 mss; /**< Maximum segment size advertised */
Florin Corasf6359c82017-06-19 12:26:09 -0400148 u32 tsval; /**< Timestamp value */
Dave Barach68b0fb02017-02-28 15:15:56 -0500149 u32 tsecr; /**< Echoed/reflected time stamp */
Florin Corasf6359c82017-06-19 12:26:09 -0400150 sack_block_t *sacks; /**< SACK blocks */
Dave Barach68b0fb02017-02-28 15:15:56 -0500151 u8 n_sack_blocks; /**< Number of SACKs blocks */
152} tcp_options_t;
153
/* Option flag tests that evaluate to 0 or to the flag's bit mask (!0) */
#define tcp_opts_mss(_to) (TCP_OPTS_FLAG_MSS & (_to)->flags)
#define tcp_opts_tstamp(_to) (TCP_OPTS_FLAG_TSTAMP & (_to)->flags)
#define tcp_opts_wscale(_to) (TCP_OPTS_FLAG_WSCALE & (_to)->flags)
#define tcp_opts_sack(_to) (TCP_OPTS_FLAG_SACK & (_to)->flags)
#define tcp_opts_sack_permitted(_to) (TCP_OPTS_FLAG_SACK_PERMITTED & (_to)->flags)
160
/* On-wire length in bytes of each supported option, kind/length included.
 * TCP_OPTION_LEN_SACK_BLOCK is the size of a single SACK block. */
#define TCP_OPTION_LEN_EOL		1
#define TCP_OPTION_LEN_NOOP		1
#define TCP_OPTION_LEN_MSS		4
#define TCP_OPTION_LEN_WINDOW_SCALE	3
#define TCP_OPTION_LEN_SACK_PERMITTED	2
#define TCP_OPTION_LEN_TIMESTAMP	10
#define TCP_OPTION_LEN_SACK_BLOCK	8

/* Header, window and option limits */
#define TCP_HDR_LEN_MAX			60
#define TCP_WND_MAX			65535U
#define TCP_MAX_WND_SCALE		14	/* See RFC 7323 (obsoletes RFC 1323) */
#define TCP_OPTS_ALIGN			4
#define TCP_OPTS_MAX_SACK_BLOCKS	3
#define TCP_MAX_GSO_SZ			65536
Florin Coras999840c2020-03-18 20:31:34 +0000176
/*
 * Modulo arithmetic for TCP sequence numbers: the difference is
 * reinterpreted as a signed 32-bit value so comparisons survive wrap-around.
 */
#define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0)
#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0)
#define seq_leq(_s1, _s2) (!seq_gt ((_s1), (_s2)))
#define seq_geq(_s1, _s2) (!seq_lt ((_s1), (_s2)))
#define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2))

/* Same modulo arithmetic, applied to timestamps */
#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
#define timestamp_leq(_t1, _t2) (!((i32)((_t1)-(_t2)) > 0))
187
188/**
189 * Parse TCP header options.
190 *
191 * @param th TCP header
192 * @param to TCP options data structure to be populated
193 * @param is_syn set if packet is syn
194 * @return -1 if parsing failed
195 */
196always_inline int
197tcp_options_parse (tcp_header_t * th, tcp_options_t * to, u8 is_syn)
198{
199 const u8 *data;
200 u8 opt_len, opts_len, kind;
201 int j;
202 sack_block_t b;
203
204 opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
205 data = (const u8 *) (th + 1);
206
207 /* Zero out all flags but those set in SYN */
208 to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE
209 | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS);
210
211 for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
212 {
213 kind = data[0];
214
215 /* Get options length */
216 if (kind == TCP_OPTION_EOL)
217 break;
218 else if (kind == TCP_OPTION_NOOP)
219 {
220 opt_len = 1;
221 continue;
222 }
223 else
224 {
225 /* broken options */
226 if (opts_len < 2)
227 return -1;
228 opt_len = data[1];
229
230 /* weird option length */
231 if (opt_len < 2 || opt_len > opts_len)
232 return -1;
233 }
234
235 /* Parse options */
236 switch (kind)
237 {
238 case TCP_OPTION_MSS:
239 if (!is_syn)
240 break;
241 if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
242 {
243 to->flags |= TCP_OPTS_FLAG_MSS;
244 to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
245 }
246 break;
247 case TCP_OPTION_WINDOW_SCALE:
248 if (!is_syn)
249 break;
250 if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
251 {
252 to->flags |= TCP_OPTS_FLAG_WSCALE;
253 to->wscale = data[2];
254 if (to->wscale > TCP_MAX_WND_SCALE)
255 to->wscale = TCP_MAX_WND_SCALE;
256 }
257 break;
258 case TCP_OPTION_TIMESTAMP:
259 if (is_syn)
260 to->flags |= TCP_OPTS_FLAG_TSTAMP;
261 if ((to->flags & TCP_OPTS_FLAG_TSTAMP)
262 && opt_len == TCP_OPTION_LEN_TIMESTAMP)
263 {
264 to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
265 to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
266 }
267 break;
268 case TCP_OPTION_SACK_PERMITTED:
269 if (!is_syn)
270 break;
271 if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
272 to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
273 break;
274 case TCP_OPTION_SACK_BLOCK:
275 /* If SACK permitted was not advertised or a SYN, break */
276 if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
277 break;
278
279 /* If too short or not correctly formatted, break */
280 if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
281 break;
282
283 to->flags |= TCP_OPTS_FLAG_SACK;
284 to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
285 vec_reset_length (to->sacks);
286 for (j = 0; j < to->n_sack_blocks; j++)
287 {
288 b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
289 b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
290 vec_add1 (to->sacks, b);
291 }
292 break;
293 default:
294 /* Nothing to see here */
295 continue;
296 }
297 }
298 return 0;
299}
300
301/**
302 * Write TCP options to segment.
303 *
304 * @param data buffer where to write the options
305 * @param opts options to write
306 * @return length of options written
307 */
308always_inline u32
309tcp_options_write (u8 * data, tcp_options_t * opts)
310{
311 u32 opts_len = 0;
312 u32 buf, seq_len = 4;
313
314 if (tcp_opts_mss (opts))
315 {
316 *data++ = TCP_OPTION_MSS;
317 *data++ = TCP_OPTION_LEN_MSS;
318 buf = clib_host_to_net_u16 (opts->mss);
319 clib_memcpy_fast (data, &buf, sizeof (opts->mss));
320 data += sizeof (opts->mss);
321 opts_len += TCP_OPTION_LEN_MSS;
322 }
323
324 if (tcp_opts_wscale (opts))
325 {
326 *data++ = TCP_OPTION_WINDOW_SCALE;
327 *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
328 *data++ = opts->wscale;
329 opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
330 }
331
332 if (tcp_opts_sack_permitted (opts))
333 {
334 *data++ = TCP_OPTION_SACK_PERMITTED;
335 *data++ = TCP_OPTION_LEN_SACK_PERMITTED;
336 opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
337 }
338
339 if (tcp_opts_tstamp (opts))
340 {
341 *data++ = TCP_OPTION_TIMESTAMP;
342 *data++ = TCP_OPTION_LEN_TIMESTAMP;
343 buf = clib_host_to_net_u32 (opts->tsval);
344 clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
345 data += sizeof (opts->tsval);
346 buf = clib_host_to_net_u32 (opts->tsecr);
347 clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
348 data += sizeof (opts->tsecr);
349 opts_len += TCP_OPTION_LEN_TIMESTAMP;
350 }
351
352 if (tcp_opts_sack (opts))
353 {
354 int i;
355
356 if (opts->n_sack_blocks != 0)
357 {
358 *data++ = TCP_OPTION_SACK_BLOCK;
359 *data++ = 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
360 for (i = 0; i < opts->n_sack_blocks; i++)
361 {
362 buf = clib_host_to_net_u32 (opts->sacks[i].start);
363 clib_memcpy_fast (data, &buf, seq_len);
364 data += seq_len;
365 buf = clib_host_to_net_u32 (opts->sacks[i].end);
366 clib_memcpy_fast (data, &buf, seq_len);
367 data += seq_len;
368 }
369 opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
370 }
371 }
372
373 /* Terminate TCP options */
374 if (opts_len % 4)
375 {
376 *data++ = TCP_OPTION_EOL;
377 opts_len += TCP_OPTION_LEN_EOL;
378 }
379
380 /* Pad with zeroes to a u32 boundary */
381 while (opts_len % 4)
382 {
383 *data++ = TCP_OPTION_NOOP;
384 opts_len += TCP_OPTION_LEN_NOOP;
385 }
386 return opts_len;
387}
388
Dave Barach68b0fb02017-02-28 15:15:56 -0500389#endif /* included_tcp_packet_h */
390
391/*
392 * fd.io coding-style-patch-verification: ON
393 *
394 * Local Variables:
395 * eval: (c-set-style "gnu")
396 * End:
397 */