blob: 36ed94f3aad5b9cbc0307ece7fe0ae0ed1c4c123 [file] [log] [blame]
Xiaoping Fan978b3772015-05-27 14:15:18 -07001/*
2 * sfe_ipv6.c
3 * Shortcut forwarding engine - IPv6 support.
4 *
Ryan Sherlock47c5a702016-01-12 07:27:05 -06005 * Copyright (c) 2015-2016 The Linux Foundation. All rights reserved.
Xiaoping Fana42c68b2015-08-07 18:00:39 -07006 * Permission to use, copy, modify, and/or distribute this software for
7 * any purpose with or without fee is hereby granted, provided that the
8 * above copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Xiaoping Fan978b3772015-05-27 14:15:18 -070016 */
17
18#include <linux/module.h>
19#include <linux/sysfs.h>
20#include <linux/skbuff.h>
21#include <linux/icmp.h>
22#include <net/tcp.h>
23#include <linux/etherdevice.h>
24
25#include "sfe.h"
26#include "sfe_cm.h"
27
28/*
29 * By default Linux IP header and transport layer header structures are
30 * unpacked, assuming that such headers should be 32-bit aligned.
31 * Unfortunately some wireless adaptors can't cope with this requirement and
32 * some CPUs can't handle misaligned accesses. For those platforms we
33 * define SFE_IPV6_UNALIGNED_IP_HEADER and mark the structures as packed.
34 * When we do this the compiler will generate slightly worse code than for the
35 * aligned case (on most platforms) but will be much quicker than fixing
36 * things up in an unaligned trap handler.
37 */
38#define SFE_IPV6_UNALIGNED_IP_HEADER 1
39#if SFE_IPV6_UNALIGNED_IP_HEADER
40#define SFE_IPV6_UNALIGNED_STRUCT __attribute__((packed))
41#else
42#define SFE_IPV6_UNALIGNED_STRUCT
43#endif
44
45#define CHAR_DEV_MSG_SIZE 768
46
47/*
48 * An Ethernet header, but with an optional "packed" attribute to
49 * help with performance on some platforms (see the definition of
50 * SFE_IPV6_UNALIGNED_STRUCT)
51 */
52struct sfe_ipv6_eth_hdr {
53 __be16 h_dest[ETH_ALEN / 2];
54 __be16 h_source[ETH_ALEN / 2];
55 __be16 h_proto;
56} SFE_IPV6_UNALIGNED_STRUCT;
57
Xiaoping Fane1963d42015-08-25 17:06:19 -070058#define SFE_IPV6_DSCP_MASK 0xf03f
59#define SFE_IPV6_DSCP_SHIFT 2
60
Xiaoping Fan978b3772015-05-27 14:15:18 -070061/*
62 * An IPv6 header, but with an optional "packed" attribute to
63 * help with performance on some platforms (see the definition of
64 * SFE_IPV6_UNALIGNED_STRUCT)
65 */
66struct sfe_ipv6_ip_hdr {
67#if defined(__LITTLE_ENDIAN_BITFIELD)
68 __u8 priority:4,
69 version:4;
70#elif defined(__BIG_ENDIAN_BITFIELD)
71 __u8 version:4,
72 priority:4;
73#else
74#error "Please fix <asm/byteorder.h>"
75#endif
76 __u8 flow_lbl[3];
77 __be16 payload_len;
78 __u8 nexthdr;
79 __u8 hop_limit;
80 struct sfe_ipv6_addr saddr;
81 struct sfe_ipv6_addr daddr;
82
83 /*
84 * The extension header start here.
85 */
86} SFE_IPV6_UNALIGNED_STRUCT;
87
88#define SFE_IPV6_EXT_HDR_HOP 0
89#define SFE_IPV6_EXT_HDR_ROUTING 43
90#define SFE_IPV6_EXT_HDR_FRAG 44
91#define SFE_IPV6_EXT_HDR_ESP 50
92#define SFE_IPV6_EXT_HDR_AH 51
93#define SFE_IPV6_EXT_HDR_NONE 59
94#define SFE_IPV6_EXT_HDR_DST 60
95#define SFE_IPV6_EXT_HDR_MH 135
96
97/*
98 * fragmentation header
99 */
100
101struct sfe_ipv6_frag_hdr {
102 __u8 nexthdr;
103 __u8 reserved;
104 __be16 frag_off;
105 __be32 identification;
106};
107
108#define SFE_IPV6_FRAG_OFFSET 0xfff8
109
110/*
111 * generic IPv6 extension header
112 */
113struct sfe_ipv6_ext_hdr {
114 __u8 next_hdr;
115 __u8 hdr_len;
116 __u8 padding[6];
117} SFE_IPV6_UNALIGNED_STRUCT;
118
119/*
120 * A UDP header, but with an optional "packed" attribute to
121 * help with performance on some platforms (see the definition of
122 * SFE_IPV6_UNALIGNED_STRUCT)
123 */
124struct sfe_ipv6_udp_hdr {
125 __be16 source;
126 __be16 dest;
127 __be16 len;
128 __sum16 check;
129} SFE_IPV6_UNALIGNED_STRUCT;
130
131/*
132 * A TCP header, but with an optional "packed" attribute to
133 * help with performance on some platforms (see the definition of
134 * SFE_IPV6_UNALIGNED_STRUCT)
135 */
136struct sfe_ipv6_tcp_hdr {
137 __be16 source;
138 __be16 dest;
139 __be32 seq;
140 __be32 ack_seq;
141#if defined(__LITTLE_ENDIAN_BITFIELD)
142 __u16 res1:4,
143 doff:4,
144 fin:1,
145 syn:1,
146 rst:1,
147 psh:1,
148 ack:1,
149 urg:1,
150 ece:1,
151 cwr:1;
152#elif defined(__BIG_ENDIAN_BITFIELD)
153 __u16 doff:4,
154 res1:4,
155 cwr:1,
156 ece:1,
157 urg:1,
158 ack:1,
159 psh:1,
160 rst:1,
161 syn:1,
162 fin:1;
163#else
164#error "Adjust your <asm/byteorder.h> defines"
165#endif
166 __be16 window;
167 __sum16 check;
168 __be16 urg_ptr;
169} SFE_IPV6_UNALIGNED_STRUCT;
170
171/*
172 * Specifies the lower bound on ACK numbers carried in the TCP header
173 */
174#define SFE_IPV6_TCP_MAX_ACK_WINDOW 65520
175
/*
 * TCP-specific state kept alongside an IPv6 connection match.
 */
struct sfe_ipv6_tcp_connection_match {
	/* Window scale */
	uint8_t win_scale;

	/* Maximum window size seen */
	uint32_t max_win;

	/* Sequence number of the next byte to send (seq + segment length) */
	uint32_t end;

	/* Sequence number of the last byte to ack */
	uint32_t max_end;
};
185
186/*
187 * Bit flags for IPv6 connection matching entry.
188 */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700189#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC (1<<0)
Xiaoping Fan978b3772015-05-27 14:15:18 -0700190 /* Perform source translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700191#define SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST (1<<1)
Xiaoping Fan978b3772015-05-27 14:15:18 -0700192 /* Perform destination translation */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700193#define SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK (1<<2)
Xiaoping Fan978b3772015-05-27 14:15:18 -0700194 /* Ignore TCP sequence numbers */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700195#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR (1<<3)
Xiaoping Fan978b3772015-05-27 14:15:18 -0700196 /* Fast Ethernet header write */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700197#define SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR (1<<4)
Xiaoping Fan978b3772015-05-27 14:15:18 -0700198 /* Fast Ethernet header write */
Xiaoping Fane1963d42015-08-25 17:06:19 -0700199#define SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK (1<<5)
200 /* remark priority of SKB */
201#define SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK (1<<6)
202 /* remark DSCP of packet */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700203
204/*
205 * IPv6 connection matching structure.
206 */
207struct sfe_ipv6_connection_match {
208 /*
209 * References to other objects.
210 */
211 struct sfe_ipv6_connection_match *next;
212 /* Next connection match entry in a list */
213 struct sfe_ipv6_connection_match *prev;
214 /* Previous connection match entry in a list */
215 struct sfe_ipv6_connection *connection;
216 /* Pointer to our connection */
217 struct sfe_ipv6_connection_match *counter_match;
218 /* Pointer to the connection match in the "counter" direction to this one */
219 struct sfe_ipv6_connection_match *active_next;
220 /* Pointer to the next connection in the active list */
221 struct sfe_ipv6_connection_match *active_prev;
222 /* Pointer to the previous connection in the active list */
223 bool active; /* Flag to indicate if we're on the active list */
224
225 /*
226 * Characteristics that identify flows that match this rule.
227 */
228 struct net_device *match_dev; /* Network device */
229 uint8_t match_protocol; /* Protocol */
230 struct sfe_ipv6_addr match_src_ip[1]; /* Source IP address */
231 struct sfe_ipv6_addr match_dest_ip[1]; /* Destination IP address */
232 __be16 match_src_port; /* Source port/connection ident */
233 __be16 match_dest_port; /* Destination port/connection ident */
234
235 /*
236 * Control the operations of the match.
237 */
238 uint32_t flags; /* Bit flags */
239#ifdef CONFIG_NF_FLOW_COOKIE
240 uint32_t flow_cookie; /* used flow cookie, for debug */
241#endif
Zhi Chen8748eb32015-06-18 12:58:48 -0700242#ifdef CONFIG_XFRM
243 uint32_t flow_accel; /* The flow accelerated or not */
244#endif
Xiaoping Fan978b3772015-05-27 14:15:18 -0700245
246 /*
247 * Connection state that we track once we match.
248 */
249 union { /* Protocol-specific state */
250 struct sfe_ipv6_tcp_connection_match tcp;
251 } protocol_state;
252 uint32_t rx_packet_count; /* Number of packets RX'd */
253 uint32_t rx_byte_count; /* Number of bytes RX'd */
254
255 /*
256 * Packet translation information.
257 */
258 struct sfe_ipv6_addr xlate_src_ip[1]; /* Address after source translation */
259 __be16 xlate_src_port; /* Port/connection ident after source translation */
260 uint16_t xlate_src_csum_adjustment;
261 /* Transport layer checksum adjustment after source translation */
262 struct sfe_ipv6_addr xlate_dest_ip[1]; /* Address after destination translation */
263 __be16 xlate_dest_port; /* Port/connection ident after destination translation */
264 uint16_t xlate_dest_csum_adjustment;
265 /* Transport layer checksum adjustment after destination translation */
266
267 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -0700268 * QoS information
269 */
270 uint32_t priority;
271 uint32_t dscp;
272
273 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700274 * Packet transmit information.
275 */
276 struct net_device *xmit_dev; /* Network device on which to transmit */
277 unsigned short int xmit_dev_mtu;
278 /* Interface MTU */
279 uint16_t xmit_dest_mac[ETH_ALEN / 2];
280 /* Destination MAC address to use when forwarding */
281 uint16_t xmit_src_mac[ETH_ALEN / 2];
282 /* Source MAC address to use when forwarding */
283
284 /*
285 * Summary stats.
286 */
287 uint64_t rx_packet_count64; /* Number of packets RX'd */
288 uint64_t rx_byte_count64; /* Number of bytes RX'd */
289};
290
291/*
292 * Per-connection data structure.
293 */
294struct sfe_ipv6_connection {
295 struct sfe_ipv6_connection *next;
296 /* Pointer to the next entry in a hash chain */
297 struct sfe_ipv6_connection *prev;
298 /* Pointer to the previous entry in a hash chain */
299 int protocol; /* IP protocol number */
300 struct sfe_ipv6_addr src_ip[1]; /* Source IP address */
301 struct sfe_ipv6_addr src_ip_xlate[1]; /* NAT-translated source IP address */
302 struct sfe_ipv6_addr dest_ip[1]; /* Destination IP address */
303 struct sfe_ipv6_addr dest_ip_xlate[1]; /* NAT-translated destination IP address */
304 __be16 src_port; /* Source port */
305 __be16 src_port_xlate; /* NAT-translated source port */
306 __be16 dest_port; /* Destination port */
307 __be16 dest_port_xlate; /* NAT-translated destination port */
308 struct sfe_ipv6_connection_match *original_match;
309 /* Original direction matching structure */
310 struct net_device *original_dev;
311 /* Original direction source device */
312 struct sfe_ipv6_connection_match *reply_match;
313 /* Reply direction matching structure */
314 struct net_device *reply_dev; /* Reply direction source device */
315 uint64_t last_sync_jiffies; /* Jiffies count for the last sync */
316 struct sfe_ipv6_connection *all_connections_next;
317 /* Pointer to the next entry in the list of all connections */
318 struct sfe_ipv6_connection *all_connections_prev;
319 /* Pointer to the previous entry in the list of all connections */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700320 uint32_t mark; /* mark for outgoing packet */
Xiaoping Fan34586472015-07-03 02:20:35 -0700321 uint32_t debug_read_seq; /* sequence number for debug dump */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700322};
323
324/*
325 * IPv6 connections and hash table size information.
326 */
327#define SFE_IPV6_CONNECTION_HASH_SHIFT 12
328#define SFE_IPV6_CONNECTION_HASH_SIZE (1 << SFE_IPV6_CONNECTION_HASH_SHIFT)
329#define SFE_IPV6_CONNECTION_HASH_MASK (SFE_IPV6_CONNECTION_HASH_SIZE - 1)
330
331#ifdef CONFIG_NF_FLOW_COOKIE
332#define SFE_FLOW_COOKIE_SIZE 2048
333#define SFE_FLOW_COOKIE_MASK 0x7ff
334
/*
 * One slot of the flow cookie table.
 */
struct sfe_ipv6_flow_cookie_entry {
	/* Connection match using this slot, or NULL when the slot is free */
	struct sfe_ipv6_connection_match *match;
	/* Jiffies value recorded when the slot was last cleared */
	unsigned long last_clean_time;
};
339#endif
340
/*
 * Exception event identifiers.
 *
 * The values index the exception counters and the table of event names, so
 * the order here must stay in step with sfe_ipv6_exception_events_string.
 * SFE_IPV6_EXCEPTION_EVENT_LAST is the number of events, not an event.
 */
enum sfe_ipv6_exception_events {
	/* UDP */
	SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION,
	SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL,
	SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION,

	/* TCP */
	SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS,
	SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS,
	SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT,
	SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL,
	SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION,
	SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS,
	SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE,
	SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS,
	SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK,
	SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS,
	SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE,
	SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE,
	SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE,

	/* ICMP */
	SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_IP_OPTIONS_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UDP_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_TCP_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION,
	SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION,

	/* Not protocol specific */
	SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_BAD_TOTAL_LENGTH,
	SFE_IPV6_EXCEPTION_EVENT_NON_V6,
	SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT,
	SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_IP_OPTIONS_INCOMPLETE,
	SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL,
	SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL,

	SFE_IPV6_EXCEPTION_EVENT_LAST
};
381
382static char *sfe_ipv6_exception_events_string[SFE_IPV6_EXCEPTION_EVENT_LAST] = {
383 "UDP_HEADER_INCOMPLETE",
384 "UDP_NO_CONNECTION",
385 "UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
386 "UDP_SMALL_TTL",
387 "UDP_NEEDS_FRAGMENTATION",
388 "TCP_HEADER_INCOMPLETE",
389 "TCP_NO_CONNECTION_SLOW_FLAGS",
390 "TCP_NO_CONNECTION_FAST_FLAGS",
391 "TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT",
392 "TCP_SMALL_TTL",
393 "TCP_NEEDS_FRAGMENTATION",
394 "TCP_FLAGS",
395 "TCP_SEQ_EXCEEDS_RIGHT_EDGE",
396 "TCP_SMALL_DATA_OFFS",
397 "TCP_BAD_SACK",
398 "TCP_BIG_DATA_OFFS",
399 "TCP_SEQ_BEFORE_LEFT_EDGE",
400 "TCP_ACK_EXCEEDS_RIGHT_EDGE",
401 "TCP_ACK_BEFORE_LEFT_EDGE",
402 "ICMP_HEADER_INCOMPLETE",
403 "ICMP_UNHANDLED_TYPE",
404 "ICMP_IPV6_HEADER_INCOMPLETE",
405 "ICMP_IPV6_NON_V6",
406 "ICMP_IPV6_IP_OPTIONS_INCOMPLETE",
407 "ICMP_IPV6_UDP_HEADER_INCOMPLETE",
408 "ICMP_IPV6_TCP_HEADER_INCOMPLETE",
409 "ICMP_IPV6_UNHANDLED_PROTOCOL",
410 "ICMP_NO_CONNECTION",
411 "ICMP_FLUSHED_CONNECTION",
412 "HEADER_INCOMPLETE",
413 "BAD_TOTAL_LENGTH",
414 "NON_V6",
415 "NON_INITIAL_FRAGMENT",
416 "DATAGRAM_INCOMPLETE",
417 "IP_OPTIONS_INCOMPLETE",
418 "UNHANDLED_PROTOCOL",
419 "FLOW_COOKIE_ADD_FAIL"
420};
421
422/*
423 * Per-module structure.
424 */
425struct sfe_ipv6 {
426 spinlock_t lock; /* Lock for SMP correctness */
427 struct sfe_ipv6_connection_match *active_head;
428 /* Head of the list of recently active connections */
429 struct sfe_ipv6_connection_match *active_tail;
430 /* Tail of the list of recently active connections */
431 struct sfe_ipv6_connection *all_connections_head;
432 /* Head of the list of all connections */
433 struct sfe_ipv6_connection *all_connections_tail;
434 /* Tail of the list of all connections */
435 unsigned int num_connections; /* Number of connections */
436 struct timer_list timer; /* Timer used for periodic sync ops */
437 sfe_sync_rule_callback_t __rcu sync_rule_callback;
438 /* Callback function registered by a connection manager for stats syncing */
439 struct sfe_ipv6_connection *conn_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
440 /* Connection hash table */
441 struct sfe_ipv6_connection_match *conn_match_hash[SFE_IPV6_CONNECTION_HASH_SIZE];
442 /* Connection match hash table */
443#ifdef CONFIG_NF_FLOW_COOKIE
444 struct sfe_ipv6_flow_cookie_entry sfe_flow_cookie_table[SFE_FLOW_COOKIE_SIZE];
445 /* flow cookie table*/
446 sfe_ipv6_flow_cookie_set_func_t flow_cookie_set_func;
447 /* function used to configure flow cookie in hardware*/
Xiaoping Fan640faf42015-08-28 15:50:55 -0700448 int flow_cookie_enable;
449 /* Enable/disable flow cookie at runtime */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700450#endif
451
452 /*
453 * Statistics.
454 */
455 uint32_t connection_create_requests;
456 /* Number of IPv6 connection create requests */
457 uint32_t connection_create_collisions;
458 /* Number of IPv6 connection create requests that collided with existing hash table entries */
459 uint32_t connection_destroy_requests;
460 /* Number of IPv6 connection destroy requests */
461 uint32_t connection_destroy_misses;
462 /* Number of IPv6 connection destroy requests that missed our hash table */
463 uint32_t connection_match_hash_hits;
464 /* Number of IPv6 connection match hash hits */
465 uint32_t connection_match_hash_reorders;
466 /* Number of IPv6 connection match hash reorders */
467 uint32_t connection_flushes; /* Number of IPv6 connection flushes */
468 uint32_t packets_forwarded; /* Number of IPv6 packets forwarded */
469 uint32_t packets_not_forwarded; /* Number of IPv6 packets not forwarded */
470 uint32_t exception_events[SFE_IPV6_EXCEPTION_EVENT_LAST];
471
472 /*
473 * Summary tatistics.
474 */
475 uint64_t connection_create_requests64;
476 /* Number of IPv6 connection create requests */
477 uint64_t connection_create_collisions64;
478 /* Number of IPv6 connection create requests that collided with existing hash table entries */
479 uint64_t connection_destroy_requests64;
480 /* Number of IPv6 connection destroy requests */
481 uint64_t connection_destroy_misses64;
482 /* Number of IPv6 connection destroy requests that missed our hash table */
483 uint64_t connection_match_hash_hits64;
484 /* Number of IPv6 connection match hash hits */
485 uint64_t connection_match_hash_reorders64;
486 /* Number of IPv6 connection match hash reorders */
487 uint64_t connection_flushes64; /* Number of IPv6 connection flushes */
488 uint64_t packets_forwarded64; /* Number of IPv6 packets forwarded */
489 uint64_t packets_not_forwarded64;
490 /* Number of IPv6 packets not forwarded */
491 uint64_t exception_events64[SFE_IPV6_EXCEPTION_EVENT_LAST];
492
493 /*
494 * Control state.
495 */
496 struct kobject *sys_sfe_ipv6; /* sysfs linkage */
497 int debug_dev; /* Major number of the debug char device */
Xiaoping Fan34586472015-07-03 02:20:35 -0700498 uint32_t debug_read_seq; /* sequence number for debug dump */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700499};
500
/*
 * States of the state machine that produces the debug XML output.
 */
enum sfe_ipv6_debug_xml_states {
	SFE_IPV6_DEBUG_XML_STATE_START,

	/* Connections section */
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_START,
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_CONNECTION,
	SFE_IPV6_DEBUG_XML_STATE_CONNECTIONS_END,

	/* Exceptions section */
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_START,
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_EXCEPTION,
	SFE_IPV6_DEBUG_XML_STATE_EXCEPTIONS_END,

	SFE_IPV6_DEBUG_XML_STATE_STATS,
	SFE_IPV6_DEBUG_XML_STATE_END,
	SFE_IPV6_DEBUG_XML_STATE_DONE
};
516
517/*
518 * XML write state.
519 */
520struct sfe_ipv6_debug_xml_write_state {
521 enum sfe_ipv6_debug_xml_states state;
522 /* XML output file state machine state */
Xiaoping Fan978b3772015-05-27 14:15:18 -0700523 int iter_exception; /* Next exception iterator */
524};
525
/*
 * Signature shared by the XML debug writer methods: each receives the module
 * state, an output buffer, a message buffer, in/out length and total-read
 * counters, and the current XML write state, and returns a bool.
 */
typedef bool (*sfe_ipv6_debug_xml_write_method_t)(struct sfe_ipv6 *si,
						  char *buffer,
						  char *msg,
						  size_t *length,
						  int *total_read,
						  struct sfe_ipv6_debug_xml_write_state *ws);
528
529struct sfe_ipv6 __si6;
530
531/*
532 * sfe_ipv6_get_debug_dev()
533 */
534static ssize_t sfe_ipv6_get_debug_dev(struct device *dev, struct device_attribute *attr, char *buf);
535
536/*
537 * sysfs attributes.
538 */
539static const struct device_attribute sfe_ipv6_debug_dev_attr =
Xiaoping Fane70da412016-02-26 16:47:57 -0800540 __ATTR(debug_dev, S_IWUSR | S_IRUGO, sfe_ipv6_get_debug_dev, NULL);
Xiaoping Fan978b3772015-05-27 14:15:18 -0700541
542/*
543 * sfe_ipv6_addr_equal()
544 * compare ipv6 address
545 *
546 * return: 1, equal; 0, no equal
547 */
548static inline int sfe_ipv6_addr_equal(struct sfe_ipv6_addr *a,
549 struct sfe_ipv6_addr *b)
550{
551 return a->addr[0] == b->addr[0] &&
552 a->addr[1] == b->addr[1] &&
553 a->addr[2] == b->addr[2] &&
554 a->addr[3] == b->addr[3];
555}
556
557/*
558 * sfe_ipv6_is_ext_hdr()
559 * check if we recognize ipv6 extension header
560 */
561static inline bool sfe_ipv6_is_ext_hdr(uint8_t hdr)
562{
563 return (hdr == SFE_IPV6_EXT_HDR_HOP) ||
564 (hdr == SFE_IPV6_EXT_HDR_ROUTING) ||
565 (hdr == SFE_IPV6_EXT_HDR_FRAG) ||
566 (hdr == SFE_IPV6_EXT_HDR_AH) ||
567 (hdr == SFE_IPV6_EXT_HDR_DST) ||
568 (hdr == SFE_IPV6_EXT_HDR_MH);
569}
570
571/*
Xiaoping Fane1963d42015-08-25 17:06:19 -0700572 * sfe_ipv6_change_dsfield()
573 * change dscp field in IPv6 packet
574 */
575static inline void sfe_ipv6_change_dsfield(struct sfe_ipv6_ip_hdr *iph, uint8_t dscp)
576{
577 __be16 *p = (__be16 *)iph;
578
579 *p = ((*p & htons(SFE_IPV6_DSCP_MASK)) | htons((u16)dscp << 4));
580}
581
582/*
Xiaoping Fan978b3772015-05-27 14:15:18 -0700583 * sfe_ipv6_get_connection_match_hash()
584 * Generate the hash used in connection match lookups.
585 */
586static inline unsigned int sfe_ipv6_get_connection_match_hash(struct net_device *dev, uint8_t protocol,
587 struct sfe_ipv6_addr *src_ip, __be16 src_port,
588 struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
589{
590 uint32_t idx, hash = 0;
591 size_t dev_addr = (size_t)dev;
592
593 for (idx = 0; idx < 4; idx++) {
594 hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx];
595 }
596 hash = ((uint32_t)dev_addr) ^ hash ^ protocol ^ ntohs(src_port ^ dest_port);
597 return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK;
598}
599
600/*
601 * sfe_ipv6_find_connection_match()
602 * Get the IPv6 flow match info that corresponds to a particular 5-tuple.
603 *
604 * On entry we must be holding the lock that protects the hash table.
605 */
606static struct sfe_ipv6_connection_match *
607sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, uint8_t protocol,
608 struct sfe_ipv6_addr *src_ip, __be16 src_port,
609 struct sfe_ipv6_addr *dest_ip, __be16 dest_port) __attribute__((always_inline));
610static struct sfe_ipv6_connection_match *
611sfe_ipv6_find_connection_match(struct sfe_ipv6 *si, struct net_device *dev, uint8_t protocol,
612 struct sfe_ipv6_addr *src_ip, __be16 src_port,
613 struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
614{
615 struct sfe_ipv6_connection_match *cm;
616 struct sfe_ipv6_connection_match *head;
617 unsigned int conn_match_idx;
618
619 conn_match_idx = sfe_ipv6_get_connection_match_hash(dev, protocol, src_ip, src_port, dest_ip, dest_port);
620 cm = si->conn_match_hash[conn_match_idx];
621
622 /*
623 * If we don't have anything in this chain then bale.
624 */
625 if (unlikely(!cm)) {
626 return cm;
627 }
628
629 /*
630 * Hopefully the first entry is the one we want.
631 */
632 if (likely(cm->match_src_port == src_port)
633 && likely(cm->match_dest_port == dest_port)
634 && likely(sfe_ipv6_addr_equal(cm->match_src_ip, src_ip))
635 && likely(sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip))
636 && likely(cm->match_protocol == protocol)
637 && likely(cm->match_dev == dev)) {
638 si->connection_match_hash_hits++;
639 return cm;
640 }
641
642 /*
643 * We may or may not have a matching entry but if we do then we want to
644 * move that entry to the top of the hash chain when we get to it. We
645 * presume that this will be reused again very quickly.
646 */
647 head = cm;
648 do {
649 cm = cm->next;
650 } while (cm && (cm->match_src_port != src_port
651 || cm->match_dest_port != dest_port
652 || !sfe_ipv6_addr_equal(cm->match_src_ip, src_ip)
653 || !sfe_ipv6_addr_equal(cm->match_dest_ip, dest_ip)
654 || cm->match_protocol != protocol
655 || cm->match_dev != dev));
656
657 /*
658 * Not found then we're done.
659 */
660 if (unlikely(!cm)) {
661 return cm;
662 }
663
664 /*
665 * We found a match so move it.
666 */
667 if (cm->next) {
668 cm->next->prev = cm->prev;
669 }
670 cm->prev->next = cm->next;
671 cm->prev = NULL;
672 cm->next = head;
673 head->prev = cm;
674 si->conn_match_hash[conn_match_idx] = cm;
675 si->connection_match_hash_reorders++;
676
677 return cm;
678}
679
680/*
681 * sfe_ipv6_connection_match_update_summary_stats()
682 * Update the summary stats for a connection match entry.
683 */
684static inline void sfe_ipv6_connection_match_update_summary_stats(struct sfe_ipv6_connection_match *cm)
685{
686 cm->rx_packet_count64 += cm->rx_packet_count;
687 cm->rx_packet_count = 0;
688 cm->rx_byte_count64 += cm->rx_byte_count;
689 cm->rx_byte_count = 0;
690}
691
692/*
693 * sfe_ipv6_connection_match_compute_translations()
694 * Compute port and address translations for a connection match entry.
695 */
696static void sfe_ipv6_connection_match_compute_translations(struct sfe_ipv6_connection_match *cm)
697{
698 uint32_t diff[9];
699 uint32_t *idx_32;
700 uint16_t *idx_16;
701
702 /*
703 * Before we insert the entry look to see if this is tagged as doing address
704 * translations. If it is then work out the adjustment that we need to apply
705 * to the transport checksum.
706 */
707 if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC) {
708 uint32_t adj = 0;
709 uint32_t carry = 0;
710
711 /*
712 * Precompute an incremental checksum adjustment so we can
713 * edit packets in this stream very quickly. The algorithm is from RFC1624.
714 */
715 idx_32 = diff;
716 *(idx_32++) = cm->match_src_ip->addr[0];
717 *(idx_32++) = cm->match_src_ip->addr[1];
718 *(idx_32++) = cm->match_src_ip->addr[2];
719 *(idx_32++) = cm->match_src_ip->addr[3];
720
721 idx_16 = (uint16_t *)idx_32;
722 *(idx_16++) = cm->match_src_port;
723 *(idx_16++) = ~cm->xlate_src_port;
724 idx_32 = (uint32_t *)idx_16;
725
726 *(idx_32++) = ~cm->xlate_src_ip->addr[0];
727 *(idx_32++) = ~cm->xlate_src_ip->addr[1];
728 *(idx_32++) = ~cm->xlate_src_ip->addr[2];
729 *(idx_32++) = ~cm->xlate_src_ip->addr[3];
730
731 /*
732 * When we compute this fold it down to a 16-bit offset
733 * as that way we can avoid having to do a double
734 * folding of the twos-complement result because the
735 * addition of 2 16-bit values cannot cause a double
736 * wrap-around!
737 */
738 for (idx_32 = diff; idx_32 < diff + 9; idx_32++) {
739 uint32_t w = *idx_32;
740 adj += carry;
741 adj += w;
742 carry = (w > adj);
743 }
744 adj += carry;
745 adj = (adj & 0xffff) + (adj >> 16);
746 adj = (adj & 0xffff) + (adj >> 16);
747 cm->xlate_src_csum_adjustment = (uint16_t)adj;
748 }
749
750 if (cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST) {
751 uint32_t adj = 0;
752 uint32_t carry = 0;
753
754 /*
755 * Precompute an incremental checksum adjustment so we can
756 * edit packets in this stream very quickly. The algorithm is from RFC1624.
757 */
758 idx_32 = diff;
759 *(idx_32++) = cm->match_dest_ip->addr[0];
760 *(idx_32++) = cm->match_dest_ip->addr[1];
761 *(idx_32++) = cm->match_dest_ip->addr[2];
762 *(idx_32++) = cm->match_dest_ip->addr[3];
763
764 idx_16 = (uint16_t *)idx_32;
765 *(idx_16++) = cm->match_dest_port;
766 *(idx_16++) = ~cm->xlate_dest_port;
767 idx_32 = (uint32_t *)idx_16;
768
769 *(idx_32++) = ~cm->xlate_dest_ip->addr[0];
770 *(idx_32++) = ~cm->xlate_dest_ip->addr[1];
771 *(idx_32++) = ~cm->xlate_dest_ip->addr[2];
772 *(idx_32++) = ~cm->xlate_dest_ip->addr[3];
773
774 /*
775 * When we compute this fold it down to a 16-bit offset
776 * as that way we can avoid having to do a double
777 * folding of the twos-complement result because the
778 * addition of 2 16-bit values cannot cause a double
779 * wrap-around!
780 */
781 for (idx_32 = diff; idx_32 < diff + 9; idx_32++) {
782 uint32_t w = *idx_32;
783 adj += carry;
784 adj += w;
785 carry = (w > adj);
786 }
787 adj += carry;
788 adj = (adj & 0xffff) + (adj >> 16);
789 adj = (adj & 0xffff) + (adj >> 16);
790 cm->xlate_dest_csum_adjustment = (uint16_t)adj;
791 }
792}
793
794/*
795 * sfe_ipv6_update_summary_stats()
796 * Update the summary stats.
797 */
798static void sfe_ipv6_update_summary_stats(struct sfe_ipv6 *si)
799{
800 int i;
801
802 si->connection_create_requests64 += si->connection_create_requests;
803 si->connection_create_requests = 0;
804 si->connection_create_collisions64 += si->connection_create_collisions;
805 si->connection_create_collisions = 0;
806 si->connection_destroy_requests64 += si->connection_destroy_requests;
807 si->connection_destroy_requests = 0;
808 si->connection_destroy_misses64 += si->connection_destroy_misses;
809 si->connection_destroy_misses = 0;
810 si->connection_match_hash_hits64 += si->connection_match_hash_hits;
811 si->connection_match_hash_hits = 0;
812 si->connection_match_hash_reorders64 += si->connection_match_hash_reorders;
813 si->connection_match_hash_reorders = 0;
814 si->connection_flushes64 += si->connection_flushes;
815 si->connection_flushes = 0;
816 si->packets_forwarded64 += si->packets_forwarded;
817 si->packets_forwarded = 0;
818 si->packets_not_forwarded64 += si->packets_not_forwarded;
819 si->packets_not_forwarded = 0;
820
821 for (i = 0; i < SFE_IPV6_EXCEPTION_EVENT_LAST; i++) {
822 si->exception_events64[i] += si->exception_events[i];
823 si->exception_events[i] = 0;
824 }
825}
826
827/*
828 * sfe_ipv6_insert_connection_match()
829 * Insert a connection match into the hash.
830 *
831 * On entry we must be holding the lock that protects the hash table.
832 */
833static inline void sfe_ipv6_insert_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm)
834{
835 struct sfe_ipv6_connection_match **hash_head;
836 struct sfe_ipv6_connection_match *prev_head;
837 unsigned int conn_match_idx
838 = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol,
839 cm->match_src_ip, cm->match_src_port,
840 cm->match_dest_ip, cm->match_dest_port);
841 hash_head = &si->conn_match_hash[conn_match_idx];
842 prev_head = *hash_head;
843 cm->prev = NULL;
844 if (prev_head) {
845 prev_head->prev = cm;
846 }
847
848 cm->next = prev_head;
849 *hash_head = cm;
850
851#ifdef CONFIG_NF_FLOW_COOKIE
Xiaoping Fan640faf42015-08-28 15:50:55 -0700852 if (!si->flow_cookie_enable || !(cm->flags & (SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC | SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)))
Xiaoping Fan978b3772015-05-27 14:15:18 -0700853 return;
854
855 /*
856 * Configure hardware to put a flow cookie in packet of this flow,
857 * then we can accelerate the lookup process when we received this packet.
858 */
859 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
860 struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
861
862 if ((NULL == entry->match) && time_is_before_jiffies(entry->last_clean_time + HZ)) {
863 sfe_ipv6_flow_cookie_set_func_t func;
864
865 rcu_read_lock();
866 func = rcu_dereference(si->flow_cookie_set_func);
867 if (func) {
868 if (!func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
869 cm->match_dest_ip->addr, cm->match_dest_port, conn_match_idx)) {
870 entry->match = cm;
871 cm->flow_cookie = conn_match_idx;
872 } else {
873 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_FLOW_COOKIE_ADD_FAIL]++;
874 }
875 }
876 rcu_read_unlock();
877
878 break;
879 }
880 }
881#endif
Xiaoping Fan978b3772015-05-27 14:15:18 -0700882}
883
884/*
885 * sfe_ipv6_remove_connection_match()
886 * Remove a connection match object from the hash.
887 *
888 * On entry we must be holding the lock that protects the hash table.
889 */
890static inline void sfe_ipv6_remove_connection_match(struct sfe_ipv6 *si, struct sfe_ipv6_connection_match *cm)
891{
892#ifdef CONFIG_NF_FLOW_COOKIE
Xiaoping Fan640faf42015-08-28 15:50:55 -0700893 if (si->flow_cookie_enable) {
894 /*
895 * Tell hardware that we no longer need a flow cookie in packet of this flow
896 */
897 unsigned int conn_match_idx;
Xiaoping Fan978b3772015-05-27 14:15:18 -0700898
Xiaoping Fan640faf42015-08-28 15:50:55 -0700899 for (conn_match_idx = 1; conn_match_idx < SFE_FLOW_COOKIE_SIZE; conn_match_idx++) {
900 struct sfe_ipv6_flow_cookie_entry *entry = &si->sfe_flow_cookie_table[conn_match_idx];
Xiaoping Fan978b3772015-05-27 14:15:18 -0700901
Xiaoping Fan640faf42015-08-28 15:50:55 -0700902 if (cm == entry->match) {
903 sfe_ipv6_flow_cookie_set_func_t func;
Xiaoping Fan978b3772015-05-27 14:15:18 -0700904
Xiaoping Fan640faf42015-08-28 15:50:55 -0700905 rcu_read_lock();
906 func = rcu_dereference(si->flow_cookie_set_func);
907 if (func) {
908 func(cm->match_protocol, cm->match_src_ip->addr, cm->match_src_port,
909 cm->match_dest_ip->addr, cm->match_dest_port, 0);
910 }
911 rcu_read_unlock();
912
913 cm->flow_cookie = 0;
914 entry->match = NULL;
915 entry->last_clean_time = jiffies;
916 break;
Xiaoping Fan978b3772015-05-27 14:15:18 -0700917 }
Xiaoping Fan978b3772015-05-27 14:15:18 -0700918 }
919 }
920#endif
921
922 /*
923 * Unlink the connection match entry from the hash.
924 */
925 if (cm->prev) {
926 cm->prev->next = cm->next;
927 } else {
928 unsigned int conn_match_idx
929 = sfe_ipv6_get_connection_match_hash(cm->match_dev, cm->match_protocol,
930 cm->match_src_ip, cm->match_src_port,
931 cm->match_dest_ip, cm->match_dest_port);
932 si->conn_match_hash[conn_match_idx] = cm->next;
933 }
934
935 if (cm->next) {
936 cm->next->prev = cm->prev;
937 }
938
939 /*
940 * If the connection match entry is in the active list remove it.
941 */
942 if (cm->active) {
943 if (likely(cm->active_prev)) {
944 cm->active_prev->active_next = cm->active_next;
945 } else {
946 si->active_head = cm->active_next;
947 }
948
949 if (likely(cm->active_next)) {
950 cm->active_next->active_prev = cm->active_prev;
951 } else {
952 si->active_tail = cm->active_prev;
953 }
954 }
955}
956
957/*
958 * sfe_ipv6_get_connection_hash()
959 * Generate the hash used in connection lookups.
960 */
961static inline unsigned int sfe_ipv6_get_connection_hash(uint8_t protocol, struct sfe_ipv6_addr *src_ip, __be16 src_port,
962 struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
963{
964 uint32_t idx, hash = 0;
965
966 for (idx = 0; idx < 4; idx++) {
967 hash ^= src_ip->addr[idx] ^ dest_ip->addr[idx];
968 }
969 hash = hash ^ protocol ^ ntohs(src_port ^ dest_port);
970 return ((hash >> SFE_IPV6_CONNECTION_HASH_SHIFT) ^ hash) & SFE_IPV6_CONNECTION_HASH_MASK;
971}
972
973/*
974 * sfe_ipv6_find_connection()
975 * Get the IPv6 connection info that corresponds to a particular 5-tuple.
976 *
977 * On entry we must be holding the lock that protects the hash table.
978 */
979static inline struct sfe_ipv6_connection *sfe_ipv6_find_connection(struct sfe_ipv6 *si, uint32_t protocol,
980 struct sfe_ipv6_addr *src_ip, __be16 src_port,
981 struct sfe_ipv6_addr *dest_ip, __be16 dest_port)
982{
983 struct sfe_ipv6_connection *c;
984 unsigned int conn_idx = sfe_ipv6_get_connection_hash(protocol, src_ip, src_port, dest_ip, dest_port);
985 c = si->conn_hash[conn_idx];
986
987 /*
988 * If we don't have anything in this chain then bale.
989 */
990 if (unlikely(!c)) {
991 return c;
992 }
993
994 /*
995 * Hopefully the first entry is the one we want.
996 */
997 if (likely(c->src_port == src_port)
998 && likely(c->dest_port == dest_port)
999 && likely(sfe_ipv6_addr_equal(c->src_ip, src_ip))
1000 && likely(sfe_ipv6_addr_equal(c->dest_ip, dest_ip))
1001 && likely(c->protocol == protocol)) {
1002 return c;
1003 }
1004
1005 /*
1006 * We may or may not have a matching entry but if we do then we want to
1007 * move that entry to the top of the hash chain when we get to it. We
1008 * presume that this will be reused again very quickly.
1009 */
1010 do {
1011 c = c->next;
1012 } while (c && (c->src_port != src_port
1013 || c->dest_port != dest_port
1014 || !sfe_ipv6_addr_equal(c->src_ip, src_ip)
1015 || !sfe_ipv6_addr_equal(c->dest_ip, dest_ip)
1016 || c->protocol != protocol));
1017
1018 /*
1019 * Will need connection entry for next create/destroy metadata,
1020 * So no need to re-order entry for these requests
1021 */
1022 return c;
1023}
1024
1025/*
1026 * sfe_ipv6_mark_rule()
1027 * Updates the mark for a current offloaded connection
1028 *
1029 * Will take hash lock upon entry
1030 */
1031void sfe_ipv6_mark_rule(struct sfe_connection_mark *mark)
1032{
1033 struct sfe_ipv6 *si = &__si6;
1034 struct sfe_ipv6_connection *c;
1035
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001036 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001037 c = sfe_ipv6_find_connection(si, mark->protocol,
1038 mark->src_ip.ip6, mark->src_port,
1039 mark->dest_ip.ip6, mark->dest_port);
1040 if (c) {
1041 DEBUG_TRACE("Matching connection found for mark, "
1042 "setting from %08x to %08x\n",
1043 c->mark, mark->mark);
1044 WARN_ON((0 != c->mark) && (0 == mark->mark));
1045 c->mark = mark->mark;
1046 }
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001047 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001048}
1049
1050/*
1051 * sfe_ipv6_insert_connection()
1052 * Insert a connection into the hash.
1053 *
1054 * On entry we must be holding the lock that protects the hash table.
1055 */
1056static void sfe_ipv6_insert_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c)
1057{
1058 struct sfe_ipv6_connection **hash_head;
1059 struct sfe_ipv6_connection *prev_head;
1060 unsigned int conn_idx;
1061
1062 /*
1063 * Insert entry into the connection hash.
1064 */
1065 conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1066 c->dest_ip, c->dest_port);
1067 hash_head = &si->conn_hash[conn_idx];
1068 prev_head = *hash_head;
1069 c->prev = NULL;
1070 if (prev_head) {
1071 prev_head->prev = c;
1072 }
1073
1074 c->next = prev_head;
1075 *hash_head = c;
1076
1077 /*
1078 * Insert entry into the "all connections" list.
1079 */
1080 if (si->all_connections_tail) {
1081 c->all_connections_prev = si->all_connections_tail;
1082 si->all_connections_tail->all_connections_next = c;
1083 } else {
1084 c->all_connections_prev = NULL;
1085 si->all_connections_head = c;
1086 }
1087
1088 si->all_connections_tail = c;
1089 c->all_connections_next = NULL;
1090 si->num_connections++;
1091
1092 /*
1093 * Insert the connection match objects too.
1094 */
1095 sfe_ipv6_insert_connection_match(si, c->original_match);
1096 sfe_ipv6_insert_connection_match(si, c->reply_match);
1097}
1098
1099/*
1100 * sfe_ipv6_remove_connection()
1101 * Remove a sfe_ipv6_connection object from the hash.
1102 *
1103 * On entry we must be holding the lock that protects the hash table.
1104 */
1105static void sfe_ipv6_remove_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c)
1106{
1107 /*
1108 * Remove the connection match objects.
1109 */
1110 sfe_ipv6_remove_connection_match(si, c->reply_match);
1111 sfe_ipv6_remove_connection_match(si, c->original_match);
1112
1113 /*
1114 * Unlink the connection.
1115 */
1116 if (c->prev) {
1117 c->prev->next = c->next;
1118 } else {
1119 unsigned int conn_idx = sfe_ipv6_get_connection_hash(c->protocol, c->src_ip, c->src_port,
1120 c->dest_ip, c->dest_port);
1121 si->conn_hash[conn_idx] = c->next;
1122 }
1123
1124 if (c->next) {
1125 c->next->prev = c->prev;
1126 }
Xiaoping Fan34586472015-07-03 02:20:35 -07001127
1128 /*
1129 * Unlink connection from all_connections list
1130 */
1131 if (c->all_connections_prev) {
1132 c->all_connections_prev->all_connections_next = c->all_connections_next;
1133 } else {
1134 si->all_connections_head = c->all_connections_next;
1135 }
1136
1137 if (c->all_connections_next) {
1138 c->all_connections_next->all_connections_prev = c->all_connections_prev;
1139 } else {
1140 si->all_connections_tail = c->all_connections_prev;
1141 }
1142
1143 si->num_connections--;
Xiaoping Fan978b3772015-05-27 14:15:18 -07001144}
1145
1146/*
1147 * sfe_ipv6_gen_sync_connection()
1148 * Sync a connection.
1149 *
1150 * On entry to this function we expect that the lock for the connection is either
1151 * already held or isn't required.
1152 */
1153static void sfe_ipv6_gen_sync_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c,
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001154 struct sfe_connection_sync *sis, sfe_sync_reason_t reason,
1155 uint64_t now_jiffies)
Xiaoping Fan978b3772015-05-27 14:15:18 -07001156{
1157 struct sfe_ipv6_connection_match *original_cm;
1158 struct sfe_ipv6_connection_match *reply_cm;
1159
1160 /*
1161 * Fill in the update message.
1162 */
1163 sis->protocol = c->protocol;
1164 sis->src_ip.ip6[0] = c->src_ip[0];
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001165 sis->src_ip_xlate.ip6[0] = c->src_ip_xlate[0];
Xiaoping Fan978b3772015-05-27 14:15:18 -07001166 sis->dest_ip.ip6[0] = c->dest_ip[0];
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001167 sis->dest_ip_xlate.ip6[0] = c->dest_ip_xlate[0];
Xiaoping Fan978b3772015-05-27 14:15:18 -07001168 sis->src_port = c->src_port;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001169 sis->src_port_xlate = c->src_port_xlate;
Xiaoping Fan978b3772015-05-27 14:15:18 -07001170 sis->dest_port = c->dest_port;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001171 sis->dest_port_xlate = c->dest_port_xlate;
Xiaoping Fan978b3772015-05-27 14:15:18 -07001172
1173 original_cm = c->original_match;
1174 reply_cm = c->reply_match;
1175 sis->src_td_max_window = original_cm->protocol_state.tcp.max_win;
1176 sis->src_td_end = original_cm->protocol_state.tcp.end;
1177 sis->src_td_max_end = original_cm->protocol_state.tcp.max_end;
1178 sis->dest_td_max_window = reply_cm->protocol_state.tcp.max_win;
1179 sis->dest_td_end = reply_cm->protocol_state.tcp.end;
1180 sis->dest_td_max_end = reply_cm->protocol_state.tcp.max_end;
1181
1182 sis->src_new_packet_count = original_cm->rx_packet_count;
1183 sis->src_new_byte_count = original_cm->rx_byte_count;
1184 sis->dest_new_packet_count = reply_cm->rx_packet_count;
1185 sis->dest_new_byte_count = reply_cm->rx_byte_count;
1186
1187 sfe_ipv6_connection_match_update_summary_stats(original_cm);
1188 sfe_ipv6_connection_match_update_summary_stats(reply_cm);
1189
1190 sis->src_dev = original_cm->match_dev;
1191 sis->src_packet_count = original_cm->rx_packet_count64;
1192 sis->src_byte_count = original_cm->rx_byte_count64;
1193
1194 sis->dest_dev = reply_cm->match_dev;
1195 sis->dest_packet_count = reply_cm->rx_packet_count64;
1196 sis->dest_byte_count = reply_cm->rx_byte_count64;
1197
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001198 sis->reason = reason;
1199
Xiaoping Fan978b3772015-05-27 14:15:18 -07001200 /*
1201 * Get the time increment since our last sync.
1202 */
1203 sis->delta_jiffies = now_jiffies - c->last_sync_jiffies;
1204 c->last_sync_jiffies = now_jiffies;
1205}
1206
1207/*
Xiaoping Fan978b3772015-05-27 14:15:18 -07001208 * sfe_ipv6_flush_connection()
1209 * Flush a connection and free all associated resources.
1210 *
1211 * We need to be called with bottom halves disabled locally as we need to acquire
1212 * the connection hash lock and release it again. In general we're actually called
1213 * from within a BH and so we're fine, but we're also called when connections are
1214 * torn down.
1215 */
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001216static void sfe_ipv6_flush_connection(struct sfe_ipv6 *si, struct sfe_ipv6_connection *c, sfe_sync_reason_t reason)
Xiaoping Fan978b3772015-05-27 14:15:18 -07001217{
1218 struct sfe_connection_sync sis;
1219 uint64_t now_jiffies;
Xiaoping Fan978b3772015-05-27 14:15:18 -07001220 sfe_sync_rule_callback_t sync_rule_callback;
1221
1222 rcu_read_lock();
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001223 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001224 si->connection_flushes++;
Xiaoping Fan978b3772015-05-27 14:15:18 -07001225 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001226 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001227
1228 if (sync_rule_callback) {
1229 /*
1230 * Generate a sync message and then sync.
1231 */
1232 now_jiffies = get_jiffies_64();
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001233 sfe_ipv6_gen_sync_connection(si, c, &sis, reason, now_jiffies);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001234 sync_rule_callback(&sis);
1235 }
1236
1237 rcu_read_unlock();
1238
1239 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -07001240 * Release our hold of the source and dest devices and free the memory
1241 * for our connection objects.
1242 */
1243 dev_put(c->original_dev);
1244 dev_put(c->reply_dev);
1245 kfree(c->original_match);
1246 kfree(c->reply_match);
1247 kfree(c);
1248}
1249
1250/*
1251 * sfe_ipv6_recv_udp()
1252 * Handle UDP packet receives and forwarding.
1253 */
1254static int sfe_ipv6_recv_udp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
1255 unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
1256{
1257 struct sfe_ipv6_udp_hdr *udph;
1258 struct sfe_ipv6_addr *src_ip;
1259 struct sfe_ipv6_addr *dest_ip;
1260 __be16 src_port;
1261 __be16 dest_port;
1262 struct sfe_ipv6_connection_match *cm;
1263 struct net_device *xmit_dev;
1264
1265 /*
1266 * Is our packet too short to contain a valid UDP header?
1267 */
1268 if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_udp_hdr) + ihl))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001269 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001270 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_HEADER_INCOMPLETE]++;
1271 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001272 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001273
1274 DEBUG_TRACE("packet too short for UDP header\n");
1275 return 0;
1276 }
1277
1278 /*
1279 * Read the IP address and port information. Read the IP header data first
1280 * because we've almost certainly got that in the cache. We may not yet have
1281 * the UDP header cached though so allow more time for any prefetching.
1282 */
1283 src_ip = &iph->saddr;
1284 dest_ip = &iph->daddr;
1285
1286 udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
1287 src_port = udph->source;
1288 dest_port = udph->dest;
1289
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001290 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001291
1292 /*
1293 * Look for a connection match.
1294 */
1295#ifdef CONFIG_NF_FLOW_COOKIE
1296 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1297 if (unlikely(!cm)) {
1298 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1299 }
1300#else
1301 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_UDP, src_ip, src_port, dest_ip, dest_port);
1302#endif
1303 if (unlikely(!cm)) {
1304 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NO_CONNECTION]++;
1305 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001306 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001307
1308 DEBUG_TRACE("no connection found\n");
1309 return 0;
1310 }
1311
1312 /*
1313 * If our packet has beern marked as "flush on find" we can't actually
1314 * forward it in the fast path, but now that we've found an associated
1315 * connection we can flush that out before we process the packet.
1316 */
1317 if (unlikely(flush_on_find)) {
1318 struct sfe_ipv6_connection *c = cm->connection;
1319 sfe_ipv6_remove_connection(si, c);
1320 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1321 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001322 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001323
1324 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001325 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001326 return 0;
1327 }
1328
Zhi Chen8748eb32015-06-18 12:58:48 -07001329#ifdef CONFIG_XFRM
1330 /*
1331 * We can't accelerate the flow on this direction, just let it go
1332 * through the slow path.
1333 */
1334 if (unlikely(!cm->flow_accel)) {
1335 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001336 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001337 return 0;
1338 }
1339#endif
1340
Xiaoping Fan978b3772015-05-27 14:15:18 -07001341 /*
1342 * Does our hop_limit allow forwarding?
1343 */
1344 if (unlikely(iph->hop_limit < 2)) {
1345 struct sfe_ipv6_connection *c = cm->connection;
1346 sfe_ipv6_remove_connection(si, c);
1347 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_SMALL_TTL]++;
1348 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001349 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001350
1351 DEBUG_TRACE("hop_limit too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001352 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001353 return 0;
1354 }
1355
1356 /*
1357 * If our packet is larger than the MTU of the transmit interface then
1358 * we can't forward it easily.
1359 */
1360 if (unlikely(len > cm->xmit_dev_mtu)) {
1361 struct sfe_ipv6_connection *c = cm->connection;
1362 sfe_ipv6_remove_connection(si, c);
1363 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UDP_NEEDS_FRAGMENTATION]++;
1364 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001365 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001366
1367 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001368 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001369 return 0;
1370 }
1371
1372 /*
1373 * From this point on we're good to modify the packet.
1374 */
1375
1376 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001377 * Update DSCP
1378 */
1379 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1380 sfe_ipv6_change_dsfield(iph, cm->dscp);
1381 }
1382
1383 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -07001384 * Decrement our hop_limit.
1385 */
1386 iph->hop_limit -= 1;
1387
1388 /*
1389 * Do we have to perform translations of the source address/port?
1390 */
1391 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1392 uint16_t udp_csum;
1393
1394 iph->saddr = cm->xlate_src_ip[0];
1395 udph->source = cm->xlate_src_port;
1396
1397 /*
1398 * Do we have a non-zero UDP checksum? If we do then we need
1399 * to update it.
1400 */
1401 udp_csum = udph->check;
1402 if (likely(udp_csum)) {
1403 uint32_t sum = udp_csum + cm->xlate_src_csum_adjustment;
1404 sum = (sum & 0xffff) + (sum >> 16);
1405 udph->check = (uint16_t)sum;
1406 }
1407 }
1408
1409 /*
1410 * Do we have to perform translations of the destination address/port?
1411 */
1412 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1413 uint16_t udp_csum;
1414
1415 iph->daddr = cm->xlate_dest_ip[0];
1416 udph->dest = cm->xlate_dest_port;
1417
1418 /*
1419 * Do we have a non-zero UDP checksum? If we do then we need
1420 * to update it.
1421 */
1422 udp_csum = udph->check;
1423 if (likely(udp_csum)) {
1424 uint32_t sum = udp_csum + cm->xlate_dest_csum_adjustment;
1425 sum = (sum & 0xffff) + (sum >> 16);
1426 udph->check = (uint16_t)sum;
1427 }
1428 }
1429
1430 /*
1431 * Update traffic stats.
1432 */
1433 cm->rx_packet_count++;
1434 cm->rx_byte_count += len;
1435
1436 /*
1437 * If we're not already on the active list then insert ourselves at the tail
1438 * of the current list.
1439 */
1440 if (unlikely(!cm->active)) {
1441 cm->active = true;
1442 cm->active_prev = si->active_tail;
1443 if (likely(si->active_tail)) {
1444 si->active_tail->active_next = cm;
1445 } else {
1446 si->active_head = cm;
1447 }
1448 si->active_tail = cm;
1449 }
1450
1451 xmit_dev = cm->xmit_dev;
1452 skb->dev = xmit_dev;
1453
1454 /*
1455 * Check to see if we need to write a header.
1456 */
1457 if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1458 if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001459 dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
1460 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001461 } else {
1462 /*
1463 * For the simple case we write this really fast.
1464 */
1465 struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
1466 eth->h_proto = htons(ETH_P_IPV6);
1467 eth->h_dest[0] = cm->xmit_dest_mac[0];
1468 eth->h_dest[1] = cm->xmit_dest_mac[1];
1469 eth->h_dest[2] = cm->xmit_dest_mac[2];
1470 eth->h_source[0] = cm->xmit_src_mac[0];
1471 eth->h_source[1] = cm->xmit_src_mac[1];
1472 eth->h_source[2] = cm->xmit_src_mac[2];
1473 }
1474 }
1475
1476 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001477 * Update priority of skb.
1478 */
1479 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
1480 skb->priority = cm->priority;
1481 }
1482
1483 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -07001484 * Mark outgoing packet.
1485 */
1486 skb->mark = cm->connection->mark;
1487 if (skb->mark) {
1488 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
1489 }
1490
1491 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001492 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001493
1494 /*
1495 * We're going to check for GSO flags when we transmit the packet so
1496 * start fetching the necessary cache line now.
1497 */
1498 prefetch(skb_shinfo(skb));
1499
1500 /*
1501 * Mark that this packet has been fast forwarded.
1502 */
1503 skb->fast_forwarded = 1;
1504
1505 /*
1506 * Send the packet on its way.
1507 */
1508 dev_queue_xmit(skb);
1509
1510 return 1;
1511}
1512
1513/*
1514 * sfe_ipv6_process_tcp_option_sack()
1515 * Parse TCP SACK option and update ack according
1516 */
1517static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const uint32_t data_offs,
1518 uint32_t *ack) __attribute__((always_inline));
1519static bool sfe_ipv6_process_tcp_option_sack(const struct sfe_ipv6_tcp_hdr *th, const uint32_t data_offs,
1520 uint32_t *ack)
1521{
1522 uint32_t length = sizeof(struct sfe_ipv6_tcp_hdr);
1523 uint8_t *ptr = (uint8_t *)th + length;
1524
1525 /*
1526 * If option is TIMESTAMP discard it.
1527 */
1528 if (likely(data_offs == length + TCPOLEN_TIMESTAMP + 1 + 1)
1529 && likely(ptr[0] == TCPOPT_NOP)
1530 && likely(ptr[1] == TCPOPT_NOP)
1531 && likely(ptr[2] == TCPOPT_TIMESTAMP)
1532 && likely(ptr[3] == TCPOLEN_TIMESTAMP)) {
1533 return true;
1534 }
1535
1536 /*
1537 * TCP options. Parse SACK option.
1538 */
1539 while (length < data_offs) {
1540 uint8_t size;
1541 uint8_t kind;
1542
1543 ptr = (uint8_t *)th + length;
1544 kind = *ptr;
1545
1546 /*
1547 * NOP, for padding
1548 * Not in the switch because to fast escape and to not calculate size
1549 */
1550 if (kind == TCPOPT_NOP) {
1551 length++;
1552 continue;
1553 }
1554
1555 if (kind == TCPOPT_SACK) {
1556 uint32_t sack = 0;
1557 uint8_t re = 1 + 1;
1558
1559 size = *(ptr + 1);
1560 if ((size < (1 + 1 + TCPOLEN_SACK_PERBLOCK))
1561 || ((size - (1 + 1)) % (TCPOLEN_SACK_PERBLOCK))
1562 || (size > (data_offs - length))) {
1563 return false;
1564 }
1565
1566 re += 4;
1567 while (re < size) {
1568 uint32_t sack_re;
1569 uint8_t *sptr = ptr + re;
1570 sack_re = (sptr[0] << 24) | (sptr[1] << 16) | (sptr[2] << 8) | sptr[3];
1571 if (sack_re > sack) {
1572 sack = sack_re;
1573 }
1574 re += TCPOLEN_SACK_PERBLOCK;
1575 }
1576 if (sack > *ack) {
1577 *ack = sack;
1578 }
1579 length += size;
1580 continue;
1581 }
1582 if (kind == TCPOPT_EOL) {
1583 return true;
1584 }
1585 size = *(ptr + 1);
1586 if (size < 2) {
1587 return false;
1588 }
1589 length += size;
1590 }
1591
1592 return true;
1593}
1594
1595/*
1596 * sfe_ipv6_recv_tcp()
1597 * Handle TCP packet receives and forwarding.
1598 */
1599static int sfe_ipv6_recv_tcp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
1600 unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl, bool flush_on_find)
1601{
1602 struct sfe_ipv6_tcp_hdr *tcph;
1603 struct sfe_ipv6_addr *src_ip;
1604 struct sfe_ipv6_addr *dest_ip;
1605 __be16 src_port;
1606 __be16 dest_port;
1607 struct sfe_ipv6_connection_match *cm;
1608 struct sfe_ipv6_connection_match *counter_cm;
1609 uint32_t flags;
1610 struct net_device *xmit_dev;
1611
1612 /*
	 * Is our packet too short to contain a valid TCP header?
1614 */
1615 if (!pskb_may_pull(skb, (sizeof(struct sfe_ipv6_tcp_hdr) + ihl))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001616 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001617 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_HEADER_INCOMPLETE]++;
1618 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001619 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001620
1621 DEBUG_TRACE("packet too short for TCP header\n");
1622 return 0;
1623 }
1624
1625 /*
1626 * Read the IP address and port information. Read the IP header data first
1627 * because we've almost certainly got that in the cache. We may not yet have
1628 * the TCP header cached though so allow more time for any prefetching.
1629 */
1630 src_ip = &iph->saddr;
1631 dest_ip = &iph->daddr;
1632
1633 tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
1634 src_port = tcph->source;
1635 dest_port = tcph->dest;
1636 flags = tcp_flag_word(tcph);
1637
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001638 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001639
1640 /*
1641 * Look for a connection match.
1642 */
1643#ifdef CONFIG_NF_FLOW_COOKIE
1644 cm = si->sfe_flow_cookie_table[skb->flow_cookie & SFE_FLOW_COOKIE_MASK].match;
1645 if (unlikely(!cm)) {
1646 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1647 }
1648#else
1649 cm = sfe_ipv6_find_connection_match(si, dev, IPPROTO_TCP, src_ip, src_port, dest_ip, dest_port);
1650#endif
1651 if (unlikely(!cm)) {
1652 /*
1653 * We didn't get a connection but as TCP is connection-oriented that
1654 * may be because this is a non-fast connection (not running established).
1655 * For diagnostic purposes we differentiate this here.
1656 */
1657 if (likely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) == TCP_FLAG_ACK)) {
1658 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_FAST_FLAGS]++;
1659 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001660 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001661
1662 DEBUG_TRACE("no connection found - fast flags\n");
1663 return 0;
1664 }
1665 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NO_CONNECTION_SLOW_FLAGS]++;
1666 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001667 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001668
1669 DEBUG_TRACE("no connection found - slow flags: 0x%x\n",
1670 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
1671 return 0;
1672 }
1673
1674 /*
	 * If our packet has been marked as "flush on find" we can't actually
1676 * forward it in the fast path, but now that we've found an associated
1677 * connection we can flush that out before we process the packet.
1678 */
1679 if (unlikely(flush_on_find)) {
1680 struct sfe_ipv6_connection *c = cm->connection;
1681 sfe_ipv6_remove_connection(si, c);
1682 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_IP_OPTIONS_OR_INITIAL_FRAGMENT]++;
1683 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001684 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001685
1686 DEBUG_TRACE("flush on find\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001687 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001688 return 0;
1689 }
1690
Zhi Chen8748eb32015-06-18 12:58:48 -07001691#ifdef CONFIG_XFRM
1692 /*
1693 * We can't accelerate the flow on this direction, just let it go
1694 * through the slow path.
1695 */
1696 if (unlikely(!cm->flow_accel)) {
1697 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001698 spin_unlock_bh(&si->lock);
Zhi Chen8748eb32015-06-18 12:58:48 -07001699 return 0;
1700 }
1701#endif
1702
Xiaoping Fan978b3772015-05-27 14:15:18 -07001703 /*
1704 * Does our hop_limit allow forwarding?
1705 */
1706 if (unlikely(iph->hop_limit < 2)) {
1707 struct sfe_ipv6_connection *c = cm->connection;
1708 sfe_ipv6_remove_connection(si, c);
1709 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_TTL]++;
1710 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001711 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001712
1713 DEBUG_TRACE("hop_limit too low\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001714 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001715 return 0;
1716 }
1717
1718 /*
1719 * If our packet is larger than the MTU of the transmit interface then
1720 * we can't forward it easily.
1721 */
1722 if (unlikely((len > cm->xmit_dev_mtu) && !skb_is_gso(skb))) {
1723 struct sfe_ipv6_connection *c = cm->connection;
1724 sfe_ipv6_remove_connection(si, c);
1725 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_NEEDS_FRAGMENTATION]++;
1726 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001727 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001728
1729 DEBUG_TRACE("larger than mtu\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001730 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001731 return 0;
1732 }
1733
1734 /*
1735 * Look at our TCP flags. Anything missing an ACK or that has RST, SYN or FIN
1736 * set is not a fast path packet.
1737 */
1738 if (unlikely((flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK)) != TCP_FLAG_ACK)) {
1739 struct sfe_ipv6_connection *c = cm->connection;
1740 sfe_ipv6_remove_connection(si, c);
1741 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_FLAGS]++;
1742 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001743 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001744
1745 DEBUG_TRACE("TCP flags: 0x%x are not fast\n",
1746 flags & (TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_FIN | TCP_FLAG_ACK));
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001747 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001748 return 0;
1749 }
1750
1751 counter_cm = cm->counter_match;
1752
1753 /*
1754 * Are we doing sequence number checking?
1755 */
1756 if (likely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK))) {
1757 uint32_t seq;
1758 uint32_t ack;
1759 uint32_t sack;
1760 uint32_t data_offs;
1761 uint32_t end;
1762 uint32_t left_edge;
1763 uint32_t scaled_win;
1764 uint32_t max_end;
1765
1766 /*
1767 * Is our sequence fully past the right hand edge of the window?
1768 */
1769 seq = ntohl(tcph->seq);
1770 if (unlikely((int32_t)(seq - (cm->protocol_state.tcp.max_end + 1)) > 0)) {
1771 struct sfe_ipv6_connection *c = cm->connection;
1772 sfe_ipv6_remove_connection(si, c);
1773 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_EXCEEDS_RIGHT_EDGE]++;
1774 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001775 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001776
1777 DEBUG_TRACE("seq: %u exceeds right edge: %u\n",
1778 seq, cm->protocol_state.tcp.max_end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001779 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001780 return 0;
1781 }
1782
1783 /*
1784 * Check that our TCP data offset isn't too short.
1785 */
1786 data_offs = tcph->doff << 2;
1787 if (unlikely(data_offs < sizeof(struct sfe_ipv6_tcp_hdr))) {
1788 struct sfe_ipv6_connection *c = cm->connection;
1789 sfe_ipv6_remove_connection(si, c);
1790 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SMALL_DATA_OFFS]++;
1791 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001792 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001793
1794 DEBUG_TRACE("TCP data offset: %u, too small\n", data_offs);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001795 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001796 return 0;
1797 }
1798
1799 /*
1800 * Update ACK according to any SACK option.
1801 */
1802 ack = ntohl(tcph->ack_seq);
1803 sack = ack;
1804 if (unlikely(!sfe_ipv6_process_tcp_option_sack(tcph, data_offs, &sack))) {
1805 struct sfe_ipv6_connection *c = cm->connection;
1806 sfe_ipv6_remove_connection(si, c);
1807 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BAD_SACK]++;
1808 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001809 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001810
1811 DEBUG_TRACE("TCP option SACK size is wrong\n");
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001812 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001813 return 0;
1814 }
1815
1816 /*
1817 * Check that our TCP data offset isn't past the end of the packet.
1818 */
1819 data_offs += sizeof(struct sfe_ipv6_ip_hdr);
1820 if (unlikely(len < data_offs)) {
1821 struct sfe_ipv6_connection *c = cm->connection;
1822 sfe_ipv6_remove_connection(si, c);
1823 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_BIG_DATA_OFFS]++;
1824 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001825 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001826
1827 DEBUG_TRACE("TCP data offset: %u, past end of packet: %u\n",
1828 data_offs, len);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001829 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001830 return 0;
1831 }
1832
1833 end = seq + len - data_offs;
1834
1835 /*
1836 * Is our sequence fully before the left hand edge of the window?
1837 */
1838 if (unlikely((int32_t)(end - (cm->protocol_state.tcp.end
1839 - counter_cm->protocol_state.tcp.max_win - 1)) < 0)) {
1840 struct sfe_ipv6_connection *c = cm->connection;
1841 sfe_ipv6_remove_connection(si, c);
1842 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_SEQ_BEFORE_LEFT_EDGE]++;
1843 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001844 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001845
1846 DEBUG_TRACE("seq: %u before left edge: %u\n",
1847 end, cm->protocol_state.tcp.end - counter_cm->protocol_state.tcp.max_win - 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001848 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001849 return 0;
1850 }
1851
1852 /*
1853 * Are we acking data that is to the right of what has been sent?
1854 */
1855 if (unlikely((int32_t)(sack - (counter_cm->protocol_state.tcp.end + 1)) > 0)) {
1856 struct sfe_ipv6_connection *c = cm->connection;
1857 sfe_ipv6_remove_connection(si, c);
1858 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_EXCEEDS_RIGHT_EDGE]++;
1859 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001860 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001861
1862 DEBUG_TRACE("ack: %u exceeds right edge: %u\n",
1863 sack, counter_cm->protocol_state.tcp.end + 1);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001864 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001865 return 0;
1866 }
1867
1868 /*
1869 * Is our ack too far before the left hand edge of the window?
1870 */
1871 left_edge = counter_cm->protocol_state.tcp.end
1872 - cm->protocol_state.tcp.max_win
1873 - SFE_IPV6_TCP_MAX_ACK_WINDOW
1874 - 1;
1875 if (unlikely((int32_t)(sack - left_edge) < 0)) {
1876 struct sfe_ipv6_connection *c = cm->connection;
1877 sfe_ipv6_remove_connection(si, c);
1878 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_TCP_ACK_BEFORE_LEFT_EDGE]++;
1879 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07001880 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001881
1882 DEBUG_TRACE("ack: %u before left edge: %u\n", sack, left_edge);
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07001883 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001884 return 0;
1885 }
1886
1887 /*
1888 * Have we just seen the largest window size yet for this connection? If yes
1889 * then we need to record the new value.
1890 */
1891 scaled_win = ntohs(tcph->window) << cm->protocol_state.tcp.win_scale;
1892 scaled_win += (sack - ack);
1893 if (unlikely(cm->protocol_state.tcp.max_win < scaled_win)) {
1894 cm->protocol_state.tcp.max_win = scaled_win;
1895 }
1896
1897 /*
1898 * If our sequence and/or ack numbers have advanced then record the new state.
1899 */
1900 if (likely((int32_t)(end - cm->protocol_state.tcp.end) >= 0)) {
1901 cm->protocol_state.tcp.end = end;
1902 }
1903
1904 max_end = sack + scaled_win;
1905 if (likely((int32_t)(max_end - counter_cm->protocol_state.tcp.max_end) >= 0)) {
1906 counter_cm->protocol_state.tcp.max_end = max_end;
1907 }
1908 }
1909
1910 /*
1911 * From this point on we're good to modify the packet.
1912 */
1913
1914 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07001915 * Update DSCP
1916 */
1917 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK)) {
1918 sfe_ipv6_change_dsfield(iph, cm->dscp);
1919 }
1920
1921 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -07001922 * Decrement our hop_limit.
1923 */
1924 iph->hop_limit -= 1;
1925
1926 /*
1927 * Do we have to perform translations of the source address/port?
1928 */
1929 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC)) {
1930 uint16_t tcp_csum;
1931 uint32_t sum;
1932
1933 iph->saddr = cm->xlate_src_ip[0];
1934 tcph->source = cm->xlate_src_port;
1935
1936 /*
1937 * Do we have a non-zero UDP checksum? If we do then we need
1938 * to update it.
1939 */
1940 tcp_csum = tcph->check;
1941 sum = tcp_csum + cm->xlate_src_csum_adjustment;
1942 sum = (sum & 0xffff) + (sum >> 16);
1943 tcph->check = (uint16_t)sum;
1944 }
1945
1946 /*
1947 * Do we have to perform translations of the destination address/port?
1948 */
1949 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST)) {
1950 uint16_t tcp_csum;
1951 uint32_t sum;
1952
1953 iph->daddr = cm->xlate_dest_ip[0];
1954 tcph->dest = cm->xlate_dest_port;
1955
1956 /*
1957 * Do we have a non-zero UDP checksum? If we do then we need
1958 * to update it.
1959 */
1960 tcp_csum = tcph->check;
1961 sum = tcp_csum + cm->xlate_dest_csum_adjustment;
1962 sum = (sum & 0xffff) + (sum >> 16);
1963 tcph->check = (uint16_t)sum;
1964 }
1965
1966 /*
1967 * Update traffic stats.
1968 */
1969 cm->rx_packet_count++;
1970 cm->rx_byte_count += len;
1971
1972 /*
1973 * If we're not already on the active list then insert ourselves at the tail
1974 * of the current list.
1975 */
1976 if (unlikely(!cm->active)) {
1977 cm->active = true;
1978 cm->active_prev = si->active_tail;
1979 if (likely(si->active_tail)) {
1980 si->active_tail->active_next = cm;
1981 } else {
1982 si->active_head = cm;
1983 }
1984 si->active_tail = cm;
1985 }
1986
1987 xmit_dev = cm->xmit_dev;
1988 skb->dev = xmit_dev;
1989
1990 /*
1991 * Check to see if we need to write a header.
1992 */
1993 if (likely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR)) {
1994 if (unlikely(!(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR))) {
Xiaoping Fan2784e612015-06-25 17:57:41 -07001995 dev_hard_header(skb, xmit_dev, ETH_P_IPV6,
1996 cm->xmit_dest_mac, cm->xmit_src_mac, len);
Xiaoping Fan978b3772015-05-27 14:15:18 -07001997 } else {
1998 /*
1999 * For the simple case we write this really fast.
2000 */
2001 struct sfe_ipv6_eth_hdr *eth = (struct sfe_ipv6_eth_hdr *)__skb_push(skb, ETH_HLEN);
2002 eth->h_proto = htons(ETH_P_IPV6);
2003 eth->h_dest[0] = cm->xmit_dest_mac[0];
2004 eth->h_dest[1] = cm->xmit_dest_mac[1];
2005 eth->h_dest[2] = cm->xmit_dest_mac[2];
2006 eth->h_source[0] = cm->xmit_src_mac[0];
2007 eth->h_source[1] = cm->xmit_src_mac[1];
2008 eth->h_source[2] = cm->xmit_src_mac[2];
2009 }
2010 }
2011
2012 /*
Xiaoping Fane1963d42015-08-25 17:06:19 -07002013 * Update priority of skb.
2014 */
2015 if (unlikely(cm->flags & SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK)) {
2016 skb->priority = cm->priority;
2017 }
2018
2019 /*
Xiaoping Fan978b3772015-05-27 14:15:18 -07002020 * Mark outgoing packet
2021 */
2022 skb->mark = cm->connection->mark;
2023 if (skb->mark) {
2024 DEBUG_TRACE("SKB MARK is NON ZERO %x\n", skb->mark);
2025 }
2026
2027 si->packets_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002028 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002029
2030 /*
2031 * We're going to check for GSO flags when we transmit the packet so
2032 * start fetching the necessary cache line now.
2033 */
2034 prefetch(skb_shinfo(skb));
2035
2036 /*
2037 * Mark that this packet has been fast forwarded.
2038 */
2039 skb->fast_forwarded = 1;
2040
2041 /*
2042 * Send the packet on its way.
2043 */
2044 dev_queue_xmit(skb);
2045
2046 return 1;
2047}
2048
2049/*
2050 * sfe_ipv6_recv_icmp()
2051 * Handle ICMP packet receives.
2052 *
2053 * ICMP packets aren't handled as a "fast path" and always have us process them
2054 * through the default Linux stack. What we do need to do is look for any errors
2055 * about connections we are handling in the fast path. If we find any such
2056 * connections then we want to flush their state so that the ICMP error path
2057 * within Linux has all of the correct state should it need it.
2058 */
2059static int sfe_ipv6_recv_icmp(struct sfe_ipv6 *si, struct sk_buff *skb, struct net_device *dev,
2060 unsigned int len, struct sfe_ipv6_ip_hdr *iph, unsigned int ihl)
2061{
2062 struct icmp6hdr *icmph;
2063 struct sfe_ipv6_ip_hdr *icmp_iph;
2064 struct sfe_ipv6_udp_hdr *icmp_udph;
2065 struct sfe_ipv6_tcp_hdr *icmp_tcph;
2066 struct sfe_ipv6_addr *src_ip;
2067 struct sfe_ipv6_addr *dest_ip;
2068 __be16 src_port;
2069 __be16 dest_port;
2070 struct sfe_ipv6_connection_match *cm;
2071 struct sfe_ipv6_connection *c;
2072 uint8_t next_hdr;
2073
2074 /*
2075 * Is our packet too short to contain a valid UDP header?
2076 */
2077 len -= ihl;
2078 if (!pskb_may_pull(skb, ihl + sizeof(struct icmp6hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002079 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002080 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_HEADER_INCOMPLETE]++;
2081 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002082 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002083
2084 DEBUG_TRACE("packet too short for ICMP header\n");
2085 return 0;
2086 }
2087
2088 /*
2089 * We only handle "destination unreachable" and "time exceeded" messages.
2090 */
2091 icmph = (struct icmp6hdr *)(skb->data + ihl);
2092 if ((icmph->icmp6_type != ICMPV6_DEST_UNREACH)
2093 && (icmph->icmp6_type != ICMPV6_TIME_EXCEED)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002094 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002095 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_UNHANDLED_TYPE]++;
2096 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002097 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002098
2099 DEBUG_TRACE("unhandled ICMP type: 0x%x\n", icmph->icmp6_type);
2100 return 0;
2101 }
2102
2103 /*
2104 * Do we have the full embedded IP header?
2105 * We should have 8 bytes of next L4 header - that's enough to identify
2106 * the connection.
2107 */
2108 len -= sizeof(struct icmp6hdr);
2109 ihl += sizeof(struct icmp6hdr);
2110 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ip_hdr) + sizeof(struct sfe_ipv6_ext_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002111 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002112 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_HEADER_INCOMPLETE]++;
2113 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002114 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002115
2116 DEBUG_TRACE("Embedded IP header not complete\n");
2117 return 0;
2118 }
2119
2120 /*
2121 * Is our embedded IP version wrong?
2122 */
2123 icmp_iph = (struct sfe_ipv6_ip_hdr *)(icmph + 1);
2124 if (unlikely(icmp_iph->version != 6)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002125 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002126 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_NON_V6]++;
2127 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002128 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002129
2130 DEBUG_TRACE("IP version: %u\n", icmp_iph->version);
2131 return 0;
2132 }
2133
2134 len -= sizeof(struct sfe_ipv6_ip_hdr);
2135 ihl += sizeof(struct sfe_ipv6_ip_hdr);
2136 next_hdr = icmp_iph->nexthdr;
2137 while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
2138 struct sfe_ipv6_ext_hdr *ext_hdr;
2139 unsigned int ext_hdr_len;
2140
2141 ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
2142 if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
2143 struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
2144 unsigned int frag_off = ntohs(frag_hdr->frag_off);
2145
2146 if (frag_off & SFE_IPV6_FRAG_OFFSET) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002147 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002148 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2149 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002150 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002151
2152 DEBUG_TRACE("non-initial fragment\n");
2153 return 0;
2154 }
2155 }
2156
2157 ext_hdr_len = ext_hdr->hdr_len;
2158 ext_hdr_len <<= 3;
2159 ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
2160 len -= ext_hdr_len;
2161 ihl += ext_hdr_len;
2162 /*
2163 * We should have 8 bytes of next header - that's enough to identify
2164 * the connection.
2165 */
2166 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002167 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002168 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2169 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002170 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002171
2172 DEBUG_TRACE("extension header %d not completed\n", next_hdr);
2173 return 0;
2174 }
2175
2176 next_hdr = ext_hdr->next_hdr;
2177 }
2178
2179 /*
2180 * Handle the embedded transport layer header.
2181 */
2182 switch (next_hdr) {
2183 case IPPROTO_UDP:
2184 icmp_udph = (struct sfe_ipv6_udp_hdr *)(skb->data + ihl);
2185 src_port = icmp_udph->source;
2186 dest_port = icmp_udph->dest;
2187 break;
2188
2189 case IPPROTO_TCP:
2190 icmp_tcph = (struct sfe_ipv6_tcp_hdr *)(skb->data + ihl);
2191 src_port = icmp_tcph->source;
2192 dest_port = icmp_tcph->dest;
2193 break;
2194
2195 default:
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002196 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002197 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_IPV6_UNHANDLED_PROTOCOL]++;
2198 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002199 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002200
Ryan Sherlock47c5a702016-01-12 07:27:05 -06002201 DEBUG_TRACE("Unhandled embedded IP protocol: %u\n", next_hdr);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002202 return 0;
2203 }
2204
2205 src_ip = &icmp_iph->saddr;
2206 dest_ip = &icmp_iph->daddr;
2207
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002208 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002209
2210 /*
2211 * Look for a connection match. Note that we reverse the source and destination
2212 * here because our embedded message contains a packet that was sent in the
2213 * opposite direction to the one in which we just received it. It will have
2214 * been sent on the interface from which we received it though so that's still
2215 * ok to use.
2216 */
2217 cm = sfe_ipv6_find_connection_match(si, dev, icmp_iph->nexthdr, dest_ip, dest_port, src_ip, src_port);
2218 if (unlikely(!cm)) {
2219 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_NO_CONNECTION]++;
2220 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002221 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002222
2223 DEBUG_TRACE("no connection found\n");
2224 return 0;
2225 }
2226
2227 /*
2228 * We found a connection so now remove it from the connection list and flush
2229 * its state.
2230 */
2231 c = cm->connection;
2232 sfe_ipv6_remove_connection(si, c);
2233 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_ICMP_FLUSHED_CONNECTION]++;
2234 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002235 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002236
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002237 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_FLUSH);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002238 return 0;
2239}
2240
2241/*
2242 * sfe_ipv6_recv()
2243 * Handle packet receives and forwaring.
2244 *
2245 * Returns 1 if the packet is forwarded or 0 if it isn't.
2246 */
2247int sfe_ipv6_recv(struct net_device *dev, struct sk_buff *skb)
2248{
2249 struct sfe_ipv6 *si = &__si6;
2250 unsigned int len;
2251 unsigned int payload_len;
2252 unsigned int ihl = sizeof(struct sfe_ipv6_ip_hdr);
2253 bool flush_on_find = false;
2254 struct sfe_ipv6_ip_hdr *iph;
2255 uint8_t next_hdr;
2256
2257 /*
2258 * Check that we have space for an IP header and an uplayer header here.
2259 */
2260 len = skb->len;
2261 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002262 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002263 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2264 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002265 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002266
2267 DEBUG_TRACE("len: %u is too short\n", len);
2268 return 0;
2269 }
2270
2271 /*
2272 * Is our IP version wrong?
2273 */
2274 iph = (struct sfe_ipv6_ip_hdr *)skb->data;
2275 if (unlikely(iph->version != 6)) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002276 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002277 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_V6]++;
2278 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002279 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002280
2281 DEBUG_TRACE("IP version: %u\n", iph->version);
2282 return 0;
2283 }
2284
2285 /*
2286 * Does our datagram fit inside the skb?
2287 */
2288 payload_len = ntohs(iph->payload_len);
2289 if (unlikely(payload_len > (len - ihl))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002290 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002291 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_DATAGRAM_INCOMPLETE]++;
2292 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002293 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002294
2295 DEBUG_TRACE("payload_len: %u, exceeds len: %u\n", payload_len, (len - sizeof(struct sfe_ipv6_ip_hdr)));
2296 return 0;
2297 }
2298
2299 next_hdr = iph->nexthdr;
2300 while (unlikely(sfe_ipv6_is_ext_hdr(next_hdr))) {
2301 struct sfe_ipv6_ext_hdr *ext_hdr;
2302 unsigned int ext_hdr_len;
2303
2304 ext_hdr = (struct sfe_ipv6_ext_hdr *)(skb->data + ihl);
2305 if (next_hdr == SFE_IPV6_EXT_HDR_FRAG) {
2306 struct sfe_ipv6_frag_hdr *frag_hdr = (struct sfe_ipv6_frag_hdr *)ext_hdr;
2307 unsigned int frag_off = ntohs(frag_hdr->frag_off);
2308
2309 if (frag_off & SFE_IPV6_FRAG_OFFSET) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002310 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002311 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_NON_INITIAL_FRAGMENT]++;
2312 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002313 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002314
2315 DEBUG_TRACE("non-initial fragment\n");
2316 return 0;
2317 }
2318 }
2319
2320 ext_hdr_len = ext_hdr->hdr_len;
2321 ext_hdr_len <<= 3;
2322 ext_hdr_len += sizeof(struct sfe_ipv6_ext_hdr);
2323 ihl += ext_hdr_len;
2324 if (!pskb_may_pull(skb, ihl + sizeof(struct sfe_ipv6_ext_hdr))) {
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002325 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002326 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_HEADER_INCOMPLETE]++;
2327 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002328 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002329
2330 DEBUG_TRACE("extension header %d not completed\n", next_hdr);
2331 return 0;
2332 }
2333
2334 flush_on_find = true;
2335 next_hdr = ext_hdr->next_hdr;
2336 }
2337
2338 if (IPPROTO_UDP == next_hdr) {
2339 return sfe_ipv6_recv_udp(si, skb, dev, len, iph, ihl, flush_on_find);
2340 }
2341
2342 if (IPPROTO_TCP == next_hdr) {
2343 return sfe_ipv6_recv_tcp(si, skb, dev, len, iph, ihl, flush_on_find);
2344 }
2345
2346 if (IPPROTO_ICMPV6 == next_hdr) {
2347 return sfe_ipv6_recv_icmp(si, skb, dev, len, iph, ihl);
2348 }
2349
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002350 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002351 si->exception_events[SFE_IPV6_EXCEPTION_EVENT_UNHANDLED_PROTOCOL]++;
2352 si->packets_not_forwarded++;
Xiaoping Fan3c423e32015-07-03 03:09:29 -07002353 spin_unlock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002354
2355 DEBUG_TRACE("not UDP, TCP or ICMP: %u\n", next_hdr);
2356 return 0;
2357}
2358
2359/*
2360 * sfe_ipv6_update_tcp_state()
2361 * update TCP window variables.
2362 */
2363static void
2364sfe_ipv6_update_tcp_state(struct sfe_ipv6_connection *c,
2365 struct sfe_connection_create *sic)
2366{
2367 struct sfe_ipv6_connection_match *orig_cm;
2368 struct sfe_ipv6_connection_match *repl_cm;
2369 struct sfe_ipv6_tcp_connection_match *orig_tcp;
2370 struct sfe_ipv6_tcp_connection_match *repl_tcp;
2371
2372 orig_cm = c->original_match;
2373 repl_cm = c->reply_match;
2374 orig_tcp = &orig_cm->protocol_state.tcp;
2375 repl_tcp = &repl_cm->protocol_state.tcp;
2376
2377 /* update orig */
2378 if (orig_tcp->max_win < sic->src_td_max_window) {
2379 orig_tcp->max_win = sic->src_td_max_window;
2380 }
2381 if ((int32_t)(orig_tcp->end - sic->src_td_end) < 0) {
2382 orig_tcp->end = sic->src_td_end;
2383 }
2384 if ((int32_t)(orig_tcp->max_end - sic->src_td_max_end) < 0) {
2385 orig_tcp->max_end = sic->src_td_max_end;
2386 }
2387
2388 /* update reply */
2389 if (repl_tcp->max_win < sic->dest_td_max_window) {
2390 repl_tcp->max_win = sic->dest_td_max_window;
2391 }
2392 if ((int32_t)(repl_tcp->end - sic->dest_td_end) < 0) {
2393 repl_tcp->end = sic->dest_td_end;
2394 }
2395 if ((int32_t)(repl_tcp->max_end - sic->dest_td_max_end) < 0) {
2396 repl_tcp->max_end = sic->dest_td_max_end;
2397 }
2398
2399 /* update match flags */
2400 orig_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2401 repl_cm->flags &= ~SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2402 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2403 orig_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2404 repl_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2405 }
2406}
2407
2408/*
2409 * sfe_ipv6_update_protocol_state()
2410 * update protocol specified state machine.
2411 */
2412static void
2413sfe_ipv6_update_protocol_state(struct sfe_ipv6_connection *c,
2414 struct sfe_connection_create *sic)
2415{
2416 switch (sic->protocol) {
2417 case IPPROTO_TCP:
2418 sfe_ipv6_update_tcp_state(c, sic);
2419 break;
2420 }
2421}
2422
2423/*
2424 * sfe_ipv6_update_rule()
2425 * update forwarding rule after rule is created.
2426 */
2427void sfe_ipv6_update_rule(struct sfe_connection_create *sic)
2428{
2429 struct sfe_ipv6_connection *c;
2430 struct sfe_ipv6 *si = &__si6;
2431
2432 spin_lock_bh(&si->lock);
2433
2434 c = sfe_ipv6_find_connection(si,
2435 sic->protocol,
2436 sic->src_ip.ip6,
2437 sic->src_port,
2438 sic->dest_ip.ip6,
2439 sic->dest_port);
2440 if (c != NULL) {
2441 sfe_ipv6_update_protocol_state(c, sic);
2442 }
2443
2444 spin_unlock_bh(&si->lock);
2445}
2446
2447/*
2448 * sfe_ipv6_create_rule()
2449 * Create a forwarding rule.
2450 */
2451int sfe_ipv6_create_rule(struct sfe_connection_create *sic)
2452{
2453 struct sfe_ipv6 *si = &__si6;
2454 struct sfe_ipv6_connection *c;
2455 struct sfe_ipv6_connection_match *original_cm;
2456 struct sfe_ipv6_connection_match *reply_cm;
2457 struct net_device *dest_dev;
2458 struct net_device *src_dev;
2459
2460 dest_dev = sic->dest_dev;
2461 src_dev = sic->src_dev;
2462
2463 if (unlikely((dest_dev->reg_state != NETREG_REGISTERED) ||
2464 (src_dev->reg_state != NETREG_REGISTERED))) {
2465 return -EINVAL;
2466 }
2467
2468 spin_lock_bh(&si->lock);
2469 si->connection_create_requests++;
2470
2471 /*
2472 * Check to see if there is already a flow that matches the rule we're
2473 * trying to create. If there is then we can't create a new one.
2474 */
2475 c = sfe_ipv6_find_connection(si,
2476 sic->protocol,
2477 sic->src_ip.ip6,
2478 sic->src_port,
2479 sic->dest_ip.ip6,
2480 sic->dest_port);
2481 if (c != NULL) {
2482 si->connection_create_collisions++;
2483
2484 /*
2485 * If we already have the flow then it's likely that this
2486 * request to create the connection rule contains more
2487 * up-to-date information. Check and update accordingly.
2488 */
2489 sfe_ipv6_update_protocol_state(c, sic);
2490 spin_unlock_bh(&si->lock);
2491
2492 DEBUG_TRACE("connection already exists - mark: %08x, p: %d\n"
2493 " s: %s:%pM:%pI6:%u, d: %s:%pM:%pI6:%u\n",
2494 sic->mark, sic->protocol,
2495 sic->src_dev->name, sic->src_mac, sic->src_ip.ip6, ntohs(sic->src_port),
2496 sic->dest_dev->name, sic->dest_mac, sic->dest_ip.ip6, ntohs(sic->dest_port));
2497 return -EADDRINUSE;
2498 }
2499
2500 /*
2501 * Allocate the various connection tracking objects.
2502 */
2503 c = (struct sfe_ipv6_connection *)kmalloc(sizeof(struct sfe_ipv6_connection), GFP_ATOMIC);
2504 if (unlikely(!c)) {
2505 spin_unlock_bh(&si->lock);
2506 return -ENOMEM;
2507 }
2508
2509 original_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
2510 if (unlikely(!original_cm)) {
2511 spin_unlock_bh(&si->lock);
2512 kfree(c);
2513 return -ENOMEM;
2514 }
2515
2516 reply_cm = (struct sfe_ipv6_connection_match *)kmalloc(sizeof(struct sfe_ipv6_connection_match), GFP_ATOMIC);
2517 if (unlikely(!reply_cm)) {
2518 spin_unlock_bh(&si->lock);
2519 kfree(original_cm);
2520 kfree(c);
2521 return -ENOMEM;
2522 }
2523
2524 /*
2525 * Fill in the "original" direction connection matching object.
2526 * Note that the transmit MAC address is "dest_mac_xlate" because
2527 * we always know both ends of a connection by their translated
2528 * addresses and not their public addresses.
2529 */
2530 original_cm->match_dev = src_dev;
2531 original_cm->match_protocol = sic->protocol;
2532 original_cm->match_src_ip[0] = sic->src_ip.ip6[0];
2533 original_cm->match_src_port = sic->src_port;
2534 original_cm->match_dest_ip[0] = sic->dest_ip.ip6[0];
2535 original_cm->match_dest_port = sic->dest_port;
2536 original_cm->xlate_src_ip[0] = sic->src_ip_xlate.ip6[0];
2537 original_cm->xlate_src_port = sic->src_port_xlate;
2538 original_cm->xlate_dest_ip[0] = sic->dest_ip_xlate.ip6[0];
2539 original_cm->xlate_dest_port = sic->dest_port_xlate;
2540 original_cm->rx_packet_count = 0;
2541 original_cm->rx_packet_count64 = 0;
2542 original_cm->rx_byte_count = 0;
2543 original_cm->rx_byte_count64 = 0;
2544 original_cm->xmit_dev = dest_dev;
2545 original_cm->xmit_dev_mtu = sic->dest_mtu;
2546 memcpy(original_cm->xmit_src_mac, dest_dev->dev_addr, ETH_ALEN);
2547 memcpy(original_cm->xmit_dest_mac, sic->dest_mac_xlate, ETH_ALEN);
2548 original_cm->connection = c;
2549 original_cm->counter_match = reply_cm;
2550 original_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002551 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2552 original_cm->priority = sic->src_priority;
2553 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2554 }
2555 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2556 original_cm->dscp = sic->src_dscp << SFE_IPV6_DSCP_SHIFT;
2557 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2558 }
Xiaoping Fan978b3772015-05-27 14:15:18 -07002559#ifdef CONFIG_NF_FLOW_COOKIE
2560 original_cm->flow_cookie = 0;
2561#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002562#ifdef CONFIG_XFRM
2563 original_cm->flow_accel = sic->original_accel;
2564#endif
Xiaoping Fan978b3772015-05-27 14:15:18 -07002565 original_cm->active_next = NULL;
2566 original_cm->active_prev = NULL;
2567 original_cm->active = false;
2568
2569 /*
2570 * For PPP links we don't write an L2 header. For everything else we do.
2571 */
2572 if (!(dest_dev->flags & IFF_POINTOPOINT)) {
2573 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2574
2575 /*
2576 * If our dev writes Ethernet headers then we can write a really fast
2577 * version.
2578 */
2579 if (dest_dev->header_ops) {
2580 if (dest_dev->header_ops->create == eth_header) {
2581 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2582 }
2583 }
2584 }
2585
2586 /*
2587 * Fill in the "reply" direction connection matching object.
2588 */
2589 reply_cm->match_dev = dest_dev;
2590 reply_cm->match_protocol = sic->protocol;
2591 reply_cm->match_src_ip[0] = sic->dest_ip_xlate.ip6[0];
2592 reply_cm->match_src_port = sic->dest_port_xlate;
2593 reply_cm->match_dest_ip[0] = sic->src_ip_xlate.ip6[0];
2594 reply_cm->match_dest_port = sic->src_port_xlate;
2595 reply_cm->xlate_src_ip[0] = sic->dest_ip.ip6[0];
2596 reply_cm->xlate_src_port = sic->dest_port;
2597 reply_cm->xlate_dest_ip[0] = sic->src_ip.ip6[0];
2598 reply_cm->xlate_dest_port = sic->src_port;
2599 reply_cm->rx_packet_count = 0;
2600 reply_cm->rx_packet_count64 = 0;
2601 reply_cm->rx_byte_count = 0;
2602 reply_cm->rx_byte_count64 = 0;
2603 reply_cm->xmit_dev = src_dev;
2604 reply_cm->xmit_dev_mtu = sic->src_mtu;
2605 memcpy(reply_cm->xmit_src_mac, src_dev->dev_addr, ETH_ALEN);
2606 memcpy(reply_cm->xmit_dest_mac, sic->src_mac, ETH_ALEN);
2607 reply_cm->connection = c;
2608 reply_cm->counter_match = original_cm;
2609 reply_cm->flags = 0;
Xiaoping Fane1963d42015-08-25 17:06:19 -07002610 if (sic->flags & SFE_CREATE_FLAG_REMARK_PRIORITY) {
2611 reply_cm->priority = sic->dest_priority;
2612 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_PRIORITY_REMARK;
2613 }
2614 if (sic->flags & SFE_CREATE_FLAG_REMARK_DSCP) {
2615 reply_cm->dscp = sic->dest_dscp << SFE_IPV6_DSCP_SHIFT;
2616 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_DSCP_REMARK;
2617 }
Xiaoping Fan978b3772015-05-27 14:15:18 -07002618#ifdef CONFIG_NF_FLOW_COOKIE
2619 reply_cm->flow_cookie = 0;
2620#endif
Zhi Chen8748eb32015-06-18 12:58:48 -07002621#ifdef CONFIG_XFRM
2622 reply_cm->flow_accel = sic->reply_accel;
2623#endif
Xiaoping Fan978b3772015-05-27 14:15:18 -07002624 reply_cm->active_next = NULL;
2625 reply_cm->active_prev = NULL;
2626 reply_cm->active = false;
2627
2628 /*
2629 * For PPP links we don't write an L2 header. For everything else we do.
2630 */
2631 if (!(src_dev->flags & IFF_POINTOPOINT)) {
2632 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_L2_HDR;
2633
2634 /*
2635 * If our dev writes Ethernet headers then we can write a really fast
2636 * version.
2637 */
2638 if (src_dev->header_ops) {
2639 if (src_dev->header_ops->create == eth_header) {
2640 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_WRITE_FAST_ETH_HDR;
2641 }
2642 }
2643 }
2644
2645
2646 if (!sfe_ipv6_addr_equal(sic->dest_ip.ip6, sic->dest_ip_xlate.ip6) || sic->dest_port != sic->dest_port_xlate) {
2647 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
2648 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
2649 }
2650
2651 if (!sfe_ipv6_addr_equal(sic->src_ip.ip6, sic->src_ip_xlate.ip6) || sic->src_port != sic->src_port_xlate) {
2652 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_SRC;
2653 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_XLATE_DEST;
2654 }
2655
2656 c->protocol = sic->protocol;
2657 c->src_ip[0] = sic->src_ip.ip6[0];
2658 c->src_ip_xlate[0] = sic->src_ip_xlate.ip6[0];
2659 c->src_port = sic->src_port;
2660 c->src_port_xlate = sic->src_port_xlate;
2661 c->original_dev = src_dev;
2662 c->original_match = original_cm;
2663 c->dest_ip[0] = sic->dest_ip.ip6[0];
2664 c->dest_ip_xlate[0] = sic->dest_ip_xlate.ip6[0];
2665 c->dest_port = sic->dest_port;
2666 c->dest_port_xlate = sic->dest_port_xlate;
2667 c->reply_dev = dest_dev;
2668 c->reply_match = reply_cm;
2669 c->mark = sic->mark;
Xiaoping Fan34586472015-07-03 02:20:35 -07002670 c->debug_read_seq = 0;
Xiaoping Fan978b3772015-05-27 14:15:18 -07002671 c->last_sync_jiffies = get_jiffies_64();
Xiaoping Fan978b3772015-05-27 14:15:18 -07002672
2673 /*
2674 * Take hold of our source and dest devices for the duration of the connection.
2675 */
2676 dev_hold(c->original_dev);
2677 dev_hold(c->reply_dev);
2678
2679 /*
2680 * Initialize the protocol-specific information that we track.
2681 */
2682 switch (sic->protocol) {
2683 case IPPROTO_TCP:
2684 original_cm->protocol_state.tcp.win_scale = sic->src_td_window_scale;
2685 original_cm->protocol_state.tcp.max_win = sic->src_td_max_window ? sic->src_td_max_window : 1;
2686 original_cm->protocol_state.tcp.end = sic->src_td_end;
2687 original_cm->protocol_state.tcp.max_end = sic->src_td_max_end;
2688 reply_cm->protocol_state.tcp.win_scale = sic->dest_td_window_scale;
2689 reply_cm->protocol_state.tcp.max_win = sic->dest_td_max_window ? sic->dest_td_max_window : 1;
2690 reply_cm->protocol_state.tcp.end = sic->dest_td_end;
2691 reply_cm->protocol_state.tcp.max_end = sic->dest_td_max_end;
2692 if (sic->flags & SFE_CREATE_FLAG_NO_SEQ_CHECK) {
2693 original_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2694 reply_cm->flags |= SFE_IPV6_CONNECTION_MATCH_FLAG_NO_SEQ_CHECK;
2695 }
2696 break;
2697 }
2698
2699 sfe_ipv6_connection_match_compute_translations(original_cm);
2700 sfe_ipv6_connection_match_compute_translations(reply_cm);
2701 sfe_ipv6_insert_connection(si, c);
2702
2703 spin_unlock_bh(&si->lock);
2704
2705 /*
2706 * We have everything we need!
2707 */
2708 DEBUG_INFO("new connection - mark: %08x, p: %d\n"
2709 " s: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n"
2710 " d: %s:%pM(%pM):%pI6(%pI6):%u(%u)\n",
2711 sic->mark, sic->protocol,
2712 sic->src_dev->name, sic->src_mac, sic->src_mac_xlate,
2713 sic->src_ip.ip6, sic->src_ip_xlate.ip6, ntohs(sic->src_port), ntohs(sic->src_port_xlate),
2714 dest_dev->name, sic->dest_mac, sic->dest_mac_xlate,
2715 sic->dest_ip.ip6, sic->dest_ip_xlate.ip6, ntohs(sic->dest_port), ntohs(sic->dest_port_xlate));
2716
2717 return 0;
2718}
2719
2720/*
2721 * sfe_ipv6_destroy_rule()
2722 * Destroy a forwarding rule.
2723 */
2724void sfe_ipv6_destroy_rule(struct sfe_connection_destroy *sid)
2725{
2726 struct sfe_ipv6 *si = &__si6;
2727 struct sfe_ipv6_connection *c;
2728
2729 spin_lock_bh(&si->lock);
2730 si->connection_destroy_requests++;
2731
2732 /*
2733 * Check to see if we have a flow that matches the rule we're trying
2734 * to destroy. If there isn't then we can't destroy it.
2735 */
2736 c = sfe_ipv6_find_connection(si, sid->protocol, sid->src_ip.ip6, sid->src_port,
2737 sid->dest_ip.ip6, sid->dest_port);
2738 if (!c) {
2739 si->connection_destroy_misses++;
2740 spin_unlock_bh(&si->lock);
2741
2742 DEBUG_TRACE("connection does not exist - p: %d, s: %pI6:%u, d: %pI6:%u\n",
2743 sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
2744 sid->dest_ip.ip6, ntohs(sid->dest_port));
2745 return;
2746 }
2747
2748 /*
2749 * Remove our connection details from the hash tables.
2750 */
2751 sfe_ipv6_remove_connection(si, c);
2752 spin_unlock_bh(&si->lock);
2753
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002754 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002755
2756 DEBUG_INFO("connection destroyed - p: %d, s: %pI6:%u, d: %pI6:%u\n",
2757 sid->protocol, sid->src_ip.ip6, ntohs(sid->src_port),
2758 sid->dest_ip.ip6, ntohs(sid->dest_port));
2759}
2760
2761/*
2762 * sfe_ipv6_register_sync_rule_callback()
2763 * Register a callback for rule synchronization.
2764 */
2765void sfe_ipv6_register_sync_rule_callback(sfe_sync_rule_callback_t sync_rule_callback)
2766{
2767 struct sfe_ipv6 *si = &__si6;
2768
2769 spin_lock_bh(&si->lock);
2770 rcu_assign_pointer(si->sync_rule_callback, sync_rule_callback);
2771 spin_unlock_bh(&si->lock);
2772}
2773
2774/*
2775 * sfe_ipv6_get_debug_dev()
2776 */
2777static ssize_t sfe_ipv6_get_debug_dev(struct device *dev,
2778 struct device_attribute *attr,
2779 char *buf)
2780{
2781 struct sfe_ipv6 *si = &__si6;
2782 ssize_t count;
2783 int num;
2784
2785 spin_lock_bh(&si->lock);
2786 num = si->debug_dev;
2787 spin_unlock_bh(&si->lock);
2788
2789 count = snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", num);
2790 return count;
2791}
2792
2793/*
2794 * sfe_ipv6_destroy_all_rules_for_dev()
2795 * Destroy all connections that match a particular device.
2796 *
2797 * If we pass dev as NULL then this destroys all connections.
2798 */
2799void sfe_ipv6_destroy_all_rules_for_dev(struct net_device *dev)
2800{
2801 struct sfe_ipv6 *si = &__si6;
2802 struct sfe_ipv6_connection *c;
Xiaoping Fan978b3772015-05-27 14:15:18 -07002803
Xiaoping Fan34586472015-07-03 02:20:35 -07002804another_round:
Xiaoping Fan978b3772015-05-27 14:15:18 -07002805 spin_lock_bh(&si->lock);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002806
Xiaoping Fan34586472015-07-03 02:20:35 -07002807 for (c = si->all_connections_head; c; c = c->all_connections_next) {
Xiaoping Fan978b3772015-05-27 14:15:18 -07002808 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07002809 * Does this connection relate to the device we are destroying?
Xiaoping Fan978b3772015-05-27 14:15:18 -07002810 */
2811 if (!dev
2812 || (dev == c->original_dev)
2813 || (dev == c->reply_dev)) {
Xiaoping Fan34586472015-07-03 02:20:35 -07002814 break;
Xiaoping Fan978b3772015-05-27 14:15:18 -07002815 }
Xiaoping Fan34586472015-07-03 02:20:35 -07002816 }
Xiaoping Fan978b3772015-05-27 14:15:18 -07002817
Xiaoping Fan34586472015-07-03 02:20:35 -07002818 if (c) {
2819 sfe_ipv6_remove_connection(si, c);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002820 }
2821
2822 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07002823
2824 if (c) {
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002825 sfe_ipv6_flush_connection(si, c, SFE_SYNC_REASON_DESTROY);
Xiaoping Fan34586472015-07-03 02:20:35 -07002826 goto another_round;
2827 }
Xiaoping Fan978b3772015-05-27 14:15:18 -07002828}
2829
2830/*
2831 * sfe_ipv6_periodic_sync()
2832 */
2833static void sfe_ipv6_periodic_sync(unsigned long arg)
2834{
2835 struct sfe_ipv6 *si = (struct sfe_ipv6 *)arg;
2836 uint64_t now_jiffies;
2837 int quota;
2838 sfe_sync_rule_callback_t sync_rule_callback;
2839
2840 now_jiffies = get_jiffies_64();
2841
2842 rcu_read_lock();
2843 sync_rule_callback = rcu_dereference(si->sync_rule_callback);
2844 if (!sync_rule_callback) {
2845 rcu_read_unlock();
2846 goto done;
2847 }
2848
2849 spin_lock_bh(&si->lock);
2850 sfe_ipv6_update_summary_stats(si);
2851
2852 /*
2853 * Get an estimate of the number of connections to parse in this sync.
2854 */
2855 quota = (si->num_connections + 63) / 64;
2856
2857 /*
2858 * Walk the "active" list and sync the connection state.
2859 */
2860 while (quota--) {
2861 struct sfe_ipv6_connection_match *cm;
2862 struct sfe_ipv6_connection_match *counter_cm;
2863 struct sfe_ipv6_connection *c;
2864 struct sfe_connection_sync sis;
2865
2866 cm = si->active_head;
2867 if (!cm) {
2868 break;
2869 }
2870
2871 /*
2872 * There's a possibility that our counter match is in the active list too.
2873 * If it is then remove it.
2874 */
2875 counter_cm = cm->counter_match;
2876 if (counter_cm->active) {
2877 counter_cm->active = false;
2878
2879 /*
2880 * We must have a connection preceding this counter match
2881 * because that's the one that got us to this point, so we don't have
2882 * to worry about removing the head of the list.
2883 */
2884 counter_cm->active_prev->active_next = counter_cm->active_next;
2885
2886 if (likely(counter_cm->active_next)) {
2887 counter_cm->active_next->active_prev = counter_cm->active_prev;
2888 } else {
2889 si->active_tail = counter_cm->active_prev;
2890 }
2891
2892 counter_cm->active_next = NULL;
2893 counter_cm->active_prev = NULL;
2894 }
2895
2896 /*
2897 * Now remove the head of the active scan list.
2898 */
2899 cm->active = false;
2900 si->active_head = cm->active_next;
2901 if (likely(cm->active_next)) {
2902 cm->active_next->active_prev = NULL;
2903 } else {
2904 si->active_tail = NULL;
2905 }
2906 cm->active_next = NULL;
2907
2908 /*
2909 * Sync the connection state.
2910 */
2911 c = cm->connection;
Xiaoping Fan99cb4c12015-08-21 19:07:32 -07002912 sfe_ipv6_gen_sync_connection(si, c, &sis, SFE_SYNC_REASON_STATS, now_jiffies);
Xiaoping Fan978b3772015-05-27 14:15:18 -07002913
2914 /*
2915 * We don't want to be holding the lock when we sync!
2916 */
2917 spin_unlock_bh(&si->lock);
2918 sync_rule_callback(&sis);
2919 spin_lock_bh(&si->lock);
2920 }
2921
2922 spin_unlock_bh(&si->lock);
2923 rcu_read_unlock();
2924
2925done:
2926 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
2927}
2928
2929/*
2930 * sfe_ipv6_debug_dev_read_start()
2931 * Generate part of the XML output.
2932 */
2933static bool sfe_ipv6_debug_dev_read_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
2934 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
2935{
2936 int bytes_read;
2937
Xiaoping Fan34586472015-07-03 02:20:35 -07002938 si->debug_read_seq++;
2939
Xiaoping Fan978b3772015-05-27 14:15:18 -07002940 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "<sfe_ipv6>\n");
2941 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2942 return false;
2943 }
2944
2945 *length -= bytes_read;
2946 *total_read += bytes_read;
2947
2948 ws->state++;
2949 return true;
2950}
2951
2952/*
2953 * sfe_ipv6_debug_dev_read_connections_start()
2954 * Generate part of the XML output.
2955 */
2956static bool sfe_ipv6_debug_dev_read_connections_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
2957 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
2958{
2959 int bytes_read;
2960
2961 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<connections>\n");
2962 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
2963 return false;
2964 }
2965
2966 *length -= bytes_read;
2967 *total_read += bytes_read;
2968
2969 ws->state++;
2970 return true;
2971}
2972
2973/*
2974 * sfe_ipv6_debug_dev_read_connections_connection()
2975 * Generate part of the XML output.
2976 */
2977static bool sfe_ipv6_debug_dev_read_connections_connection(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
2978 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
2979{
2980 struct sfe_ipv6_connection *c;
Xiaoping Fan978b3772015-05-27 14:15:18 -07002981 struct sfe_ipv6_connection_match *original_cm;
2982 struct sfe_ipv6_connection_match *reply_cm;
2983 int bytes_read;
2984 int protocol;
2985 struct net_device *src_dev;
2986 struct sfe_ipv6_addr src_ip;
2987 struct sfe_ipv6_addr src_ip_xlate;
2988 __be16 src_port;
2989 __be16 src_port_xlate;
2990 uint64_t src_rx_packets;
2991 uint64_t src_rx_bytes;
2992 struct net_device *dest_dev;
2993 struct sfe_ipv6_addr dest_ip;
2994 struct sfe_ipv6_addr dest_ip_xlate;
2995 __be16 dest_port;
2996 __be16 dest_port_xlate;
2997 uint64_t dest_rx_packets;
2998 uint64_t dest_rx_bytes;
2999 uint64_t last_sync_jiffies;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003000 uint32_t mark, src_priority, dest_priority, src_dscp, dest_dscp;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003001#ifdef CONFIG_NF_FLOW_COOKIE
3002 int src_flow_cookie, dst_flow_cookie;
3003#endif
3004
3005 spin_lock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003006
3007 for (c = si->all_connections_head; c; c = c->all_connections_next) {
3008 if (c->debug_read_seq < si->debug_read_seq) {
3009 c->debug_read_seq = si->debug_read_seq;
3010 break;
3011 }
3012 }
Xiaoping Fan978b3772015-05-27 14:15:18 -07003013
3014 /*
Xiaoping Fan34586472015-07-03 02:20:35 -07003015 * If there were no connections then move to the next state.
Xiaoping Fan978b3772015-05-27 14:15:18 -07003016 */
3017 if (!c) {
Xiaoping Fan978b3772015-05-27 14:15:18 -07003018 spin_unlock_bh(&si->lock);
Xiaoping Fan34586472015-07-03 02:20:35 -07003019 ws->state++;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003020 return true;
3021 }
3022
3023 original_cm = c->original_match;
3024 reply_cm = c->reply_match;
3025
3026 protocol = c->protocol;
3027 src_dev = c->original_dev;
3028 src_ip = c->src_ip[0];
3029 src_ip_xlate = c->src_ip_xlate[0];
3030 src_port = c->src_port;
3031 src_port_xlate = c->src_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003032 src_priority = original_cm->priority;
3033 src_dscp = original_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003034
3035 sfe_ipv6_connection_match_update_summary_stats(original_cm);
3036 sfe_ipv6_connection_match_update_summary_stats(reply_cm);
3037
3038 src_rx_packets = original_cm->rx_packet_count64;
3039 src_rx_bytes = original_cm->rx_byte_count64;
3040 dest_dev = c->reply_dev;
3041 dest_ip = c->dest_ip[0];
3042 dest_ip_xlate = c->dest_ip_xlate[0];
3043 dest_port = c->dest_port;
3044 dest_port_xlate = c->dest_port_xlate;
Xiaoping Fane1963d42015-08-25 17:06:19 -07003045 dest_priority = reply_cm->priority;
3046 dest_dscp = reply_cm->dscp >> SFE_IPV6_DSCP_SHIFT;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003047 dest_rx_packets = reply_cm->rx_packet_count64;
3048 dest_rx_bytes = reply_cm->rx_byte_count64;
3049 last_sync_jiffies = get_jiffies_64() - c->last_sync_jiffies;
3050 mark = c->mark;
3051#ifdef CONFIG_NF_FLOW_COOKIE
3052 src_flow_cookie = original_cm->flow_cookie;
3053 dst_flow_cookie = reply_cm->flow_cookie;
3054#endif
3055 spin_unlock_bh(&si->lock);
3056
3057 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t\t<connection "
3058 "protocol=\"%u\" "
3059 "src_dev=\"%s\" "
3060 "src_ip=\"%pI6\" src_ip_xlate=\"%pI6\" "
3061 "src_port=\"%u\" src_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003062 "src_priority=\"%u\" src_dscp=\"%u\" "
Xiaoping Fan978b3772015-05-27 14:15:18 -07003063 "src_rx_pkts=\"%llu\" src_rx_bytes=\"%llu\" "
3064 "dest_dev=\"%s\" "
3065 "dest_ip=\"%pI6\" dest_ip_xlate=\"%pI6\" "
3066 "dest_port=\"%u\" dest_port_xlate=\"%u\" "
Xiaoping Fane1963d42015-08-25 17:06:19 -07003067 "dest_priority=\"%u\" dest_dscp=\"%u\" "
Xiaoping Fan978b3772015-05-27 14:15:18 -07003068 "dest_rx_pkts=\"%llu\" dest_rx_bytes=\"%llu\" "
3069#ifdef CONFIG_NF_FLOW_COOKIE
3070 "src_flow_cookie=\"%d\" dst_flow_cookie=\"%d\" "
3071#endif
3072 "last_sync=\"%llu\" "
3073 "mark=\"%08x\" />\n",
3074 protocol,
3075 src_dev->name,
3076 &src_ip, &src_ip_xlate,
3077 ntohs(src_port), ntohs(src_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003078 src_priority, src_dscp,
Xiaoping Fan978b3772015-05-27 14:15:18 -07003079 src_rx_packets, src_rx_bytes,
3080 dest_dev->name,
3081 &dest_ip, &dest_ip_xlate,
3082 ntohs(dest_port), ntohs(dest_port_xlate),
Xiaoping Fane1963d42015-08-25 17:06:19 -07003083 dest_priority, dest_dscp,
Xiaoping Fan978b3772015-05-27 14:15:18 -07003084 dest_rx_packets, dest_rx_bytes,
3085#ifdef CONFIG_NF_FLOW_COOKIE
3086 src_flow_cookie, dst_flow_cookie,
3087#endif
3088 last_sync_jiffies, mark);
3089
3090 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3091 return false;
3092 }
3093
3094 *length -= bytes_read;
3095 *total_read += bytes_read;
3096
Xiaoping Fan978b3772015-05-27 14:15:18 -07003097 return true;
3098}
3099
3100/*
3101 * sfe_ipv6_debug_dev_read_connections_end()
3102 * Generate part of the XML output.
3103 */
3104static bool sfe_ipv6_debug_dev_read_connections_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3105 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3106{
3107 int bytes_read;
3108
3109 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</connections>\n");
3110 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3111 return false;
3112 }
3113
3114 *length -= bytes_read;
3115 *total_read += bytes_read;
3116
3117 ws->state++;
3118 return true;
3119}
3120
3121/*
3122 * sfe_ipv6_debug_dev_read_exceptions_start()
3123 * Generate part of the XML output.
3124 */
3125static bool sfe_ipv6_debug_dev_read_exceptions_start(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3126 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3127{
3128 int bytes_read;
3129
3130 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<exceptions>\n");
3131 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3132 return false;
3133 }
3134
3135 *length -= bytes_read;
3136 *total_read += bytes_read;
3137
3138 ws->state++;
3139 return true;
3140}
3141
3142/*
3143 * sfe_ipv6_debug_dev_read_exceptions_exception()
3144 * Generate part of the XML output.
3145 */
3146static bool sfe_ipv6_debug_dev_read_exceptions_exception(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3147 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3148{
3149 uint64_t ct;
3150
3151 spin_lock_bh(&si->lock);
3152 ct = si->exception_events64[ws->iter_exception];
3153 spin_unlock_bh(&si->lock);
3154
3155 if (ct) {
3156 int bytes_read;
3157
3158 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE,
3159 "\t\t<exception name=\"%s\" count=\"%llu\" />\n",
3160 sfe_ipv6_exception_events_string[ws->iter_exception],
3161 ct);
3162 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3163 return false;
3164 }
3165
3166 *length -= bytes_read;
3167 *total_read += bytes_read;
3168 }
3169
3170 ws->iter_exception++;
3171 if (ws->iter_exception >= SFE_IPV6_EXCEPTION_EVENT_LAST) {
3172 ws->iter_exception = 0;
3173 ws->state++;
3174 }
3175
3176 return true;
3177}
3178
3179/*
3180 * sfe_ipv6_debug_dev_read_exceptions_end()
3181 * Generate part of the XML output.
3182 */
3183static bool sfe_ipv6_debug_dev_read_exceptions_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3184 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3185{
3186 int bytes_read;
3187
3188 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t</exceptions>\n");
3189 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3190 return false;
3191 }
3192
3193 *length -= bytes_read;
3194 *total_read += bytes_read;
3195
3196 ws->state++;
3197 return true;
3198}
3199
3200/*
3201 * sfe_ipv6_debug_dev_read_stats()
3202 * Generate part of the XML output.
3203 */
3204static bool sfe_ipv6_debug_dev_read_stats(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3205 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3206{
3207 int bytes_read;
3208 unsigned int num_connections;
3209 uint64_t packets_forwarded;
3210 uint64_t packets_not_forwarded;
3211 uint64_t connection_create_requests;
3212 uint64_t connection_create_collisions;
3213 uint64_t connection_destroy_requests;
3214 uint64_t connection_destroy_misses;
3215 uint64_t connection_flushes;
3216 uint64_t connection_match_hash_hits;
3217 uint64_t connection_match_hash_reorders;
3218
3219 spin_lock_bh(&si->lock);
3220 sfe_ipv6_update_summary_stats(si);
3221
3222 num_connections = si->num_connections;
3223 packets_forwarded = si->packets_forwarded64;
3224 packets_not_forwarded = si->packets_not_forwarded64;
3225 connection_create_requests = si->connection_create_requests64;
3226 connection_create_collisions = si->connection_create_collisions64;
3227 connection_destroy_requests = si->connection_destroy_requests64;
3228 connection_destroy_misses = si->connection_destroy_misses64;
3229 connection_flushes = si->connection_flushes64;
3230 connection_match_hash_hits = si->connection_match_hash_hits64;
3231 connection_match_hash_reorders = si->connection_match_hash_reorders64;
3232 spin_unlock_bh(&si->lock);
3233
3234 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "\t<stats "
3235 "num_connections=\"%u\" "
3236 "pkts_forwarded=\"%llu\" pkts_not_forwarded=\"%llu\" "
3237 "create_requests=\"%llu\" create_collisions=\"%llu\" "
3238 "destroy_requests=\"%llu\" destroy_misses=\"%llu\" "
3239 "flushes=\"%llu\" "
3240 "hash_hits=\"%llu\" hash_reorders=\"%llu\" />\n",
3241 num_connections,
3242 packets_forwarded,
3243 packets_not_forwarded,
3244 connection_create_requests,
3245 connection_create_collisions,
3246 connection_destroy_requests,
3247 connection_destroy_misses,
3248 connection_flushes,
3249 connection_match_hash_hits,
3250 connection_match_hash_reorders);
3251 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3252 return false;
3253 }
3254
3255 *length -= bytes_read;
3256 *total_read += bytes_read;
3257
3258 ws->state++;
3259 return true;
3260}
3261
3262/*
3263 * sfe_ipv6_debug_dev_read_end()
3264 * Generate part of the XML output.
3265 */
3266static bool sfe_ipv6_debug_dev_read_end(struct sfe_ipv6 *si, char *buffer, char *msg, size_t *length,
3267 int *total_read, struct sfe_ipv6_debug_xml_write_state *ws)
3268{
3269 int bytes_read;
3270
3271 bytes_read = snprintf(msg, CHAR_DEV_MSG_SIZE, "</sfe_ipv6>\n");
3272 if (copy_to_user(buffer + *total_read, msg, CHAR_DEV_MSG_SIZE)) {
3273 return false;
3274 }
3275
3276 *length -= bytes_read;
3277 *total_read += bytes_read;
3278
3279 ws->state++;
3280 return true;
3281}
3282
3283/*
3284 * Array of write functions that write various XML elements that correspond to
3285 * our XML output state machine.
3286 */
3287static sfe_ipv6_debug_xml_write_method_t sfe_ipv6_debug_xml_write_methods[SFE_IPV6_DEBUG_XML_STATE_DONE] = {
3288 sfe_ipv6_debug_dev_read_start,
3289 sfe_ipv6_debug_dev_read_connections_start,
3290 sfe_ipv6_debug_dev_read_connections_connection,
3291 sfe_ipv6_debug_dev_read_connections_end,
3292 sfe_ipv6_debug_dev_read_exceptions_start,
3293 sfe_ipv6_debug_dev_read_exceptions_exception,
3294 sfe_ipv6_debug_dev_read_exceptions_end,
3295 sfe_ipv6_debug_dev_read_stats,
3296 sfe_ipv6_debug_dev_read_end,
3297};
3298
3299/*
3300 * sfe_ipv6_debug_dev_read()
3301 * Send info to userspace upon read request from user
3302 */
3303static ssize_t sfe_ipv6_debug_dev_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
3304{
3305 char msg[CHAR_DEV_MSG_SIZE];
3306 int total_read = 0;
3307 struct sfe_ipv6_debug_xml_write_state *ws;
3308 struct sfe_ipv6 *si = &__si6;
3309
3310 ws = (struct sfe_ipv6_debug_xml_write_state *)filp->private_data;
3311 while ((ws->state != SFE_IPV6_DEBUG_XML_STATE_DONE) && (length > CHAR_DEV_MSG_SIZE)) {
3312 if ((sfe_ipv6_debug_xml_write_methods[ws->state])(si, buffer, msg, &length, &total_read, ws)) {
3313 continue;
3314 }
3315 }
3316
3317 return total_read;
3318}
3319
3320/*
3321 * sfe_ipv6_debug_dev_write()
3322 * Write to char device resets some stats
3323 */
3324static ssize_t sfe_ipv6_debug_dev_write(struct file *filp, const char *buffer, size_t length, loff_t *offset)
3325{
3326 struct sfe_ipv6 *si = &__si6;
3327
3328 spin_lock_bh(&si->lock);
3329 sfe_ipv6_update_summary_stats(si);
3330
3331 si->packets_forwarded64 = 0;
3332 si->packets_not_forwarded64 = 0;
3333 si->connection_create_requests64 = 0;
3334 si->connection_create_collisions64 = 0;
3335 si->connection_destroy_requests64 = 0;
3336 si->connection_destroy_misses64 = 0;
3337 si->connection_flushes64 = 0;
3338 si->connection_match_hash_hits64 = 0;
3339 si->connection_match_hash_reorders64 = 0;
3340 spin_unlock_bh(&si->lock);
3341
3342 return length;
3343}
3344
3345/*
3346 * sfe_ipv6_debug_dev_open()
3347 */
3348static int sfe_ipv6_debug_dev_open(struct inode *inode, struct file *file)
3349{
3350 struct sfe_ipv6_debug_xml_write_state *ws;
3351
3352 ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
3353 if (ws) {
3354 return 0;
3355 }
3356
3357 ws = kzalloc(sizeof(struct sfe_ipv6_debug_xml_write_state), GFP_KERNEL);
3358 if (!ws) {
3359 return -ENOMEM;
3360 }
3361
3362 ws->state = SFE_IPV6_DEBUG_XML_STATE_START;
3363 file->private_data = ws;
3364
3365 return 0;
3366}
3367
3368/*
3369 * sfe_ipv6_debug_dev_release()
3370 */
3371static int sfe_ipv6_debug_dev_release(struct inode *inode, struct file *file)
3372{
3373 struct sfe_ipv6_debug_xml_write_state *ws;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003374
3375 ws = (struct sfe_ipv6_debug_xml_write_state *)file->private_data;
Xiaoping Fan34586472015-07-03 02:20:35 -07003376 if (ws) {
3377 /*
3378 * We've finished with our output so free the write state.
3379 */
3380 kfree(ws);
Xiaoping Fan978b3772015-05-27 14:15:18 -07003381 }
3382
Xiaoping Fan978b3772015-05-27 14:15:18 -07003383 return 0;
3384}
3385
3386/*
3387 * File operations used in the debug char device
3388 */
3389static struct file_operations sfe_ipv6_debug_dev_fops = {
3390 .read = sfe_ipv6_debug_dev_read,
3391 .write = sfe_ipv6_debug_dev_write,
3392 .open = sfe_ipv6_debug_dev_open,
3393 .release = sfe_ipv6_debug_dev_release
3394};
3395
3396#ifdef CONFIG_NF_FLOW_COOKIE
3397/*
3398 * sfe_ipv6_register_flow_cookie_cb
3399 * register a function in SFE to let SFE use this function to configure flow cookie for a flow
3400 *
3401 * Hardware driver which support flow cookie should register a callback function in SFE. Then SFE
3402 * can use this function to configure flow cookie for a flow.
3403 * return: 0, success; !=0, fail
3404 */
3405int sfe_ipv6_register_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
3406{
3407 struct sfe_ipv6 *si = &__si6;
3408
3409 BUG_ON(!cb);
3410
3411 if (si->flow_cookie_set_func) {
3412 return -1;
3413 }
3414
3415 rcu_assign_pointer(si->flow_cookie_set_func, cb);
3416 return 0;
3417}
3418
3419/*
3420 * sfe_ipv6_unregister_flow_cookie_cb
3421 * unregister function which is used to configure flow cookie for a flow
3422 *
3423 * return: 0, success; !=0, fail
3424 */
3425int sfe_ipv6_unregister_flow_cookie_cb(sfe_ipv6_flow_cookie_set_func_t cb)
3426{
3427 struct sfe_ipv6 *si = &__si6;
3428
3429 RCU_INIT_POINTER(si->flow_cookie_set_func, NULL);
3430 return 0;
3431}
Xiaoping Fan640faf42015-08-28 15:50:55 -07003432
3433/*
3434 * sfe_ipv6_get_flow_cookie()
3435 */
3436static ssize_t sfe_ipv6_get_flow_cookie(struct device *dev,
3437 struct device_attribute *attr,
3438 char *buf)
3439{
3440 struct sfe_ipv6 *si = &__si6;
Xiaoping Fan01c67cc2015-11-09 11:31:57 -08003441 return snprintf(buf, (ssize_t)PAGE_SIZE, "%d\n", si->flow_cookie_enable);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003442}
3443
3444/*
3445 * sfe_ipv6_set_flow_cookie()
3446 */
3447static ssize_t sfe_ipv6_set_flow_cookie(struct device *dev,
3448 struct device_attribute *attr,
3449 const char *buf, size_t size)
3450{
3451 struct sfe_ipv6 *si = &__si6;
3452 strict_strtol(buf, 0, (long int *)&si->flow_cookie_enable);
3453
3454 return size;
3455}
3456
3457/*
3458 * sysfs attributes.
3459 */
3460static const struct device_attribute sfe_ipv6_flow_cookie_attr =
Xiaoping Fane70da412016-02-26 16:47:57 -08003461 __ATTR(flow_cookie_enable, S_IWUSR | S_IRUGO, sfe_ipv6_get_flow_cookie, sfe_ipv6_set_flow_cookie);
Xiaoping Fan978b3772015-05-27 14:15:18 -07003462#endif /*CONFIG_NF_FLOW_COOKIE*/
3463
3464/*
3465 * sfe_ipv6_init()
3466 */
3467static int __init sfe_ipv6_init(void)
3468{
3469 struct sfe_ipv6 *si = &__si6;
3470 int result = -1;
3471
3472 DEBUG_INFO("SFE IPv6 init\n");
3473
3474 /*
3475 * Create sys/sfe_ipv6
3476 */
3477 si->sys_sfe_ipv6 = kobject_create_and_add("sfe_ipv6", NULL);
3478 if (!si->sys_sfe_ipv6) {
3479 DEBUG_ERROR("failed to register sfe_ipv6\n");
3480 goto exit1;
3481 }
3482
3483 /*
3484 * Create files, one for each parameter supported by this module.
3485 */
3486 result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
3487 if (result) {
3488 DEBUG_ERROR("failed to register debug dev file: %d\n", result);
3489 goto exit2;
3490 }
3491
Xiaoping Fan640faf42015-08-28 15:50:55 -07003492#ifdef CONFIG_NF_FLOW_COOKIE
3493 result = sysfs_create_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
3494 if (result) {
3495 DEBUG_ERROR("failed to register flow cookie enable file: %d\n", result);
3496 goto exit3;
3497 }
3498#endif /* CONFIG_NF_FLOW_COOKIE */
3499
Xiaoping Fan978b3772015-05-27 14:15:18 -07003500 /*
3501 * Register our debug char device.
3502 */
3503 result = register_chrdev(0, "sfe_ipv6", &sfe_ipv6_debug_dev_fops);
3504 if (result < 0) {
3505 DEBUG_ERROR("Failed to register chrdev: %d\n", result);
Xiaoping Fan640faf42015-08-28 15:50:55 -07003506 goto exit4;
Xiaoping Fan978b3772015-05-27 14:15:18 -07003507 }
3508
3509 si->debug_dev = result;
3510
3511 /*
3512 * Create a timer to handle periodic statistics.
3513 */
3514 setup_timer(&si->timer, sfe_ipv6_periodic_sync, (unsigned long)si);
3515 mod_timer(&si->timer, jiffies + ((HZ + 99) / 100));
3516
3517 spin_lock_init(&si->lock);
3518
3519 return 0;
3520
Xiaoping Fan640faf42015-08-28 15:50:55 -07003521exit4:
3522#ifdef CONFIG_NF_FLOW_COOKIE
3523 sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
3524
Xiaoping Fan978b3772015-05-27 14:15:18 -07003525exit3:
Xiaoping Fan640faf42015-08-28 15:50:55 -07003526#endif /* CONFIG_NF_FLOW_COOKIE */
Xiaoping Fan978b3772015-05-27 14:15:18 -07003527 sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
3528
3529exit2:
3530 kobject_put(si->sys_sfe_ipv6);
3531
3532exit1:
3533 return result;
3534}
3535
3536/*
3537 * sfe_ipv6_exit()
3538 */
3539static void __exit sfe_ipv6_exit(void)
3540{
3541 struct sfe_ipv6 *si = &__si6;
3542
3543 DEBUG_INFO("SFE IPv6 exit\n");
3544
3545 /*
3546 * Destroy all connections.
3547 */
3548 sfe_ipv6_destroy_all_rules_for_dev(NULL);
3549
3550 del_timer_sync(&si->timer);
3551
3552 unregister_chrdev(si->debug_dev, "sfe_ipv6");
3553
Xiaoping Fan640faf42015-08-28 15:50:55 -07003554#ifdef CONFIG_NF_FLOW_COOKIE
3555 sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_flow_cookie_attr.attr);
3556#endif /* CONFIG_NF_FLOW_COOKIE */
Xiaoping Fan978b3772015-05-27 14:15:18 -07003557 sysfs_remove_file(si->sys_sfe_ipv6, &sfe_ipv6_debug_dev_attr.attr);
3558
3559 kobject_put(si->sys_sfe_ipv6);
3560
3561}
3562
3563module_init(sfe_ipv6_init)
3564module_exit(sfe_ipv6_exit)
3565
3566EXPORT_SYMBOL(sfe_ipv6_recv);
3567EXPORT_SYMBOL(sfe_ipv6_create_rule);
3568EXPORT_SYMBOL(sfe_ipv6_destroy_rule);
3569EXPORT_SYMBOL(sfe_ipv6_destroy_all_rules_for_dev);
3570EXPORT_SYMBOL(sfe_ipv6_register_sync_rule_callback);
3571EXPORT_SYMBOL(sfe_ipv6_mark_rule);
3572EXPORT_SYMBOL(sfe_ipv6_update_rule);
3573#ifdef CONFIG_NF_FLOW_COOKIE
3574EXPORT_SYMBOL(sfe_ipv6_register_flow_cookie_cb);
3575EXPORT_SYMBOL(sfe_ipv6_unregister_flow_cookie_cb);
3576#endif
3577
3578MODULE_AUTHOR("Qualcomm Atheros Inc.");
3579MODULE_DESCRIPTION("Shortcut Forwarding Engine - IPv6 support");
3580MODULE_LICENSE("Dual BSD/GPL");
3581