/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

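/* Pick the initial sequence number for an incoming connection from the
 * packet's 4-tuple, keyed with a secret (and a clock component inside
 * secure_tcp_sequence_number()) so ISNs are hard to predict off-path.
 */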
static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (!twp || (sysctl_tcp_tw_reuse &&
		      get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	sk_set_txhash(sk);

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;
	mtu = tcp_sk(sk)->mtu_info;
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to go wrong... Remember the soft error
	 * in case this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);

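/* An ICMP redirect arrived for this connection: let the cached route
 * update (or invalidate) itself via its dst_ops->redirect() hook.
 */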
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}


/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
	struct request_sock *req = inet_reqsk(sk);
	struct net *net = sock_net(sk);

	/* ICMPs are not backlogged, hence we cannot get
	 * an established socket here.
	 */
	if (seq != tcp_rsk(req)->snt_isn) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
	} else if (abort) {
		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(req->rsk_listener, req);
		NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
	}
	reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
				       th->dest, iph->saddr, ntohs(th->source),
				       inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq,
				   type == ICMP_PARAMETERPROB ||
				   type == ICMP_TIME_EXCEEDED ||
				   (type == ICMP_DEST_UNREACH &&
				    (code == ICMP_NET_UNREACH ||
				     code == ICMP_HOST_UNREACH)));

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		if (!sock_owned_by_user(sk))
			do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
					       TCP_TIMEOUT_INIT;
		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto -
			    min(icsk->icsk_rto,
				tcp_time_stamp - tcp_skb_timestamp(skb));

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}
	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and broken firewalls sit in each dark corner sending random
	 * errors ordered by their masters, even these two messages have
	 * finally lost their original sense (even Linux sends invalid
	 * PORT_UNREACHs).
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

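/* Fill in the TCP checksum for an outgoing segment. With CHECKSUM_PARTIAL
 * only the pseudo-header sum is stored and the device completes the rest;
 * otherwise the full checksum is computed in software here.
 */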
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		That is why we build the reply only on the basis of
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not loosen security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key, and no RST is generated if the hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(net,
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside the socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct net *net,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
			      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

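/* Answer a segment that hit a TIME-WAIT socket with the ACK the peer
 * expects, built from the saved TIME-WAIT state (window scale, timestamp
 * offset, MD5 key if any).
 */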
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(sock_net(sk), skb,
			tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
					     tcp_sk(sk)->snd_nxt;

	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v4_send_ack(sock_net(sk), skb, seq,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big
 * socket.
 */
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      bool attach_req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc, attach_req);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq_opt_deref(ireq));
		err = net_xmit_eval(err);
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}


#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	const struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held((spinlock_t *)&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
					 const struct sock *addr_sk)
{
	const union tcp_md5_addr *addr;

	addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk) ||
					   lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
			const struct sock *sk,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) { /* valid for establish/request sockets */
		saddr = sk->sk_rcv_saddr;
		daddr = sk->sk_daddr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

#endif

/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
#endif
	return false;
}

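/* Initialize the IPv4-specific fields of a freshly minted request sock
 * from the incoming SYN: addresses, transparent-proxy flag and IP options.
 */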
static void tcp_v4_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
	ireq->no_srccheck = inet_sk(sk_listener)->transparent;
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
}

static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req,
					  bool *strict)
{
	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);

	if (strict) {
		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
			*strict = true;
		else
			*strict = false;
	}

	return dst;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
};

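/* Entry point for an incoming SYN on a listening socket; most of the
 * work is done by the address-family independent tcp_conn_request().
 */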
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	/* Never answer SYNs sent to broadcast or multicast addresses */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&tcp_request_sock_ops,
				&tcp_request_sock_ipv4_ops, sk, skb);

drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst,
				  struct request_sock *req_unhash,
				  bool *own_req)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	sk_daddr_set(newsk, ireq->ir_rmt_addr);
	sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = rcu_dereference(ireq->ireq_opt);
	RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (likely(*own_req)) {
		tcp_move_syn(newtp, req);
		ireq->ireq_opt = NULL;
	} else {
		newinet->inet_opt = NULL;
	}
	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	newinet->inet_opt = NULL;
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

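/* On a listener, a segment without SYN may carry a syncookie ACK;
 * cookie_v4_check() either validates it and returns a new child socket,
 * or hands the listener back unchanged.
 */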
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v4_check(sk, skb);
#endif
	return sk;
}
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    !dst->ops->check(dst, 0)) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			sk_mark_napi_id(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

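/* Early demux: look up the established socket at the IP layer so that
 * its cached rx dst can be attached to the skb, saving a route lookup.
 */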
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8)				  --ANK
 *
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	/* Before escaping RCU protected region, we need to take care of skb
	 * dst. Prequeue is only enabled for established sockets.
	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
	 * Instead of doing full sk_rx_dst validity here, let's perform
	 * an optimistic check.
	 */
	if (likely(sk->sk_rx_dst))
		skb_dst_drop(skb);
	else
		skb_dst_force_safe(skb);

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);

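/* Run the attached socket filter on the segment, allowing it to trim
 * payload but never below the TCP header; adjust end_seq for any bytes
 * the filter removed.
 */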
int tcp_filter(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = (struct tcphdr *)skb->data;
	unsigned int eaten = skb->len;
	int err;

	err = sk_filter_trim_cap(sk, skb, th->doff * 4);
	if (!err) {
		eaten -= skb->len;
		TCP_SKB_CB(skb)->end_seq -= eaten;
	}
	return err;
}
EXPORT_SYMBOL(tcp_filter);

/*
 *	From tcp_input.c
 */

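/* Main receive entry point for IPv4 TCP, invoked from the IP layer for
 * every inbound segment: validate the headers, look up the owning socket
 * and either process the segment directly, prequeue it, or backlog it.
 */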
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
	 * barrier() makes sure the compiler won't play fool^Waliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
		sizeof(struct inet_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;

lookup:
	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
			reqsk_put(req);
			goto discard_it;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		nsk = tcp_check_req(sk, skb, req, false);
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v4_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	nf_reset(skb);

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	iph = ip_hdr(skb);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v4_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

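/* Cache the skb's dst (and the input interface) on the socket so that
 * the established fast path can reuse it instead of doing a fresh
 * route lookup.
 */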
void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	}
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

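/* Release every TCP resource still attached to the socket: timers,
 * queues, MD5 keys and the bound port. Called when the socket dies.
 */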
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);
	tcp_saved_syn_free(tp);

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get the next listener socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
1865static void *listening_get_next(struct seq_file *seq, void *cur)
1866{
1867 struct inet_connection_sock *icsk;
1868 struct hlist_nulls_node *node;
1869 struct sock *sk = cur;
1870 struct inet_listen_hashbucket *ilb;
1871 struct tcp_iter_state *st = seq->private;
1872 struct net *net = seq_file_net(seq);
1873
1874 if (!sk) {
1875 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1876 spin_lock_bh(&ilb->lock);
1877 sk = sk_nulls_head(&ilb->head);
1878 st->offset = 0;
1879 goto get_sk;
1880 }
1881 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1882 ++st->num;
1883 ++st->offset;
1884
1885 sk = sk_nulls_next(sk);
1886get_sk:
1887 sk_nulls_for_each_from(sk, node) {
1888 if (!net_eq(sock_net(sk), net))
1889 continue;
1890 if (sk->sk_family == st->family) {
1891 cur = sk;
1892 goto out;
1893 }
1894 icsk = inet_csk(sk);
1895 }
1896 spin_unlock_bh(&ilb->lock);
1897 st->offset = 0;
1898 if (++st->bucket < INET_LHTABLE_SIZE) {
1899 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1900 spin_lock_bh(&ilb->lock);
1901 sk = sk_nulls_head(&ilb->head);
1902 goto get_sk;
1903 }
1904 cur = NULL;
1905out:
1906 return cur;
1907}
1908
1909static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1910{
1911 struct tcp_iter_state *st = seq->private;
1912 void *rc;
1913
1914 st->bucket = 0;
1915 st->offset = 0;
1916 rc = listening_get_next(seq, NULL);
1917
1918 while (rc && *pos) {
1919 rc = listening_get_next(seq, rc);
1920 --*pos;
1921 }
1922 return rc;
1923}
1924
1925static inline bool empty_bucket(const struct tcp_iter_state *st)
1926{
1927 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1928}
1929
1930/*
1931 * Get first established socket starting from bucket given in st->bucket.
1932 * If st->bucket is zero, the very first socket in the hash is returned.
1933 */
1934static void *established_get_first(struct seq_file *seq)
1935{
1936 struct tcp_iter_state *st = seq->private;
1937 struct net *net = seq_file_net(seq);
1938 void *rc = NULL;
1939
1940 st->offset = 0;
1941 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1942 struct sock *sk;
1943 struct hlist_nulls_node *node;
1944 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1945
1946 /* Lockless fast path for the common case of empty buckets */
1947 if (empty_bucket(st))
1948 continue;
1949
1950 spin_lock_bh(lock);
1951 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1952 if (sk->sk_family != st->family ||
1953 !net_eq(sock_net(sk), net)) {
1954 continue;
1955 }
1956 rc = sk;
1957 goto out;
1958 }
1959 spin_unlock_bh(lock);
1960 }
1961out:
1962 return rc;
1963}
1964
1965static void *established_get_next(struct seq_file *seq, void *cur)
1966{
1967 struct sock *sk = cur;
1968 struct hlist_nulls_node *node;
1969 struct tcp_iter_state *st = seq->private;
1970 struct net *net = seq_file_net(seq);
1971
1972 ++st->num;
1973 ++st->offset;
1974
1975 sk = sk_nulls_next(sk);
1976
1977 sk_nulls_for_each_from(sk, node) {
1978 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1979 return sk;
1980 }
1981
1982 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1983 ++st->bucket;
1984 return established_get_first(seq);
1985}
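/*
 * Lock handoff: established_get_next() is entered with the current
 * bucket's chain lock held (taken in established_get_first()). When the
 * chain is exhausted, the lock is dropped before advancing st->bucket,
 * and established_get_first() acquires the next non-empty bucket's lock,
 * so at most one ehash lock is held at any time.
 */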
1986
1987static void *established_get_idx(struct seq_file *seq, loff_t pos)
1988{
1989 struct tcp_iter_state *st = seq->private;
1990 void *rc;
1991
1992 st->bucket = 0;
1993 rc = established_get_first(seq);
1994
1995 while (rc && pos) {
1996 rc = established_get_next(seq, rc);
1997 --pos;
1998 }
1999 return rc;
2000}
2001
2002static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2003{
2004 void *rc;
2005 struct tcp_iter_state *st = seq->private;
2006
2007 st->state = TCP_SEQ_STATE_LISTENING;
2008 rc = listening_get_idx(seq, &pos);
2009
2010 if (!rc) {
2011 st->state = TCP_SEQ_STATE_ESTABLISHED;
2012 rc = established_get_idx(seq, pos);
2013 }
2014
2015 return rc;
2016}
2017
2018static void *tcp_seek_last_pos(struct seq_file *seq)
2019{
2020 struct tcp_iter_state *st = seq->private;
2021 int offset = st->offset;
2022 int orig_num = st->num;
2023 void *rc = NULL;
2024
2025 switch (st->state) {
2026 case TCP_SEQ_STATE_LISTENING:
2027 if (st->bucket >= INET_LHTABLE_SIZE)
2028 break;
2029 st->state = TCP_SEQ_STATE_LISTENING;
2030 rc = listening_get_next(seq, NULL);
2031 while (offset-- && rc)
2032 rc = listening_get_next(seq, rc);
2033 if (rc)
2034 break;
2035 st->bucket = 0;
2036 st->state = TCP_SEQ_STATE_ESTABLISHED;
2037 /* Fallthrough */
2038 case TCP_SEQ_STATE_ESTABLISHED:
2039 if (st->bucket > tcp_hashinfo.ehash_mask)
2040 break;
2041 rc = established_get_first(seq);
2042 while (offset-- && rc)
2043 rc = established_get_next(seq, rc);
2044 }
2045
2046 st->num = orig_num;
2047
2048 return rc;
2049}
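/*
 * Resume optimization: without this, every read() on a large table would
 * re-walk from bucket 0 up to the saved position, O(n^2) overall for a
 * sequential reader. tcp_seek_last_pos() instead replays only st->offset
 * entries within the remembered st->bucket, and restores st->num so the
 * "sl" numbering continues across reads.
 */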
2050
2051static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2052{
2053 struct tcp_iter_state *st = seq->private;
2054 void *rc;
2055
2056 if (*pos && *pos == st->last_pos) {
2057 rc = tcp_seek_last_pos(seq);
2058 if (rc)
2059 goto out;
2060 }
2061
2062 st->state = TCP_SEQ_STATE_LISTENING;
2063 st->num = 0;
2064 st->bucket = 0;
2065 st->offset = 0;
2066 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2067
2068out:
2069 st->last_pos = *pos;
2070 return rc;
2071}
2072
2073static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2074{
2075 struct tcp_iter_state *st = seq->private;
2076 void *rc = NULL;
2077
2078 if (v == SEQ_START_TOKEN) {
2079 rc = tcp_get_idx(seq, 0);
2080 goto out;
2081 }
2082
2083 switch (st->state) {
2084 case TCP_SEQ_STATE_LISTENING:
2085 rc = listening_get_next(seq, v);
2086 if (!rc) {
2087 st->state = TCP_SEQ_STATE_ESTABLISHED;
2088 st->bucket = 0;
2089 st->offset = 0;
2090 rc = established_get_first(seq);
2091 }
2092 break;
2093 case TCP_SEQ_STATE_ESTABLISHED:
2094 rc = established_get_next(seq, v);
2095 break;
2096 }
2097out:
2098 ++*pos;
2099 st->last_pos = *pos;
2100 return rc;
2101}
2102
2103static void tcp_seq_stop(struct seq_file *seq, void *v)
2104{
2105 struct tcp_iter_state *st = seq->private;
2106
2107 switch (st->state) {
2108 case TCP_SEQ_STATE_LISTENING:
2109 if (v != SEQ_START_TOKEN)
2110 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2111 break;
2112 case TCP_SEQ_STATE_ESTABLISHED:
2113 if (v)
2114 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2115 break;
2116 }
2117}
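/*
 * Lock balance: ->start()/->next() return to the seq_file core with the
 * current bucket lock held (listening_hash[].lock or the ehash chain
 * lock); tcp_seq_stop() is the matching unlock, keyed off st->state and
 * off whether v is a real socket rather than SEQ_START_TOKEN/NULL.
 */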
2118
2119int tcp_seq_open(struct inode *inode, struct file *file)
2120{
2121 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2122 struct tcp_iter_state *s;
2123 int err;
2124
2125 err = seq_open_net(inode, file, &afinfo->seq_ops,
2126 sizeof(struct tcp_iter_state));
2127 if (err < 0)
2128 return err;
2129
2130 s = ((struct seq_file *)file->private_data)->private;
2131 s->family = afinfo->family;
2132 s->last_pos = 0;
2133 return 0;
2134}
2135EXPORT_SYMBOL(tcp_seq_open);
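/*
 * Userspace view, a minimal sketch (not part of this file): the seq_file
 * plumbing above is what makes a plain read() of /proc/net/tcp work.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);	// header, then one line per socket
 *		fclose(f);
 *		return 0;
 *	}
 */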
2136
2137int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2138{
2139 int rc = 0;
2140 struct proc_dir_entry *p;
2141
2142 afinfo->seq_ops.start = tcp_seq_start;
2143 afinfo->seq_ops.next = tcp_seq_next;
2144 afinfo->seq_ops.stop = tcp_seq_stop;
2145
2146 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2147 afinfo->seq_fops, afinfo);
2148 if (!p)
2149 rc = -ENOMEM;
2150 return rc;
2151}
2152EXPORT_SYMBOL(tcp_proc_register);
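/*
 * Usage pattern (hedged): a per-family caller fills in name/family/
 * seq_fops and seq_ops.show, then registers from its pernet init, exactly
 * as tcp4_seq_afinfo/tcp4_proc_init_net do for AF_INET below; the IPv6
 * side (tcp6_seq_afinfo in tcp_ipv6.c) follows the same pattern.
 */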
2153
2154void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2155{
2156 remove_proc_entry(afinfo->name, net->proc_net);
2157}
2158EXPORT_SYMBOL(tcp_proc_unregister);
2159
2160static void get_openreq4(const struct request_sock *req,
2161 struct seq_file *f, int i)
2162{
2163 const struct inet_request_sock *ireq = inet_rsk(req);
2164 long delta = req->rsk_timer.expires - jiffies;
2165
2166 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2167 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2168 i,
2169 ireq->ir_loc_addr,
2170 ireq->ir_num,
2171 ireq->ir_rmt_addr,
2172 ntohs(ireq->ir_rmt_port),
2173 TCP_SYN_RECV,
2174 0, 0, /* could print option size, but that is af dependent. */
2175 1, /* timers active (only the expire timer) */
2176 jiffies_delta_to_clock_t(delta),
2177 req->num_timeout,
2178 from_kuid_munged(seq_user_ns(f),
2179 sock_i_uid(req->rsk_listener)),
2180 0, /* non standard timer */
2181 0, /* open_requests have no inode */
2182 0,
2183 req);
2184}
2185
2186static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2187{
2188 int timer_active;
2189 unsigned long timer_expires;
2190 const struct tcp_sock *tp = tcp_sk(sk);
2191 const struct inet_connection_sock *icsk = inet_csk(sk);
2192 const struct inet_sock *inet = inet_sk(sk);
2193 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2194 __be32 dest = inet->inet_daddr;
2195 __be32 src = inet->inet_rcv_saddr;
2196 __u16 destp = ntohs(inet->inet_dport);
2197 __u16 srcp = ntohs(inet->inet_sport);
2198 int rx_queue;
2199 int state;
2200
2201 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2202 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2203 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2204 timer_active = 1;
2205 timer_expires = icsk->icsk_timeout;
2206 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2207 timer_active = 4;
2208 timer_expires = icsk->icsk_timeout;
2209 } else if (timer_pending(&sk->sk_timer)) {
2210 timer_active = 2;
2211 timer_expires = sk->sk_timer.expires;
2212 } else {
2213 timer_active = 0;
2214 timer_expires = jiffies;
2215 }
2216
2217 state = sk_state_load(sk);
2218 if (state == TCP_LISTEN)
2219 rx_queue = sk->sk_ack_backlog;
2220 else
2221 /* Because we don't lock the socket,
2222 * we might find a transient negative value.
2223 */
2224 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2225
2226 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2227 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2228 i, src, srcp, dest, destp, state,
2229 tp->write_seq - tp->snd_una,
2230 rx_queue,
2231 timer_active,
2232 jiffies_delta_to_clock_t(timer_expires - jiffies),
2233 icsk->icsk_retransmits,
2234 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2235 icsk->icsk_probes_out,
2236 sock_i_ino(sk),
2237 atomic_read(&sk->sk_refcnt), sk,
2238 jiffies_to_clock_t(icsk->icsk_rto),
2239 jiffies_to_clock_t(icsk->icsk_ack.ato),
2240 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2241 tp->snd_cwnd,
2242 state == TCP_LISTEN ?
2243 fastopenq->max_qlen :
2244 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2245}
2246
2247static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2248 struct seq_file *f, int i)
2249{
2250 long delta = tw->tw_timer.expires - jiffies;
2251 __be32 dest, src;
2252 __u16 destp, srcp;
2253
2254 dest = tw->tw_daddr;
2255 src = tw->tw_rcv_saddr;
2256 destp = ntohs(tw->tw_dport);
2257 srcp = ntohs(tw->tw_sport);
2258
2259 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2260 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2261 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2262 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2263 atomic_read(&tw->tw_refcnt), tw);
2264}
2265
2266#define TMPSZ 150
2267
2268static int tcp4_seq_show(struct seq_file *seq, void *v)
2269{
2270 struct tcp_iter_state *st;
2271 struct sock *sk = v;
2272
2273 seq_setwidth(seq, TMPSZ - 1);
2274 if (v == SEQ_START_TOKEN) {
2275 seq_puts(seq, " sl local_address rem_address st tx_queue "
2276 "rx_queue tr tm->when retrnsmt uid timeout "
2277 "inode");
2278 goto out;
2279 }
2280 st = seq->private;
2281
2282 if (sk->sk_state == TCP_TIME_WAIT)
2283 get_timewait4_sock(v, seq, st->num);
2284 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2285 get_openreq4(v, seq, st->num);
2286 else
2287 get_tcp4_sock(v, seq, st->num);
2288out:
2289 seq_pad(seq, '\n');
2290 return 0;
2291}
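/*
 * Illustrative output (fabricated values, not captured from a running
 * system): a socket listening on 127.0.0.1:22 would render roughly as
 *
 *    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *     0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff880012345678 100 0 0 10 0
 *
 * Addresses are native-endian IPv4 in hex plus a port; "st" is the socket
 * state (0A == TCP_LISTEN); the trailing unlabeled fields are produced by
 * get_tcp4_sock() above (refcount, socket pointer, rto, ato,
 * quick/pingpong, cwnd, and ssthresh or fastopen queue length).
 */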
2292
2293static const struct file_operations tcp_afinfo_seq_fops = {
2294 .owner = THIS_MODULE,
2295 .open = tcp_seq_open,
2296 .read = seq_read,
2297 .llseek = seq_lseek,
2298 .release = seq_release_net
2299};
2300
2301static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2302 .name = "tcp",
2303 .family = AF_INET,
2304 .seq_fops = &tcp_afinfo_seq_fops,
2305 .seq_ops = {
2306 .show = tcp4_seq_show,
2307 },
2308};
2309
2310static int __net_init tcp4_proc_init_net(struct net *net)
2311{
2312 return tcp_proc_register(net, &tcp4_seq_afinfo);
2313}
2314
2315static void __net_exit tcp4_proc_exit_net(struct net *net)
2316{
2317 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2318}
2319
2320static struct pernet_operations tcp4_net_ops = {
2321 .init = tcp4_proc_init_net,
2322 .exit = tcp4_proc_exit_net,
2323};
2324
2325int __init tcp4_proc_init(void)
2326{
2327 return register_pernet_subsys(&tcp4_net_ops);
2328}
2329
2330void tcp4_proc_exit(void)
2331{
2332 unregister_pernet_subsys(&tcp4_net_ops);
2333}
2334#endif /* CONFIG_PROC_FS */
2335
2336struct proto tcp_prot = {
2337 .name = "TCP",
2338 .owner = THIS_MODULE,
2339 .close = tcp_close,
2340 .connect = tcp_v4_connect,
2341 .disconnect = tcp_disconnect,
2342 .accept = inet_csk_accept,
2343 .ioctl = tcp_ioctl,
2344 .init = tcp_v4_init_sock,
2345 .destroy = tcp_v4_destroy_sock,
2346 .shutdown = tcp_shutdown,
2347 .setsockopt = tcp_setsockopt,
2348 .getsockopt = tcp_getsockopt,
2349 .recvmsg = tcp_recvmsg,
2350 .sendmsg = tcp_sendmsg,
2351 .sendpage = tcp_sendpage,
2352 .backlog_rcv = tcp_v4_do_rcv,
2353 .release_cb = tcp_release_cb,
2354 .hash = inet_hash,
2355 .unhash = inet_unhash,
2356 .get_port = inet_csk_get_port,
2357 .enter_memory_pressure = tcp_enter_memory_pressure,
2358 .stream_memory_free = tcp_stream_memory_free,
2359 .sockets_allocated = &tcp_sockets_allocated,
2360 .orphan_count = &tcp_orphan_count,
2361 .memory_allocated = &tcp_memory_allocated,
2362 .memory_pressure = &tcp_memory_pressure,
2363 .sysctl_mem = sysctl_tcp_mem,
2364 .sysctl_wmem = sysctl_tcp_wmem,
2365 .sysctl_rmem = sysctl_tcp_rmem,
2366 .max_header = MAX_TCP_HEADER,
2367 .obj_size = sizeof(struct tcp_sock),
2368 .slab_flags = SLAB_DESTROY_BY_RCU,
2369 .twsk_prot = &tcp_timewait_sock_ops,
2370 .rsk_prot = &tcp_request_sock_ops,
2371 .h.hashinfo = &tcp_hashinfo,
2372 .no_autobind = true,
2373#ifdef CONFIG_COMPAT
2374 .compat_setsockopt = compat_tcp_setsockopt,
2375 .compat_getsockopt = compat_tcp_getsockopt,
2376#endif
2377#ifdef CONFIG_MEMCG_KMEM
2378 .init_cgroup = tcp_init_cgroup,
2379 .destroy_cgroup = tcp_destroy_cgroup,
2380 .proto_cgroup = tcp_proto_cgroup,
2381#endif
2382};
2383EXPORT_SYMBOL(tcp_prot);
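/*
 * Wiring note (a sketch; the authoritative table lives in
 * net/ipv4/af_inet.c): tcp_prot is bound to SOCK_STREAM/IPPROTO_TCP via
 * an inet_protosw entry along the lines of
 *
 *	{
 *		.type     = SOCK_STREAM,
 *		.protocol = IPPROTO_TCP,
 *		.prot     = &tcp_prot,
 *		.ops      = &inet_stream_ops,
 *		.flags    = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
 *	},
 *
 * so a socket(AF_INET, SOCK_STREAM, 0) call dispatches through the
 * callbacks above.
 */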
2384
2385static void __net_exit tcp_sk_exit(struct net *net)
2386{
2387 int cpu;
2388
2389 for_each_possible_cpu(cpu)
2390 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2391 free_percpu(net->ipv4.tcp_sk);
2392}
2393
2394static int __net_init tcp_sk_init(struct net *net)
2395{
2396 int res, cpu;
2397
2398 net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2399 if (!net->ipv4.tcp_sk)
2400 return -ENOMEM;
2401
2402 for_each_possible_cpu(cpu) {
2403 struct sock *sk;
2404
2405 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2406 IPPROTO_TCP, net);
2407 if (res)
2408 goto fail;
2409 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2410 }
2411
2412 net->ipv4.sysctl_tcp_ecn = 2;
2413 net->ipv4.sysctl_tcp_ecn_fallback = 1;
2414
2415 net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2416	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
2417	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2418 net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2419
2420 return 0;
2421fail:
2422 tcp_sk_exit(net);
2423
2424 return res;
2425}
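/*
 * Per-netns notes: the per-cpu control sockets created here are kernel-
 * internal transmitters for stateless replies (e.g. the RST/ACK paths in
 * tcp_v4_send_reset()/tcp_v4_send_ack()); having one per possible CPU
 * avoids locking on those send paths. The defaults set above surface as
 * sysctls inside each namespace, e.g. net.ipv4.tcp_ecn = 2 (accept ECN
 * when the peer requests it, do not request it on outgoing connections).
 */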
2426
2427static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2428{
2429 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2430}
2431
2432static struct pernet_operations __net_initdata tcp_sk_ops = {
2433 .init = tcp_sk_init,
2434 .exit = tcp_sk_exit,
2435 .exit_batch = tcp_sk_exit_batch,
2436};
2437
2438void __init tcp_v4_init(void)
2439{
2440 inet_hashinfo_init(&tcp_hashinfo);
2441 if (register_pernet_subsys(&tcp_sk_ops))
2442 panic("Failed to create the TCP control socket.\n");
2443}
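/*
 * tcp_v4_init() runs once at boot from inet_init() (net/ipv4/af_inet.c);
 * failure to create the pernet control sockets that early is not
 * recoverable, hence the panic() rather than an error return.
 */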