Blame - net/unix/af_unix.c - codeaurora/cp-linux

blob: e05ec54ac53f263932e9f6122cef532d5769e714 [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame]	1	/*
				2	* NET4: Implementation of BSD Unix domain sockets.
				3	*
				4	* Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	* Fixes:
				12	* Linus Torvalds : Assorted bug cures.
				13	* Niibe Yutaka : async I/O support.
				14	* Carsten Paeth : PF_UNIX check, address fixes.
				15	* Alan Cox : Limit size of allocated blocks.
				16	* Alan Cox : Fixed the stupid socketpair bug.
				17	* Alan Cox : BSD compatibility fine tuning.
				18	* Alan Cox : Fixed a bug in connect when interrupted.
				19	* Alan Cox : Sorted out a proper draft version of
				20	* file descriptor passing hacked up from
				21	* Mike Shaver's work.
				22	* Marty Leisner : Fixes to fd passing
				23	* Nick Nevin : recvmsg bugfix.
				24	* Alan Cox : Started proper garbage collector
				25	* Heiko EiBfeldt : Missing verify_area check
				26	* Alan Cox : Started POSIXisms
				27	* Andreas Schwab : Replace inode by dentry for proper
				28	* reference counting
				29	* Kirk Petersen : Made this a module
				30	* Christoph Rohland : Elegant non-blocking accept/connect algorithm.
				31	* Lots of bug fixes.
				32	* Alexey Kuznetosv : Repaired (I hope) bugs introduces
				33	* by above two patches.
				34	* Andrea Arcangeli : If possible we block in connect(2)
				35	* if the max backlog of the listen socket
				36	* is been reached. This won't break
				37	* old apps and it will avoid huge amount
				38	* of socks hashed (this for unix_gc()
				39	* performances reasons).
				40	* Security fix that limits the max
				41	* number of socks to 2*max_files and
				42	* the number of skb queueable in the
				43	* dgram receiver.
				44	* Artur Skawina : Hash function optimizations
				45	* Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
				46	* Malcolm Beattie : Set peercred for socketpair
				47	* Michal Ostrowski : Module initialization cleanup.
				48	* Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
				49	* the core infrastructure is doing that
				50	* for all net proto families now (2.5.69+)
				51	*
				52	*
				53	* Known differences from reference BSD that was tested:
				54	*
				55	* [TO FIX]
				56	* ECONNREFUSED is not returned from one end of a connected() socket to the
				57	* other the moment one end closes.
				58	* fstat() doesn't return st_dev=0, and give the blksize as high water mark
				59	* and a fake inode identifier (nor the BSD first socket fstat twice bug).
				60	* [NOT TO FIX]
				61	* accept() returns a path name even if the connecting socket has closed
				62	* in the meantime (BSD loses the path and gives up).
				63	* accept() returns 0 length path for an unbound connector. BSD returns 16
				64	* and a null first byte in the path (but not for gethost/peername - BSD bug ??)
				65	* socketpair(...SOCK_RAW..) doesn't panic the kernel.
				66	* BSD af_unix apparently has connect forgetting to block properly.
				67	* (need to check this with the POSIX spec in detail)
				68	*
				69	* Differences from 2.0.0-11-... (ANK)
				70	* Bug fixes and improvements.
				71	* - client shutdown killed server socket.
				72	* - removed all useless cli/sti pairs.
				73	*
				74	* Semantic changes/extensions.
				75	* - generic control message passing.
				76	* - SCM_CREDENTIALS control message.
				77	* - "Abstract" (not FS based) socket bindings.
				78	* Abstract names are sequences of bytes (not zero terminated)
				79	* started by 0, so that this name space does not intersect
				80	* with BSD names.
				81	*/
				82
				83	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				84
				85	#include <linux/module.h>
				86	#include <linux/kernel.h>
				87	#include <linux/signal.h>
				88	#include <linux/sched.h>
				89	#include <linux/errno.h>
				90	#include <linux/string.h>
				91	#include <linux/stat.h>
				92	#include <linux/dcache.h>
				93	#include <linux/namei.h>
				94	#include <linux/socket.h>
				95	#include <linux/un.h>
				96	#include <linux/fcntl.h>
				97	#include <linux/termios.h>
				98	#include <linux/sockios.h>
				99	#include <linux/net.h>
				100	#include <linux/in.h>
				101	#include <linux/fs.h>
				102	#include <linux/slab.h>
				103	#include <asm/uaccess.h>
				104	#include <linux/skbuff.h>
				105	#include <linux/netdevice.h>
				106	#include <net/net_namespace.h>
				107	#include <net/sock.h>
				108	#include <net/tcp_states.h>
				109	#include <net/af_unix.h>
				110	#include <linux/proc_fs.h>
				111	#include <linux/seq_file.h>
				112	#include <net/scm.h>
				113	#include <linux/init.h>
				114	#include <linux/poll.h>
				115	#include <linux/rtnetlink.h>
				116	#include <linux/mount.h>
				117	#include <net/checksum.h>
				118	#include <linux/security.h>
				119	#include <linux/freezer.h>
				120
				121	struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
				122	EXPORT_SYMBOL_GPL(unix_socket_table);
				123	DEFINE_SPINLOCK(unix_table_lock);
				124	EXPORT_SYMBOL_GPL(unix_table_lock);
				125	static atomic_long_t unix_nr_socks;
				126
				127
				128	static struct hlist_head unix_sockets_unbound(void addr)
				129	{
				130	unsigned long hash = (unsigned long)addr;
				131
				132	hash ^= hash >> 16;
				133	hash ^= hash >> 8;
				134	hash %= UNIX_HASH_SIZE;
				135	return &unix_socket_table[UNIX_HASH_SIZE + hash];
				136	}
				137
				/*
				 * True when the socket's address hash is below UNIX_HASH_SIZE.
				 * Dereferences unix_sk(sk)->addr unconditionally, so it must only be
				 * used on a socket that already has an address.
				 */
				#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
				139
				140	#ifdef CONFIG_SECURITY_NETWORK
				141	static void unix_get_secdata(struct scm_cookie scm, struct sk_buff skb)
				142	{
				143	UNIXCB(skb).secid = scm->secid;
				144	}
				145
				146	static inline void unix_set_secdata(struct scm_cookie scm, struct sk_buff skb)
				147	{
				148	scm->secid = UNIXCB(skb).secid;
				149	}
				150
				151	static inline bool unix_secdata_eq(struct scm_cookie scm, struct sk_buff skb)
				152	{
				153	return (scm->secid == UNIXCB(skb).secid);
				154	}
				155	#else
				156	static inline void unix_get_secdata(struct scm_cookie scm, struct sk_buff skb)
				157	{ }
				158
				159	static inline void unix_set_secdata(struct scm_cookie scm, struct sk_buff skb)
				160	{ }
				161
				162	static inline bool unix_secdata_eq(struct scm_cookie scm, struct sk_buff skb)
				163	{
				164	return true;
				165	}
				166	#endif /* CONFIG_SECURITY_NETWORK */
				167
				168	/*
				169	* SMP locking strategy:
				170	* hash table is protected with spinlock unix_table_lock
				171	* each socket state is protected by separate spin lock.
				172	*/
				173
				174	static inline unsigned int unix_hash_fold(__wsum n)
				175	{
				176	unsigned int hash = (__force unsigned int)csum_fold(n);
				177
				178	hash ^= hash>>8;
				179	return hash&(UNIX_HASH_SIZE-1);
				180	}
				181
				/* Peer pointer of a unix socket; expands to an lvalue so it can be assigned. */
				#define unix_peer(sk) (unix_sk(sk)->peer)
				183
				/* Return non-zero when @osk's peer is @sk. */
				static inline int unix_our_peer(struct sock *sk, struct sock *osk)
				{
					return unix_peer(osk) == sk;
				}
				188
				189	static inline int unix_may_send(struct sock sk, struct sock osk)
				190	{
				191	return unix_peer(osk) == NULL \|\| unix_our_peer(sk, osk);
				192	}
				193
				194	static inline int unix_recvq_full(struct sock const *sk)
				195	{
				196	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
				197	}
				198
				/*
				 * Return @s's peer with an extra reference taken, or NULL when there
				 * is no peer. The peer pointer is read and the reference taken under
				 * the state lock of @s; the caller owns the returned reference.
				 */
				struct sock *unix_peer_get(struct sock *s)
				{
					struct sock *peer;

					unix_state_lock(s);
					peer = unix_peer(s);
					if (peer)
						sock_hold(peer);
					unix_state_unlock(s);
					return peer;
				}
				EXPORT_SYMBOL_GPL(unix_peer_get);
				211
				212	static inline void unix_release_addr(struct unix_address *addr)
				213	{
				214	if (atomic_dec_and_test(&addr->refcnt))
				215	kfree(addr);
				216	}
				217
				218	/*
				219	* Check unix socket name:
				220	* - should be not zero length.
				221	* - if started by not zero, should be NULL terminated (FS object)
				222	* - if started by zero, it is abstract name.
				223	*/
				224
				225	static int unix_mkname(struct sockaddr_un sunaddr, int len, unsigned int hashp)
				226	{
				227	if (len <= sizeof(short) \|\| len > sizeof(*sunaddr))
				228	return -EINVAL;
				229	if (!sunaddr \|\| sunaddr->sun_family != AF_UNIX)
				230	return -EINVAL;
				231	if (sunaddr->sun_path[0]) {
				232	/*
				233	* This may look like an off by one error but it is a bit more
				234	* subtle. 108 is the longest valid AF_UNIX path for a binding.
				235	* sun_path[108] doesn't as such exist. However in kernel space
				236	* we are guaranteed that it is a valid memory location in our
				237	* kernel address buffer.
				238	*/
				239	((char *)sunaddr)[len] = 0;
				240	len = strlen(sunaddr->sun_path)+1+sizeof(short);
				241	return len;
				242	}
				243
				244	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
				245	return len;
				246	}
				247
				/*
				 * Unlink @sk from its hash chain. Does no locking itself; the
				 * unix_remove_socket() wrapper calls it under unix_table_lock.
				 */
				static void __unix_remove_socket(struct sock *sk)
				{
					sk_del_node_init(sk);
				}
				252
				/*
				 * Add @sk to the chain @list, warning if it is still hashed elsewhere.
				 * Does no locking itself; the unix_insert_socket() wrapper calls it
				 * under unix_table_lock.
				 */
				static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
				{
					WARN_ON(!sk_unhashed(sk));
					sk_add_node(sk, list);
				}
				258
				259	static inline void unix_remove_socket(struct sock *sk)
				260	{
				261	spin_lock(&unix_table_lock);
				262	__unix_remove_socket(sk);
				263	spin_unlock(&unix_table_lock);
				264	}
				265
				266	static inline void unix_insert_socket(struct hlist_head list, struct sock sk)
				267	{
				268	spin_lock(&unix_table_lock);
				269	__unix_insert_socket(list, sk);
				270	spin_unlock(&unix_table_lock);
				271	}
				272
				273	static struct sock __unix_find_socket_byname(struct net net,
				274	struct sockaddr_un *sunname,
				275	int len, int type, unsigned int hash)
				276	{
				277	struct sock *s;
				278
				279	sk_for_each(s, &unix_socket_table[hash ^ type]) {
				280	struct unix_sock *u = unix_sk(s);
				281
				282	if (!net_eq(sock_net(s), net))
				283	continue;
				284
				285	if (u->addr->len == len &&
				286	!memcmp(u->addr->name, sunname, len))
				287	goto found;
				288	}
				289	s = NULL;
				290	found:
				291	return s;
				292	}
				293
				294	static inline struct sock unix_find_socket_byname(struct net net,
				295	struct sockaddr_un *sunname,
				296	int len, int type,
				297	unsigned int hash)
				298	{
				299	struct sock *s;
				300
				301	spin_lock(&unix_table_lock);
				302	s = __unix_find_socket_byname(net, sunname, len, type, hash);
				303	if (s)
				304	sock_hold(s);
				305	spin_unlock(&unix_table_lock);
				306	return s;
				307	}
				308
				309	static struct sock unix_find_socket_byinode(struct inode i)
				310	{
				311	struct sock *s;
				312
				313	spin_lock(&unix_table_lock);
				314	sk_for_each(s,
				315	&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
				316	struct dentry *dentry = unix_sk(s)->path.dentry;
				317
				318	if (dentry && d_real_inode(dentry) == i) {
				319	sock_hold(s);
				320	goto found;
				321	}
				322	}
				323	s = NULL;
				324	found:
				325	spin_unlock(&unix_table_lock);
				326	return s;
				327	}
				328
				329	/* Support code for asymmetrically connected dgram sockets
				330	*
				331	* If a datagram socket is connected to a socket not itself connected
				332	* to the first socket (eg, /dev/log), clients may only enqueue more
				333	* messages if the present receive queue of the server socket is not
				334	* "too large". This means there's a second writeability condition
				335	* poll and sendmsg need to test. The dgram recv code will do a wake
				336	* up on the peer_wait wait queue of a socket upon reception of a
				337	* datagram which needs to be propagated to sleeping would-be writers
				338	* since these might not have sent anything so far. This can't be
				339	* accomplished via poll_wait because the lifetime of the server
				340	* socket might be less than that of its clients if these break their
				341	* association with it or if the server socket is closed while clients
				342	* are still connected to it and there's no way to inform "a polling
				343	* implementation" that it should let go of a certain wait queue
				344	*
				345	* In order to propagate a wake up, a wait_queue_t of the client
				346	* socket is enqueued on the peer_wait queue of the server socket
				347	* whose wake function does a wake_up on the ordinary client socket
				348	* wait queue. This connection is established whenever a write (or
				349	* poll for write) hit the flow control condition and broken when the
				350	* association to the server socket is dissolved or after a wake up
				351	* was relayed.
				352	*/
				353
				354	static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
				355	void *key)
				356	{
				357	struct unix_sock *u;
				358	wait_queue_head_t *u_sleep;
				359
				360	u = container_of(q, struct unix_sock, peer_wake);
				361
				362	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
				363	q);
				364	u->peer_wake.private = NULL;
				365
				366	/* relaying can only happen while the wq still exists */
				367	u_sleep = sk_sleep(&u->sk);
				368	if (u_sleep)
				369	wake_up_interruptible_poll(u_sleep, key);
				370
				371	return 0;
				372	}
				373
				374	static int unix_dgram_peer_wake_connect(struct sock sk, struct sock other)
				375	{
				376	struct unix_sock u, u_other;
				377	int rc;
				378
				379	u = unix_sk(sk);
				380	u_other = unix_sk(other);
				381	rc = 0;
				382	spin_lock(&u_other->peer_wait.lock);
				383
				384	if (!u->peer_wake.private) {
				385	u->peer_wake.private = other;
				386	__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
				387
				388	rc = 1;
				389	}
				390
				391	spin_unlock(&u_other->peer_wait.lock);
				392	return rc;
				393	}
				394
				395	static void unix_dgram_peer_wake_disconnect(struct sock *sk,
				396	struct sock *other)
				397	{
				398	struct unix_sock u, u_other;
				399
				400	u = unix_sk(sk);
				401	u_other = unix_sk(other);
				402	spin_lock(&u_other->peer_wait.lock);
				403
				404	if (u->peer_wake.private == other) {
				405	__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
				406	u->peer_wake.private = NULL;
				407	}
				408
				409	spin_unlock(&u_other->peer_wait.lock);
				410	}
				411
				412	static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
				413	struct sock *other)
				414	{
				415	unix_dgram_peer_wake_disconnect(sk, other);
				416	wake_up_interruptible_poll(sk_sleep(sk),
				417	POLLOUT \|
				418	POLLWRNORM \|
				419	POLLWRBAND);
				420	}
				421
				422	/* preconditions:
				423	* - unix_peer(sk) == other
				424	* - association is stable
				425	*/
				/*
				 * Register @sk for a wake-up from @other, then re-check @other's queue.
				 *
				 * Returns 1 when @other's receive queue is full (the registration is
				 * left in place). Returns 0 otherwise, after removing the registration
				 * if this call was the one that made it.
				 */
				static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
				{
					int connected;

					connected = unix_dgram_peer_wake_connect(sk, other);

					if (unix_recvq_full(other))
						return 1;

					if (connected)
						unix_dgram_peer_wake_disconnect(sk, other);

					return 0;
				}
				440
				441	static int unix_writable(const struct sock *sk)
				442	{
				443	return sk->sk_state != TCP_LISTEN &&
				444	(atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
				445	}
				446
				447	static void unix_write_space(struct sock *sk)
				448	{
				449	struct socket_wq *wq;
				450
				451	rcu_read_lock();
				452	if (unix_writable(sk)) {
				453	wq = rcu_dereference(sk->sk_wq);
				454	if (wq_has_sleeper(wq))
				455	wake_up_interruptible_sync_poll(&wq->wait,
				456	POLLOUT \| POLLWRNORM \| POLLWRBAND);
				457	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
				458	}
				459	rcu_read_unlock();
				460	}
				461
				462	/* When dgram socket disconnects (or changes its peer), we clear its receive
				463	* queue of packets arrived from previous peer. First, it allows to do
				464	* flow control based only on wmem_alloc; second, sk connected to peer
				465	* may receive messages only from that peer. */
				466	static void unix_dgram_disconnected(struct sock sk, struct sock other)
				467	{
				468	if (!skb_queue_empty(&sk->sk_receive_queue)) {
				469	skb_queue_purge(&sk->sk_receive_queue);
				470	wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
				471
				472	/* If one link of bidirectional dgram pipe is disconnected,
				473	* we signal error. Messages are lost. Do not make this,
				474	* when peer was not connected to us.
				475	*/
				476	if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
				477	other->sk_err = ECONNRESET;
				478	other->sk_error_report(other);
				479	}
				480	}
				481	}
				482
				483	static void unix_sock_destructor(struct sock *sk)
				484	{
				485	struct unix_sock *u = unix_sk(sk);
				486
				487	skb_queue_purge(&sk->sk_receive_queue);
				488
				489	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
				490	WARN_ON(!sk_unhashed(sk));
				491	WARN_ON(sk->sk_socket);
				492	if (!sock_flag(sk, SOCK_DEAD)) {
				493	pr_info("Attempt to release alive unix socket: %p\n", sk);
				494	return;
				495	}
				496
				497	if (u->addr)
				498	unix_release_addr(u->addr);
				499
				500	atomic_long_dec(&unix_nr_socks);
				501	local_bh_disable();
				502	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
				503	local_bh_enable();
				504	#ifdef UNIX_REFCNT_DEBUG
				505	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
				506	atomic_long_read(&unix_nr_socks));
				507	#endif
				508	}
				509
				510	static void unix_release_sock(struct sock *sk, int embrion)
				511	{
				512	struct unix_sock *u = unix_sk(sk);
				513	struct path path;
				514	struct sock *skpair;
				515	struct sk_buff *skb;
				516	int state;
				517
				518	unix_remove_socket(sk);
				519
				520	/* Clear state */
				521	unix_state_lock(sk);
				522	sock_orphan(sk);
				523	sk->sk_shutdown = SHUTDOWN_MASK;
				524	path = u->path;
				525	u->path.dentry = NULL;
				526	u->path.mnt = NULL;
				527	state = sk->sk_state;
				528	sk->sk_state = TCP_CLOSE;
				529	unix_state_unlock(sk);
				530
				531	wake_up_interruptible_all(&u->peer_wait);
				532
				533	skpair = unix_peer(sk);
				534
				535	if (skpair != NULL) {
				536	if (sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET) {
				537	unix_state_lock(skpair);
				538	/* No more writes */
				539	skpair->sk_shutdown = SHUTDOWN_MASK;
				540	if (!skb_queue_empty(&sk->sk_receive_queue) \|\| embrion)
				541	skpair->sk_err = ECONNRESET;
				542	unix_state_unlock(skpair);
				543	skpair->sk_state_change(skpair);
				544	sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
				545	}
				546
				547	unix_dgram_peer_wake_disconnect(sk, skpair);
				548	sock_put(skpair); /* It may now die */
				549	unix_peer(sk) = NULL;
				550	}
				551
				552	/* Try to flush out this socket. Throw out buffers at least */
				553
				554	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
				555	if (state == TCP_LISTEN)
				556	unix_release_sock(skb->sk, 1);
				557	/* passed fds are erased in the kfree_skb hook */
				558	UNIXCB(skb).consumed = skb->len;
				559	kfree_skb(skb);
				560	}
				561
				562	if (path.dentry)
				563	path_put(&path);
				564
				565	sock_put(sk);
				566
				567	/* ---- Socket is dead now and most probably destroyed ---- */
				568
				569	/*
				570	* Fixme: BSD difference: In BSD all sockets connected to us get
				571	* ECONNRESET and we die on the spot. In Linux we behave
				572	* like files and pipes do and wait for the last
				573	* dereference.
				574	*
				575	* Can't we simply set sock->err?
				576	*
				577	* What the above comment does talk about? --ANK(980817)
				578	*/
				579
				580	if (unix_tot_inflight)
				581	unix_gc(); /* Garbage collect fds */
				582	}
				583
				584	static void init_peercred(struct sock *sk)
				585	{
				586	put_pid(sk->sk_peer_pid);
				587	if (sk->sk_peer_cred)
				588	put_cred(sk->sk_peer_cred);
				589	sk->sk_peer_pid = get_pid(task_tgid(current));
				590	sk->sk_peer_cred = get_current_cred();
				591	}
				592
				593	static void copy_peercred(struct sock sk, struct sock peersk)
				594	{
				595	put_pid(sk->sk_peer_pid);
				596	if (sk->sk_peer_cred)
				597	put_cred(sk->sk_peer_cred);
				598	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
				599	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
				600	}
				601
				602	static int unix_listen(struct socket *sock, int backlog)
				603	{
				604	int err;
				605	struct sock *sk = sock->sk;
				606	struct unix_sock *u = unix_sk(sk);
				607	struct pid *old_pid = NULL;
				608
				609	err = -EOPNOTSUPP;
				610	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
				611	goto out; /* Only stream/seqpacket sockets accept */
				612	err = -EINVAL;
				613	if (!u->addr)
				614	goto out; /* No listens on an unbound socket */
				615	unix_state_lock(sk);
				616	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
				617	goto out_unlock;
				618	if (backlog > sk->sk_max_ack_backlog)
				619	wake_up_interruptible_all(&u->peer_wait);
				620	sk->sk_max_ack_backlog = backlog;
				621	sk->sk_state = TCP_LISTEN;
				622	/* set credentials so connect can copy them */
				623	init_peercred(sk);
				624	err = 0;
				625
				626	out_unlock:
				627	unix_state_unlock(sk);
				628	put_pid(old_pid);
				629	out:
				630	return err;
				631	}
				632
				633	static int unix_release(struct socket *);
				634	static int unix_bind(struct socket , struct sockaddr , int);
				635	static int unix_stream_connect(struct socket , struct sockaddr ,
				636	int addr_len, int flags);
				637	static int unix_socketpair(struct socket , struct socket );
				638	static int unix_accept(struct socket , struct socket , int);
				639	static int unix_getname(struct socket , struct sockaddr , int *, int);
				640	static unsigned int unix_poll(struct file , struct socket , poll_table *);
				641	static unsigned int unix_dgram_poll(struct file , struct socket ,
				642	poll_table *);
				643	static int unix_ioctl(struct socket *, unsigned int, unsigned long);
				644	static int unix_shutdown(struct socket *, int);
				645	static int unix_stream_sendmsg(struct socket , struct msghdr , size_t);
				646	static int unix_stream_recvmsg(struct socket , struct msghdr , size_t, int);
				647	static ssize_t unix_stream_sendpage(struct socket , struct page , int offset,
				648	size_t size, int flags);
				649	static ssize_t unix_stream_splice_read(struct socket , loff_t ppos,
				650	struct pipe_inode_info *, size_t size,
				651	unsigned int flags);
				652	static int unix_dgram_sendmsg(struct socket , struct msghdr , size_t);
				653	static int unix_dgram_recvmsg(struct socket , struct msghdr , size_t, int);
				654	static int unix_dgram_connect(struct socket , struct sockaddr ,
				655	int, int);
				656	static int unix_seqpacket_sendmsg(struct socket , struct msghdr , size_t);
				657	static int unix_seqpacket_recvmsg(struct socket , struct msghdr , size_t,
				658	int);
				659
				660	static int unix_set_peek_off(struct sock *sk, int val)
				661	{
				662	struct unix_sock *u = unix_sk(sk);
				663
				664	if (mutex_lock_interruptible(&u->iolock))
				665	return -EINTR;
				666
				667	sk->sk_peek_off = val;
				668	mutex_unlock(&u->iolock);
				669
				670	return 0;
				671	}
				672
				673
				674	static const struct proto_ops unix_stream_ops = {
				675	.family = PF_UNIX,
				676	.owner = THIS_MODULE,
				677	.release = unix_release,
				678	.bind = unix_bind,
				679	.connect = unix_stream_connect,
				680	.socketpair = unix_socketpair,
				681	.accept = unix_accept,
				682	.getname = unix_getname,
				683	.poll = unix_poll,
				684	.ioctl = unix_ioctl,
				685	.listen = unix_listen,
				686	.shutdown = unix_shutdown,
				687	.setsockopt = sock_no_setsockopt,
				688	.getsockopt = sock_no_getsockopt,
				689	.sendmsg = unix_stream_sendmsg,
				690	.recvmsg = unix_stream_recvmsg,
				691	.mmap = sock_no_mmap,
				692	.sendpage = unix_stream_sendpage,
				693	.splice_read = unix_stream_splice_read,
				694	.set_peek_off = unix_set_peek_off,
				695	};
				696
				697	static const struct proto_ops unix_dgram_ops = {
				698	.family = PF_UNIX,
				699	.owner = THIS_MODULE,
				700	.release = unix_release,
				701	.bind = unix_bind,
				702	.connect = unix_dgram_connect,
				703	.socketpair = unix_socketpair,
				704	.accept = sock_no_accept,
				705	.getname = unix_getname,
				706	.poll = unix_dgram_poll,
				707	.ioctl = unix_ioctl,
				708	.listen = sock_no_listen,
				709	.shutdown = unix_shutdown,
				710	.setsockopt = sock_no_setsockopt,
				711	.getsockopt = sock_no_getsockopt,
				712	.sendmsg = unix_dgram_sendmsg,
				713	.recvmsg = unix_dgram_recvmsg,
				714	.mmap = sock_no_mmap,
				715	.sendpage = sock_no_sendpage,
				716	.set_peek_off = unix_set_peek_off,
				717	};
				718
				719	static const struct proto_ops unix_seqpacket_ops = {
				720	.family = PF_UNIX,
				721	.owner = THIS_MODULE,
				722	.release = unix_release,
				723	.bind = unix_bind,
				724	.connect = unix_stream_connect,
				725	.socketpair = unix_socketpair,
				726	.accept = unix_accept,
				727	.getname = unix_getname,
				728	.poll = unix_dgram_poll,
				729	.ioctl = unix_ioctl,
				730	.listen = unix_listen,
				731	.shutdown = unix_shutdown,
				732	.setsockopt = sock_no_setsockopt,
				733	.getsockopt = sock_no_getsockopt,
				734	.sendmsg = unix_seqpacket_sendmsg,
				735	.recvmsg = unix_seqpacket_recvmsg,
				736	.mmap = sock_no_mmap,
				737	.sendpage = sock_no_sendpage,
				738	.set_peek_off = unix_set_peek_off,
				739	};
				740
				741	static struct proto unix_proto = {
				742	.name = "UNIX",
				743	.owner = THIS_MODULE,
				744	.obj_size = sizeof(struct unix_sock),
				745	};
				746
				747	/*
				748	* AF_UNIX sockets do not interact with hardware, hence they
				749	* dont trigger interrupts - so it's safe for them to have
				750	* bh-unsafe locking for their sk_receive_queue.lock. Split off
				751	* this special lock-class by reinitializing the spinlock key:
				752	*/
				753	static struct lock_class_key af_unix_sk_receive_queue_lock_key;
				754
				755	static struct sock unix_create1(struct net net, struct socket *sock, int kern)
				756	{
				757	struct sock *sk = NULL;
				758	struct unix_sock *u;
				759
				760	atomic_long_inc(&unix_nr_socks);
				761	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
				762	goto out;
				763
				764	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
				765	if (!sk)
				766	goto out;
				767
				768	sock_init_data(sock, sk);
				769	lockdep_set_class(&sk->sk_receive_queue.lock,
				770	&af_unix_sk_receive_queue_lock_key);
				771
				772	sk->sk_write_space = unix_write_space;
				773	sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
				774	sk->sk_destruct = unix_sock_destructor;
				775	u = unix_sk(sk);
				776	u->path.dentry = NULL;
				777	u->path.mnt = NULL;
				778	spin_lock_init(&u->lock);
				779	atomic_long_set(&u->inflight, 0);
				780	INIT_LIST_HEAD(&u->link);
				781	mutex_init(&u->iolock); /* single task reading lock */
				782	mutex_init(&u->bindlock); /* single task binding lock */
				783	init_waitqueue_head(&u->peer_wait);
				784	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
				785	unix_insert_socket(unix_sockets_unbound(sk), sk);
				786	out:
				787	if (sk == NULL)
				788	atomic_long_dec(&unix_nr_socks);
				789	else {
				790	local_bh_disable();
				791	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
				792	local_bh_enable();
				793	}
				794	return sk;
				795	}
				796
				797	static int unix_create(struct net net, struct socket sock, int protocol,
				798	int kern)
				799	{
				800	if (protocol && protocol != PF_UNIX)
				801	return -EPROTONOSUPPORT;
				802
				803	sock->state = SS_UNCONNECTED;
				804
				805	switch (sock->type) {
				806	case SOCK_STREAM:
				807	sock->ops = &unix_stream_ops;
				808	break;
				809	/*
				810	* Believe it or not BSD has AF_UNIX, SOCK_RAW though
				811	* nothing uses it.
				812	*/
				813	case SOCK_RAW:
				814	sock->type = SOCK_DGRAM;
				815	case SOCK_DGRAM:
				816	sock->ops = &unix_dgram_ops;
				817	break;
				818	case SOCK_SEQPACKET:
				819	sock->ops = &unix_seqpacket_ops;
				820	break;
				821	default:
				822	return -ESOCKTNOSUPPORT;
				823	}
				824
				825	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
				826	}
				827
				828	static int unix_release(struct socket *sock)
				829	{
				830	struct sock *sk = sock->sk;
				831
				832	if (!sk)
				833	return 0;
				834
				835	unix_release_sock(sk, 0);
				836	sock->sk = NULL;
				837
				838	return 0;
				839	}
				840
				841	static int unix_autobind(struct socket *sock)
				842	{
				843	struct sock *sk = sock->sk;
				844	struct net *net = sock_net(sk);
				845	struct unix_sock *u = unix_sk(sk);
				846	static u32 ordernum = 1;
				847	struct unix_address *addr;
				848	int err;
				849	unsigned int retries = 0;
				850
				851	err = mutex_lock_interruptible(&u->bindlock);
				852	if (err)
				853	return err;
				854
				855	err = 0;
				856	if (u->addr)
				857	goto out;
				858
				859	err = -ENOMEM;
				860	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
				861	if (!addr)
				862	goto out;
				863
				864	addr->name->sun_family = AF_UNIX;
				865	atomic_set(&addr->refcnt, 1);
				866
				867	retry:
				868	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
				869	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
				870
				871	spin_lock(&unix_table_lock);
				872	ordernum = (ordernum+1)&0xFFFFF;
				873
				874	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				875	addr->hash)) {
				876	spin_unlock(&unix_table_lock);
				877	/*
				878	* __unix_find_socket_byname() may take long time if many names
				879	* are already in use.
				880	*/
				881	cond_resched();
				882	/* Give up if all names seems to be in use. */
				883	if (retries++ == 0xFFFFF) {
				884	err = -ENOSPC;
				885	kfree(addr);
				886	goto out;
				887	}
				888	goto retry;
				889	}
				890	addr->hash ^= sk->sk_type;
				891
				892	__unix_remove_socket(sk);
				893	u->addr = addr;
				894	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
				895	spin_unlock(&unix_table_lock);
				896	err = 0;
				897
				898	out: mutex_unlock(&u->bindlock);
				899	return err;
				900	}
				901
				/*
				 * Resolve an address to the socket bound to it.
				 *
				 * A name whose first path byte is non-zero is looked up as a path:
				 * the inode must be writable by the caller and be a socket, and the
				 * bound socket is found by inode. Any other name is looked up in the
				 * hash table by name. In both cases the access time of the bound path
				 * is touched when there is one.
				 *
				 * Returns the sock with a reference held, or NULL with *@error set
				 * to the path lookup or permission error, -ECONNREFUSED when nothing
				 * is bound there, or -EPROTOTYPE when the socket type differs from
				 * @type.
				 */
				static struct sock *unix_find_other(struct net *net,
								    struct sockaddr_un *sunname, int len,
								    int type, unsigned int hash, int *error)
				{
					struct sock *u;
					struct path path;
					int err = 0;

					if (sunname->sun_path[0]) {
						struct inode *inode;
						err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
						if (err)
							goto fail;
						inode = d_real_inode(path.dentry);
						err = inode_permission(inode, MAY_WRITE);
						if (err)
							goto put_fail;

						err = -ECONNREFUSED;
						if (!S_ISSOCK(inode->i_mode))
							goto put_fail;
						u = unix_find_socket_byinode(inode);
						if (!u)
							goto put_fail;

						if (u->sk_type == type)
							touch_atime(&path);

						path_put(&path);

						err = -EPROTOTYPE;
						if (u->sk_type != type) {
							sock_put(u);
							goto fail;
						}
					} else {
						err = -ECONNREFUSED;
						u = unix_find_socket_byname(net, sunname, len, type, hash);
						if (u) {
							struct dentry *dentry;
							dentry = unix_sk(u)->path.dentry;
							if (dentry)
								touch_atime(&unix_sk(u)->path);
						} else
							goto fail;
					}
					return u;

				put_fail:
					path_put(&path);
				fail:
					*error = err;
					return NULL;
				}
				956
				957	static int unix_mknod(const char sun_path, umode_t mode, struct path res)
				958	{
				959	struct dentry *dentry;
				960	struct path path;
				961	int err = 0;
				962	/*
				963	* Get the parent directory, calculate the hash for last
				964	* component.
				965	*/
				966	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
				967	err = PTR_ERR(dentry);
				968	if (IS_ERR(dentry))
				969	return err;
				970
				971	/*
				972	* All right, let's create it.
				973	*/
				974	err = security_path_mknod(&path, dentry, mode, 0);
				975	if (!err) {
				976	err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
				977	if (!err) {
				978	res->mnt = mntget(path.mnt);
				979	res->dentry = dget(dentry);
				980	}
				981	}
				982	done_path_create(&path, dentry);
				983	return err;
				984	}
				985
				986	static int unix_bind(struct socket sock, struct sockaddr uaddr, int addr_len)
				987	{
				988	struct sock *sk = sock->sk;
				989	struct net *net = sock_net(sk);
				990	struct unix_sock *u = unix_sk(sk);
				991	struct sockaddr_un sunaddr = (struct sockaddr_un )uaddr;
				992	char *sun_path = sunaddr->sun_path;
				993	int err;
				994	unsigned int hash;
				995	struct unix_address *addr;
				996	struct hlist_head *list;
				997	struct path path = { NULL, NULL };
				998
				999	err = -EINVAL;
				1000	if (addr_len < offsetofend(struct sockaddr_un, sun_family) \|\|
				1001	sunaddr->sun_family != AF_UNIX)
				1002	goto out;
				1003
				1004	if (addr_len == sizeof(short)) {
				1005	err = unix_autobind(sock);
				1006	goto out;
				1007	}
				1008
				1009	err = unix_mkname(sunaddr, addr_len, &hash);
				1010	if (err < 0)
				1011	goto out;
				1012	addr_len = err;
				1013
				1014	if (sun_path[0]) {
				1015	umode_t mode = S_IFSOCK \|
				1016	(SOCK_INODE(sock)->i_mode & ~current_umask());
				1017	err = unix_mknod(sun_path, mode, &path);
				1018	if (err) {
				1019	if (err == -EEXIST)
				1020	err = -EADDRINUSE;
				1021	goto out;
				1022	}
				1023	}
				1024
				1025	err = mutex_lock_interruptible(&u->bindlock);
				1026	if (err)
				1027	goto out_put;
				1028
				1029	err = -EINVAL;
				1030	if (u->addr)
				1031	goto out_up;
				1032
				1033	err = -ENOMEM;
				1034	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
				1035	if (!addr)
				1036	goto out_up;
				1037
				1038	memcpy(addr->name, sunaddr, addr_len);
				1039	addr->len = addr_len;
				1040	addr->hash = hash ^ sk->sk_type;
				1041	atomic_set(&addr->refcnt, 1);
				1042
				1043	if (sun_path[0]) {
				1044	addr->hash = UNIX_HASH_SIZE;
				1045	hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
				1046	spin_lock(&unix_table_lock);
				1047	u->path = path;
				1048	list = &unix_socket_table[hash];
				1049	} else {
				1050	spin_lock(&unix_table_lock);
				1051	err = -EADDRINUSE;
				1052	if (__unix_find_socket_byname(net, sunaddr, addr_len,
				1053	sk->sk_type, hash)) {
				1054	unix_release_addr(addr);
				1055	goto out_unlock;
				1056	}
				1057
				1058	list = &unix_socket_table[addr->hash];
				1059	}
				1060
				1061	err = 0;
				1062	__unix_remove_socket(sk);
				1063	u->addr = addr;
				1064	__unix_insert_socket(list, sk);
				1065
				1066	out_unlock:
				1067	spin_unlock(&unix_table_lock);
				1068	out_up:
				1069	mutex_unlock(&u->bindlock);
				1070	out_put:
				1071	if (err)
				1072	path_put(&path);
				1073	out:
				1074	return err;
				1075	}
				1076
				1077	static void unix_state_double_lock(struct sock sk1, struct sock sk2)
				1078	{
				1079	if (unlikely(sk1 == sk2) \|\| !sk2) {
				1080	unix_state_lock(sk1);
				1081	return;
				1082	}
				1083	if (sk1 < sk2) {
				1084	unix_state_lock(sk1);
				1085	unix_state_lock_nested(sk2);
				1086	} else {
				1087	unix_state_lock(sk2);
				1088	unix_state_lock_nested(sk1);
				1089	}
				1090	}
				1091
				1092	static void unix_state_double_unlock(struct sock sk1, struct sock sk2)
				1093	{
				1094	if (unlikely(sk1 == sk2) \|\| !sk2) {
				1095	unix_state_unlock(sk1);
				1096	return;
				1097	}
				1098	unix_state_unlock(sk1);
				1099	unix_state_unlock(sk2);
				1100	}
				1101
				1102	static int unix_dgram_connect(struct socket sock, struct sockaddr addr,
				1103	int alen, int flags)
				1104	{
				1105	struct sock *sk = sock->sk;
				1106	struct net *net = sock_net(sk);
				1107	struct sockaddr_un sunaddr = (struct sockaddr_un )addr;
				1108	struct sock *other;
				1109	unsigned int hash;
				1110	int err;
				1111
				1112	err = -EINVAL;
				1113	if (alen < offsetofend(struct sockaddr, sa_family))
				1114	goto out;
				1115
				1116	if (addr->sa_family != AF_UNSPEC) {
				1117	err = unix_mkname(sunaddr, alen, &hash);
				1118	if (err < 0)
				1119	goto out;
				1120	alen = err;
				1121
				1122	if (test_bit(SOCK_PASSCRED, &sock->flags) &&
				1123	!unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
				1124	goto out;
				1125
				1126	restart:
				1127	other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
				1128	if (!other)
				1129	goto out;
				1130
				1131	unix_state_double_lock(sk, other);
				1132
				1133	/* Apparently VFS overslept socket death. Retry. */
				1134	if (sock_flag(other, SOCK_DEAD)) {
				1135	unix_state_double_unlock(sk, other);
				1136	sock_put(other);
				1137	goto restart;
				1138	}
				1139
				1140	err = -EPERM;
				1141	if (!unix_may_send(sk, other))
				1142	goto out_unlock;
				1143
				1144	err = security_unix_may_send(sk->sk_socket, other->sk_socket);
				1145	if (err)
				1146	goto out_unlock;
				1147
				1148	} else {
				1149	/*
				1150	* 1003.1g breaking connected state with AF_UNSPEC
				1151	*/
				1152	other = NULL;
				1153	unix_state_double_lock(sk, other);
				1154	}
				1155
				1156	/*
				1157	* If it was connected, reconnect.
				1158	*/
				1159	if (unix_peer(sk)) {
				1160	struct sock *old_peer = unix_peer(sk);
				1161	unix_peer(sk) = other;
				1162	unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
				1163
				1164	unix_state_double_unlock(sk, other);
				1165
				1166	if (other != old_peer)
				1167	unix_dgram_disconnected(sk, old_peer);
				1168	sock_put(old_peer);
				1169	} else {
				1170	unix_peer(sk) = other;
				1171	unix_state_double_unlock(sk, other);
				1172	}
				1173	return 0;
				1174
				1175	out_unlock:
				1176	unix_state_double_unlock(sk, other);
				1177	sock_put(other);
				1178	out:
				1179	return err;
				1180	}
				1181
				1182	static long unix_wait_for_peer(struct sock *other, long timeo)
				1183	{
				1184	struct unix_sock *u = unix_sk(other);
				1185	int sched;
				1186	DEFINE_WAIT(wait);
				1187
				1188	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
				1189
				1190	sched = !sock_flag(other, SOCK_DEAD) &&
				1191	!(other->sk_shutdown & RCV_SHUTDOWN) &&
				1192	unix_recvq_full(other);
				1193
				1194	unix_state_unlock(other);
				1195
				1196	if (sched)
				1197	timeo = schedule_timeout(timeo);
				1198
				1199	finish_wait(&u->peer_wait, &wait);
				1200	return timeo;
				1201	}
				1202
				1203	static int unix_stream_connect(struct socket sock, struct sockaddr uaddr,
				1204	int addr_len, int flags)
				1205	{
				1206	struct sockaddr_un sunaddr = (struct sockaddr_un )uaddr;
				1207	struct sock *sk = sock->sk;
				1208	struct net *net = sock_net(sk);
				1209	struct unix_sock u = unix_sk(sk), newu, *otheru;
				1210	struct sock *newsk = NULL;
				1211	struct sock *other = NULL;
				1212	struct sk_buff *skb = NULL;
				1213	unsigned int hash;
				1214	int st;
				1215	int err;
				1216	long timeo;
				1217
				1218	err = unix_mkname(sunaddr, addr_len, &hash);
				1219	if (err < 0)
				1220	goto out;
				1221	addr_len = err;
				1222
				1223	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
				1224	(err = unix_autobind(sock)) != 0)
				1225	goto out;
				1226
				1227	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
				1228
				1229	/* First of all allocate resources.
				1230	If we will make it after state is locked,
				1231	we will have to recheck all again in any case.
				1232	*/
				1233
				1234	err = -ENOMEM;
				1235
				1236	/* create new sock for complete connection */
				1237	newsk = unix_create1(sock_net(sk), NULL, 0);
				1238	if (newsk == NULL)
				1239	goto out;
				1240
				1241	/* Allocate skb for sending to listening sock */
				1242	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
				1243	if (skb == NULL)
				1244	goto out;
				1245
				1246	restart:
				1247	/* Find listening sock. */
				1248	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
				1249	if (!other)
				1250	goto out;
				1251
				1252	/* Latch state of peer */
				1253	unix_state_lock(other);
				1254
				1255	/* Apparently VFS overslept socket death. Retry. */
				1256	if (sock_flag(other, SOCK_DEAD)) {
				1257	unix_state_unlock(other);
				1258	sock_put(other);
				1259	goto restart;
				1260	}
				1261
				1262	err = -ECONNREFUSED;
				1263	if (other->sk_state != TCP_LISTEN)
				1264	goto out_unlock;
				1265	if (other->sk_shutdown & RCV_SHUTDOWN)
				1266	goto out_unlock;
				1267
				1268	if (unix_recvq_full(other)) {
				1269	err = -EAGAIN;
				1270	if (!timeo)
				1271	goto out_unlock;
				1272
				1273	timeo = unix_wait_for_peer(other, timeo);
				1274
				1275	err = sock_intr_errno(timeo);
				1276	if (signal_pending(current))
				1277	goto out;
				1278	sock_put(other);
				1279	goto restart;
				1280	}
				1281
				1282	/* Latch our state.
				1283
				1284	It is tricky place. We need to grab our state lock and cannot
				1285	drop lock on peer. It is dangerous because deadlock is
				1286	possible. Connect to self case and simultaneous
				1287	attempt to connect are eliminated by checking socket
				1288	state. other is TCP_LISTEN, if sk is TCP_LISTEN we
				1289	check this before attempt to grab lock.
				1290
				1291	Well, and we have to recheck the state after socket locked.
				1292	*/
				1293	st = sk->sk_state;
				1294
				1295	switch (st) {
				1296	case TCP_CLOSE:
				1297	/* This is ok... continue with connect */
				1298	break;
				1299	case TCP_ESTABLISHED:
				1300	/* Socket is already connected */
				1301	err = -EISCONN;
				1302	goto out_unlock;
				1303	default:
				1304	err = -EINVAL;
				1305	goto out_unlock;
				1306	}
				1307
				1308	unix_state_lock_nested(sk);
				1309
				1310	if (sk->sk_state != st) {
				1311	unix_state_unlock(sk);
				1312	unix_state_unlock(other);
				1313	sock_put(other);
				1314	goto restart;
				1315	}
				1316
				1317	err = security_unix_stream_connect(sk, other, newsk);
				1318	if (err) {
				1319	unix_state_unlock(sk);
				1320	goto out_unlock;
				1321	}
				1322
				1323	/* The way is open! Fastly set all the necessary fields... */
				1324
				1325	sock_hold(sk);
				1326	unix_peer(newsk) = sk;
				1327	newsk->sk_state = TCP_ESTABLISHED;
				1328	newsk->sk_type = sk->sk_type;
				1329	init_peercred(newsk);
				1330	newu = unix_sk(newsk);
				1331	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
				1332	otheru = unix_sk(other);
				1333
				1334	/* copy address information from listening to new sock*/
				1335	if (otheru->addr) {
				1336	atomic_inc(&otheru->addr->refcnt);
				1337	newu->addr = otheru->addr;
				1338	}
				1339	if (otheru->path.dentry) {
				1340	path_get(&otheru->path);
				1341	newu->path = otheru->path;
				1342	}
				1343
				1344	/* Set credentials */
				1345	copy_peercred(sk, other);
				1346
				1347	sock->state = SS_CONNECTED;
				1348	sk->sk_state = TCP_ESTABLISHED;
				1349	sock_hold(newsk);
				1350
				1351	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
				1352	unix_peer(sk) = newsk;
				1353
				1354	unix_state_unlock(sk);
				1355
				1356	/* take ten and and send info to listening sock */
				1357	spin_lock(&other->sk_receive_queue.lock);
				1358	__skb_queue_tail(&other->sk_receive_queue, skb);
				1359	spin_unlock(&other->sk_receive_queue.lock);
				1360	unix_state_unlock(other);
				1361	other->sk_data_ready(other);
				1362	sock_put(other);
				1363	return 0;
				1364
				1365	out_unlock:
				1366	if (other)
				1367	unix_state_unlock(other);
				1368
				1369	out:
				1370	kfree_skb(skb);
				1371	if (newsk)
				1372	unix_release_sock(newsk, 0);
				1373	if (other)
				1374	sock_put(other);
				1375	return err;
				1376	}
				1377
				1378	static int unix_socketpair(struct socket socka, struct socket sockb)
				1379	{
				1380	struct sock ska = socka->sk, skb = sockb->sk;
				1381
				1382	/* Join our sockets back to back */
				1383	sock_hold(ska);
				1384	sock_hold(skb);
				1385	unix_peer(ska) = skb;
				1386	unix_peer(skb) = ska;
				1387	init_peercred(ska);
				1388	init_peercred(skb);
				1389
				1390	if (ska->sk_type != SOCK_DGRAM) {
				1391	ska->sk_state = TCP_ESTABLISHED;
				1392	skb->sk_state = TCP_ESTABLISHED;
				1393	socka->state = SS_CONNECTED;
				1394	sockb->state = SS_CONNECTED;
				1395	}
				1396	return 0;
				1397	}
				1398
				1399	static void unix_sock_inherit_flags(const struct socket *old,
				1400	struct socket *new)
				1401	{
				1402	if (test_bit(SOCK_PASSCRED, &old->flags))
				1403	set_bit(SOCK_PASSCRED, &new->flags);
				1404	if (test_bit(SOCK_PASSSEC, &old->flags))
				1405	set_bit(SOCK_PASSSEC, &new->flags);
				1406	}
				1407
				1408	static int unix_accept(struct socket sock, struct socket newsock, int flags)
				1409	{
				1410	struct sock *sk = sock->sk;
				1411	struct sock *tsk;
				1412	struct sk_buff *skb;
				1413	int err;
				1414
				1415	err = -EOPNOTSUPP;
				1416	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
				1417	goto out;
				1418
				1419	err = -EINVAL;
				1420	if (sk->sk_state != TCP_LISTEN)
				1421	goto out;
				1422
				1423	/* If socket state is TCP_LISTEN it cannot change (for now...),
				1424	* so that no locks are necessary.
				1425	*/
				1426
				1427	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
				1428	if (!skb) {
				1429	/* This means receive shutdown. */
				1430	if (err == 0)
				1431	err = -EINVAL;
				1432	goto out;
				1433	}
				1434
				1435	tsk = skb->sk;
				1436	skb_free_datagram(sk, skb);
				1437	wake_up_interruptible(&unix_sk(sk)->peer_wait);
				1438
				1439	/* attach accepted sock to socket */
				1440	unix_state_lock(tsk);
				1441	newsock->state = SS_CONNECTED;
				1442	unix_sock_inherit_flags(sock, newsock);
				1443	sock_graft(tsk, newsock);
				1444	unix_state_unlock(tsk);
				1445	return 0;
				1446
				1447	out:
				1448	return err;
				1449	}
				1450
				1451
				1452	static int unix_getname(struct socket sock, struct sockaddr uaddr, int *uaddr_len, int peer)
				1453	{
				1454	struct sock *sk = sock->sk;
				1455	struct unix_sock *u;
				1456	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
				1457	int err = 0;
				1458
				1459	if (peer) {
				1460	sk = unix_peer_get(sk);
				1461
				1462	err = -ENOTCONN;
				1463	if (!sk)
				1464	goto out;
				1465	err = 0;
				1466	} else {
				1467	sock_hold(sk);
				1468	}
				1469
				1470	u = unix_sk(sk);
				1471	unix_state_lock(sk);
				1472	if (!u->addr) {
				1473	sunaddr->sun_family = AF_UNIX;
				1474	sunaddr->sun_path[0] = 0;
				1475	*uaddr_len = sizeof(short);
				1476	} else {
				1477	struct unix_address *addr = u->addr;
				1478
				1479	*uaddr_len = addr->len;
				1480	memcpy(sunaddr, addr->name, *uaddr_len);
				1481	}
				1482	unix_state_unlock(sk);
				1483	sock_put(sk);
				1484	out:
				1485	return err;
				1486	}
				1487
				1488	static void unix_detach_fds(struct scm_cookie scm, struct sk_buff skb)
				1489	{
				1490	int i;
				1491
				1492	scm->fp = UNIXCB(skb).fp;
				1493	UNIXCB(skb).fp = NULL;
				1494
				1495	for (i = scm->fp->count-1; i >= 0; i--)
				1496	unix_notinflight(scm->fp->user, scm->fp->fp[i]);
				1497	}
				1498
				1499	static void unix_destruct_scm(struct sk_buff *skb)
				1500	{
				1501	struct scm_cookie scm;
				1502	memset(&scm, 0, sizeof(scm));
				1503	scm.pid = UNIXCB(skb).pid;
				1504	if (UNIXCB(skb).fp)
				1505	unix_detach_fds(&scm, skb);
				1506
				1507	/* Alas, it calls VFS */
				1508	/* So fscking what? fput() had been SMP-safe since the last Summer */
				1509	scm_destroy(&scm);
				1510	sock_wfree(skb);
				1511	}
				1512
				1513	/*
				1514	* The "user->unix_inflight" variable is protected by the garbage
				1515	* collection lock, and we just read it locklessly here. If you go
				1516	* over the limit, there might be a tiny race in actually noticing
				1517	* it across threads. Tough.
				1518	*/
				1519	static inline bool too_many_unix_fds(struct task_struct *p)
				1520	{
				1521	struct user_struct *user = current_user();
				1522
				1523	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
				1524	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
				1525	return false;
				1526	}
				1527
				1528	#define MAX_RECURSION_LEVEL 4
				1529
				1530	static int unix_attach_fds(struct scm_cookie scm, struct sk_buff skb)
				1531	{
				1532	int i;
				1533	unsigned char max_level = 0;
				1534	int unix_sock_count = 0;
				1535
				1536	if (too_many_unix_fds(current))
				1537	return -ETOOMANYREFS;
				1538
				1539	for (i = scm->fp->count - 1; i >= 0; i--) {
				1540	struct sock *sk = unix_get_socket(scm->fp->fp[i]);
				1541
				1542	if (sk) {
				1543	unix_sock_count++;
				1544	max_level = max(max_level,
				1545	unix_sk(sk)->recursion_level);
				1546	}
				1547	}
				1548	if (unlikely(max_level > MAX_RECURSION_LEVEL))
				1549	return -ETOOMANYREFS;
				1550
				1551	/*
				1552	* Need to duplicate file references for the sake of garbage
				1553	* collection. Otherwise a socket in the fps might become a
				1554	* candidate for GC while the skb is not yet queued.
				1555	*/
				1556	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
				1557	if (!UNIXCB(skb).fp)
				1558	return -ENOMEM;
				1559
				1560	for (i = scm->fp->count - 1; i >= 0; i--)
				1561	unix_inflight(scm->fp->user, scm->fp->fp[i]);
				1562	return max_level;
				1563	}
				1564
				1565	static int unix_scm_to_skb(struct scm_cookie scm, struct sk_buff skb, bool send_fds)
				1566	{
				1567	int err = 0;
				1568
				1569	UNIXCB(skb).pid = get_pid(scm->pid);
				1570	UNIXCB(skb).uid = scm->creds.uid;
				1571	UNIXCB(skb).gid = scm->creds.gid;
				1572	UNIXCB(skb).fp = NULL;
				1573	unix_get_secdata(scm, skb);
				1574	if (scm->fp && send_fds)
				1575	err = unix_attach_fds(scm, skb);
				1576
				1577	skb->destructor = unix_destruct_scm;
				1578	return err;
				1579	}
				1580
				1581	static bool unix_passcred_enabled(const struct socket *sock,
				1582	const struct sock *other)
				1583	{
				1584	return test_bit(SOCK_PASSCRED, &sock->flags) \|\|
				1585	!other->sk_socket \|\|
				1586	test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
				1587	}
				1588
				1589	/*
				1590	* Some apps rely on write() giving SCM_CREDENTIALS
				1591	* We include credentials if source or destination socket
				1592	* asserted SOCK_PASSCRED.
				1593	*/
				1594	static void maybe_add_creds(struct sk_buff skb, const struct socket sock,
				1595	const struct sock *other)
				1596	{
				1597	if (UNIXCB(skb).pid)
				1598	return;
				1599	if (unix_passcred_enabled(sock, other)) {
				1600	UNIXCB(skb).pid = get_pid(task_tgid(current));
				1601	current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
				1602	}
				1603	}
				1604
				1605	static int maybe_init_creds(struct scm_cookie *scm,
				1606	struct socket *socket,
				1607	const struct sock *other)
				1608	{
				1609	int err;
				1610	struct msghdr msg = { .msg_controllen = 0 };
				1611
				1612	err = scm_send(socket, &msg, scm, false);
				1613	if (err)
				1614	return err;
				1615
				1616	if (unix_passcred_enabled(socket, other)) {
				1617	scm->pid = get_pid(task_tgid(current));
				1618	current_uid_gid(&scm->creds.uid, &scm->creds.gid);
				1619	}
				1620	return err;
				1621	}
				1622
				1623	static bool unix_skb_scm_eq(struct sk_buff *skb,
				1624	struct scm_cookie *scm)
				1625	{
				1626	const struct unix_skb_parms *u = &UNIXCB(skb);
				1627
				1628	return u->pid == scm->pid &&
				1629	uid_eq(u->uid, scm->creds.uid) &&
				1630	gid_eq(u->gid, scm->creds.gid) &&
				1631	unix_secdata_eq(scm, skb);
				1632	}
				1633
				1634	/*
				1635	* Send AF_UNIX data.
				1636	*/
				1637
				1638	static int unix_dgram_sendmsg(struct socket sock, struct msghdr msg,
				1639	size_t len)
				1640	{
				1641	struct sock *sk = sock->sk;
				1642	struct net *net = sock_net(sk);
				1643	struct unix_sock *u = unix_sk(sk);
				1644	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
				1645	struct sock *other = NULL;
				1646	int namelen = 0; /* fake GCC */
				1647	int err;
				1648	unsigned int hash;
				1649	struct sk_buff *skb;
				1650	long timeo;
				1651	struct scm_cookie scm;
				1652	int max_level;
				1653	int data_len = 0;
				1654	int sk_locked;
				1655
				1656	wait_for_unix_gc();
				1657	err = scm_send(sock, msg, &scm, false);
				1658	if (err < 0)
				1659	return err;
				1660
				1661	err = -EOPNOTSUPP;
				1662	if (msg->msg_flags&MSG_OOB)
				1663	goto out;
				1664
				1665	if (msg->msg_namelen) {
				1666	err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
				1667	if (err < 0)
				1668	goto out;
				1669	namelen = err;
				1670	} else {
				1671	sunaddr = NULL;
				1672	err = -ENOTCONN;
				1673	other = unix_peer_get(sk);
				1674	if (!other)
				1675	goto out;
				1676	}
				1677
				1678	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
				1679	&& (err = unix_autobind(sock)) != 0)
				1680	goto out;
				1681
				1682	err = -EMSGSIZE;
				1683	if (len > sk->sk_sndbuf - 32)
				1684	goto out;
				1685
				1686	if (len > SKB_MAX_ALLOC) {
				1687	data_len = min_t(size_t,
				1688	len - SKB_MAX_ALLOC,
				1689	MAX_SKB_FRAGS * PAGE_SIZE);
				1690	data_len = PAGE_ALIGN(data_len);
				1691
				1692	BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
				1693	}
				1694
				1695	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				1696	msg->msg_flags & MSG_DONTWAIT, &err,
				1697	PAGE_ALLOC_COSTLY_ORDER);
				1698	if (skb == NULL)
				1699	goto out;
				1700
				1701	err = unix_scm_to_skb(&scm, skb, true);
				1702	if (err < 0)
				1703	goto out_free;
				1704	max_level = err + 1;
				1705
				1706	skb_put(skb, len - data_len);
				1707	skb->data_len = data_len;
				1708	skb->len = len;
				1709	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
				1710	if (err)
				1711	goto out_free;
				1712
				1713	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
				1714
				1715	restart:
				1716	if (!other) {
				1717	err = -ECONNRESET;
				1718	if (sunaddr == NULL)
				1719	goto out_free;
				1720
				1721	other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
				1722	hash, &err);
				1723	if (other == NULL)
				1724	goto out_free;
				1725	}
				1726
				1727	if (sk_filter(other, skb) < 0) {
				1728	/* Toss the packet but do not return any error to the sender */
				1729	err = len;
				1730	goto out_free;
				1731	}
				1732
				1733	sk_locked = 0;
				1734	unix_state_lock(other);
				1735	restart_locked:
				1736	err = -EPERM;
				1737	if (!unix_may_send(sk, other))
				1738	goto out_unlock;
				1739
				1740	if (unlikely(sock_flag(other, SOCK_DEAD))) {
				1741	/*
				1742	* Check with 1003.1g - what should
				1743	* datagram error
				1744	*/
				1745	unix_state_unlock(other);
				1746	sock_put(other);
				1747
				1748	if (!sk_locked)
				1749	unix_state_lock(sk);
				1750
				1751	err = 0;
				1752	if (unix_peer(sk) == other) {
				1753	unix_peer(sk) = NULL;
				1754	unix_dgram_peer_wake_disconnect_wakeup(sk, other);
				1755
				1756	unix_state_unlock(sk);
				1757
				1758	unix_dgram_disconnected(sk, other);
				1759	sock_put(other);
				1760	err = -ECONNREFUSED;
				1761	} else {
				1762	unix_state_unlock(sk);
				1763	}
				1764
				1765	other = NULL;
				1766	if (err)
				1767	goto out_free;
				1768	goto restart;
				1769	}
				1770
				1771	err = -EPIPE;
				1772	if (other->sk_shutdown & RCV_SHUTDOWN)
				1773	goto out_unlock;
				1774
				1775	if (sk->sk_type != SOCK_SEQPACKET) {
				1776	err = security_unix_may_send(sk->sk_socket, other->sk_socket);
				1777	if (err)
				1778	goto out_unlock;
				1779	}
				1780
				1781	/* other == sk && unix_peer(other) != sk if
				1782	* - unix_peer(sk) == NULL, destination address bound to sk
				1783	* - unix_peer(sk) == sk by time of get but disconnected before lock
				1784	*/
				1785	if (other != sk &&
				1786	unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
				1787	if (timeo) {
				1788	timeo = unix_wait_for_peer(other, timeo);
				1789
				1790	err = sock_intr_errno(timeo);
				1791	if (signal_pending(current))
				1792	goto out_free;
				1793
				1794	goto restart;
				1795	}
				1796
				1797	if (!sk_locked) {
				1798	unix_state_unlock(other);
				1799	unix_state_double_lock(sk, other);
				1800	}
				1801
				1802	if (unix_peer(sk) != other \|\|
				1803	unix_dgram_peer_wake_me(sk, other)) {
				1804	err = -EAGAIN;
				1805	sk_locked = 1;
				1806	goto out_unlock;
				1807	}
				1808
				1809	if (!sk_locked) {
				1810	sk_locked = 1;
				1811	goto restart_locked;
				1812	}
				1813	}
				1814
				1815	if (unlikely(sk_locked))
				1816	unix_state_unlock(sk);
				1817
				1818	if (sock_flag(other, SOCK_RCVTSTAMP))
				1819	__net_timestamp(skb);
				1820	maybe_add_creds(skb, sock, other);
				1821	skb_queue_tail(&other->sk_receive_queue, skb);
				1822	if (max_level > unix_sk(other)->recursion_level)
				1823	unix_sk(other)->recursion_level = max_level;
				1824	unix_state_unlock(other);
				1825	other->sk_data_ready(other);
				1826	sock_put(other);
				1827	scm_destroy(&scm);
				1828	return len;
				1829
				1830	out_unlock:
				1831	if (sk_locked)
				1832	unix_state_unlock(sk);
				1833	unix_state_unlock(other);
				1834	out_free:
				1835	kfree_skb(skb);
				1836	out:
				1837	if (other)
				1838	sock_put(other);
				1839	scm_destroy(&scm);
				1840	return err;
				1841	}
				1842
				1843	/* We use paged skbs for stream sockets, and limit occupancy to 32768
				1844	* bytes, and a minimun of a full page.
				1845	*/
				1846	#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
				1847
				1848	static int unix_stream_sendmsg(struct socket sock, struct msghdr msg,
				1849	size_t len)
				1850	{
				1851	struct sock *sk = sock->sk;
				1852	struct sock *other = NULL;
				1853	int err, size;
				1854	struct sk_buff *skb;
				1855	int sent = 0;
				1856	struct scm_cookie scm;
				1857	bool fds_sent = false;
				1858	int max_level;
				1859	int data_len;
				1860
				1861	wait_for_unix_gc();
				1862	err = scm_send(sock, msg, &scm, false);
				1863	if (err < 0)
				1864	return err;
				1865
				1866	err = -EOPNOTSUPP;
				1867	if (msg->msg_flags&MSG_OOB)
				1868	goto out_err;
				1869
				1870	if (msg->msg_namelen) {
				1871	err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
				1872	goto out_err;
				1873	} else {
				1874	err = -ENOTCONN;
				1875	other = unix_peer(sk);
				1876	if (!other)
				1877	goto out_err;
				1878	}
				1879
				1880	if (sk->sk_shutdown & SEND_SHUTDOWN)
				1881	goto pipe_err;
				1882
				1883	while (sent < len) {
				1884	size = len - sent;
				1885
				1886	/* Keep two messages in the pipe so it schedules better */
				1887	size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
				1888
				1889	/* allow fallback to order-0 allocations */
				1890	size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
				1891
				1892	data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
				1893
				1894	data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
				1895
				1896	skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
				1897	msg->msg_flags & MSG_DONTWAIT, &err,
				1898	get_order(UNIX_SKB_FRAGS_SZ));
				1899	if (!skb)
				1900	goto out_err;
				1901
				1902	/* Only send the fds in the first buffer */
				1903	err = unix_scm_to_skb(&scm, skb, !fds_sent);
				1904	if (err < 0) {
				1905	kfree_skb(skb);
				1906	goto out_err;
				1907	}
				1908	max_level = err + 1;
				1909	fds_sent = true;
				1910
				1911	skb_put(skb, size - data_len);
				1912	skb->data_len = data_len;
				1913	skb->len = size;
				1914	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
				1915	if (err) {
				1916	kfree_skb(skb);
				1917	goto out_err;
				1918	}
				1919
				1920	unix_state_lock(other);
				1921
				1922	if (sock_flag(other, SOCK_DEAD) \|\|
				1923	(other->sk_shutdown & RCV_SHUTDOWN))
				1924	goto pipe_err_free;
				1925
				1926	maybe_add_creds(skb, sock, other);
				1927	skb_queue_tail(&other->sk_receive_queue, skb);
				1928	if (max_level > unix_sk(other)->recursion_level)
				1929	unix_sk(other)->recursion_level = max_level;
				1930	unix_state_unlock(other);
				1931	other->sk_data_ready(other);
				1932	sent += size;
				1933	}
				1934
				1935	scm_destroy(&scm);
				1936
				1937	return sent;
				1938
				1939	pipe_err_free:
				1940	unix_state_unlock(other);
				1941	kfree_skb(skb);
				1942	pipe_err:
				1943	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
				1944	send_sig(SIGPIPE, current, 0);
				1945	err = -EPIPE;
				1946	out_err:
				1947	scm_destroy(&scm);
				1948	return sent ? : err;
				1949	}
				1950
				1951	static ssize_t unix_stream_sendpage(struct socket socket, struct page page,
				1952	int offset, size_t size, int flags)
				1953	{
				1954	int err;
				1955	bool send_sigpipe = false;
				1956	bool init_scm = true;
				1957	struct scm_cookie scm;
				1958	struct sock other, sk = socket->sk;
				1959	struct sk_buff skb, newskb = NULL, *tail = NULL;
				1960
				1961	if (flags & MSG_OOB)
				1962	return -EOPNOTSUPP;
				1963
				1964	other = unix_peer(sk);
				1965	if (!other \|\| sk->sk_state != TCP_ESTABLISHED)
				1966	return -ENOTCONN;
				1967
				1968	if (false) {
				1969	alloc_skb:
				1970	unix_state_unlock(other);
				1971	mutex_unlock(&unix_sk(other)->iolock);
				1972	newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
				1973	&err, 0);
				1974	if (!newskb)
				1975	goto err;
				1976	}
				1977
				1978	/* we must acquire iolock as we modify already present
				1979	* skbs in the sk_receive_queue and mess with skb->len
				1980	*/
				1981	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
				1982	if (err) {
				1983	err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
				1984	goto err;
				1985	}
				1986
				1987	if (sk->sk_shutdown & SEND_SHUTDOWN) {
				1988	err = -EPIPE;
				1989	send_sigpipe = true;
				1990	goto err_unlock;
				1991	}
				1992
				1993	unix_state_lock(other);
				1994
				1995	if (sock_flag(other, SOCK_DEAD) \|\|
				1996	other->sk_shutdown & RCV_SHUTDOWN) {
				1997	err = -EPIPE;
				1998	send_sigpipe = true;
				1999	goto err_state_unlock;
				2000	}
				2001
				2002	if (init_scm) {
				2003	err = maybe_init_creds(&scm, socket, other);
				2004	if (err)
				2005	goto err_state_unlock;
				2006	init_scm = false;
				2007	}
				2008
				2009	skb = skb_peek_tail(&other->sk_receive_queue);
				2010	if (tail && tail == skb) {
				2011	skb = newskb;
				2012	} else if (!skb \|\| !unix_skb_scm_eq(skb, &scm)) {
				2013	if (newskb) {
				2014	skb = newskb;
				2015	} else {
				2016	tail = skb;
				2017	goto alloc_skb;
				2018	}
				2019	} else if (newskb) {
				2020	/* this is fast path, we don't necessarily need to
				2021	* call to kfree_skb even though with newskb == NULL
				2022	* this - does no harm
				2023	*/
				2024	consume_skb(newskb);
				2025	newskb = NULL;
				2026	}
				2027
				2028	if (skb_append_pagefrags(skb, page, offset, size)) {
				2029	tail = skb;
				2030	goto alloc_skb;
				2031	}
				2032
				2033	skb->len += size;
				2034	skb->data_len += size;
				2035	skb->truesize += size;
				2036	atomic_add(size, &sk->sk_wmem_alloc);
				2037
				2038	if (newskb) {
				2039	err = unix_scm_to_skb(&scm, skb, false);
				2040	if (err)
				2041	goto err_state_unlock;
				2042	spin_lock(&other->sk_receive_queue.lock);
				2043	__skb_queue_tail(&other->sk_receive_queue, newskb);
				2044	spin_unlock(&other->sk_receive_queue.lock);
				2045	}
				2046
				2047	unix_state_unlock(other);
				2048	mutex_unlock(&unix_sk(other)->iolock);
				2049
				2050	other->sk_data_ready(other);
				2051	scm_destroy(&scm);
				2052	return size;
				2053
				2054	err_state_unlock:
				2055	unix_state_unlock(other);
				2056	err_unlock:
				2057	mutex_unlock(&unix_sk(other)->iolock);
				2058	err:
				2059	kfree_skb(newskb);
				2060	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
				2061	send_sig(SIGPIPE, current, 0);
				2062	if (!init_scm)
				2063	scm_destroy(&scm);
				2064	return err;
				2065	}
				2066
				2067	static int unix_seqpacket_sendmsg(struct socket sock, struct msghdr msg,
				2068	size_t len)
				2069	{
				2070	int err;
				2071	struct sock *sk = sock->sk;
				2072
				2073	err = sock_error(sk);
				2074	if (err)
				2075	return err;
				2076
				2077	if (sk->sk_state != TCP_ESTABLISHED)
				2078	return -ENOTCONN;
				2079
				2080	if (msg->msg_namelen)
				2081	msg->msg_namelen = 0;
				2082
				2083	return unix_dgram_sendmsg(sock, msg, len);
				2084	}
				2085
				2086	static int unix_seqpacket_recvmsg(struct socket sock, struct msghdr msg,
				2087	size_t size, int flags)
				2088	{
				2089	struct sock *sk = sock->sk;
				2090
				2091	if (sk->sk_state != TCP_ESTABLISHED)
				2092	return -ENOTCONN;
				2093
				2094	return unix_dgram_recvmsg(sock, msg, size, flags);
				2095	}
				2096
				2097	static void unix_copy_addr(struct msghdr msg, struct sock sk)
				2098	{
				2099	struct unix_sock *u = unix_sk(sk);
				2100
				2101	if (u->addr) {
				2102	msg->msg_namelen = u->addr->len;
				2103	memcpy(msg->msg_name, u->addr->name, u->addr->len);
				2104	}
				2105	}
				2106
				2107	static int unix_dgram_recvmsg(struct socket sock, struct msghdr msg,
				2108	size_t size, int flags)
				2109	{
				2110	struct scm_cookie scm;
				2111	struct sock *sk = sock->sk;
				2112	struct unix_sock *u = unix_sk(sk);
				2113	int noblock = flags & MSG_DONTWAIT;
				2114	struct sk_buff *skb;
				2115	int err;
				2116	int peeked, skip;
				2117
				2118	err = -EOPNOTSUPP;
				2119	if (flags&MSG_OOB)
				2120	goto out;
				2121
				2122	err = mutex_lock_interruptible(&u->iolock);
				2123	if (unlikely(err)) {
				2124	/* recvmsg() in non blocking mode is supposed to return -EAGAIN
				2125	* sk_rcvtimeo is not honored by mutex_lock_interruptible()
				2126	*/
				2127	err = noblock ? -EAGAIN : -ERESTARTSYS;
				2128	goto out;
				2129	}
				2130
				2131	skip = sk_peek_offset(sk, flags);
				2132
				2133	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
				2134	if (!skb) {
				2135	unix_state_lock(sk);
				2136	/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
				2137	if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
				2138	(sk->sk_shutdown & RCV_SHUTDOWN))
				2139	err = 0;
				2140	unix_state_unlock(sk);
				2141	goto out_unlock;
				2142	}
				2143
				2144	wake_up_interruptible_sync_poll(&u->peer_wait,
				2145	POLLOUT \| POLLWRNORM \| POLLWRBAND);
				2146
				2147	if (msg->msg_name)
				2148	unix_copy_addr(msg, skb->sk);
				2149
				2150	if (size > skb->len - skip)
				2151	size = skb->len - skip;
				2152	else if (size < skb->len - skip)
				2153	msg->msg_flags \|= MSG_TRUNC;
				2154
				2155	err = skb_copy_datagram_msg(skb, skip, msg, size);
				2156	if (err)
				2157	goto out_free;
				2158
				2159	if (sock_flag(sk, SOCK_RCVTSTAMP))
				2160	__sock_recv_timestamp(msg, sk, skb);
				2161
				2162	memset(&scm, 0, sizeof(scm));
				2163
				2164	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
				2165	unix_set_secdata(&scm, skb);
				2166
				2167	if (!(flags & MSG_PEEK)) {
				2168	if (UNIXCB(skb).fp)
				2169	unix_detach_fds(&scm, skb);
				2170
				2171	sk_peek_offset_bwd(sk, skb->len);
				2172	} else {
				2173	/* It is questionable: on PEEK we could:
				2174	- do not return fds - good, but too simple 8)
				2175	- return fds, and do not return them on read (old strategy,
				2176	apparently wrong)
				2177	- clone fds (I chose it for now, it is the most universal
				2178	solution)
				2179
				2180	POSIX 1003.1g does not actually define this clearly
				2181	at all. POSIX 1003.1g doesn't define a lot of things
				2182	clearly however!
				2183
				2184	*/
				2185
				2186	sk_peek_offset_fwd(sk, size);
				2187
				2188	if (UNIXCB(skb).fp)
				2189	scm.fp = scm_fp_dup(UNIXCB(skb).fp);
				2190	}
				2191	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
				2192
				2193	scm_recv(sock, msg, &scm, flags);
				2194
				2195	out_free:
				2196	skb_free_datagram(sk, skb);
				2197	out_unlock:
				2198	mutex_unlock(&u->iolock);
				2199	out:
				2200	return err;
				2201	}
				2202
				2203	/*
				2204	* Sleep until more data has arrived. But check for races..
				2205	*/
				2206	static long unix_stream_data_wait(struct sock *sk, long timeo,
				2207	struct sk_buff *last, unsigned int last_len,
				2208	bool freezable)
				2209	{
				2210	struct sk_buff *tail;
				2211	DEFINE_WAIT(wait);
				2212
				2213	unix_state_lock(sk);
				2214
				2215	for (;;) {
				2216	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
				2217
				2218	tail = skb_peek_tail(&sk->sk_receive_queue);
				2219	if (tail != last \|\|
				2220	(tail && tail->len != last_len) \|\|
				2221	sk->sk_err \|\|
				2222	(sk->sk_shutdown & RCV_SHUTDOWN) \|\|
				2223	signal_pending(current) \|\|
				2224	!timeo)
				2225	break;
				2226
				2227	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
				2228	unix_state_unlock(sk);
				2229	if (freezable)
				2230	timeo = freezable_schedule_timeout(timeo);
				2231	else
				2232	timeo = schedule_timeout(timeo);
				2233	unix_state_lock(sk);
				2234
				2235	if (sock_flag(sk, SOCK_DEAD))
				2236	break;
				2237
				2238	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
				2239	}
				2240
				2241	finish_wait(sk_sleep(sk), &wait);
				2242	unix_state_unlock(sk);
				2243	return timeo;
				2244	}
				2245
				2246	static unsigned int unix_skb_len(const struct sk_buff *skb)
				2247	{
				2248	return skb->len - UNIXCB(skb).consumed;
				2249	}
				2250
				2251	struct unix_stream_read_state {
				2252	int (recv_actor)(struct sk_buff , int, int,
				2253	struct unix_stream_read_state *);
				2254	struct socket *socket;
				2255	struct msghdr *msg;
				2256	struct pipe_inode_info *pipe;
				2257	size_t size;
				2258	int flags;
				2259	unsigned int splice_flags;
				2260	};
				2261
				2262	static int unix_stream_read_generic(struct unix_stream_read_state *state,
				2263	bool freezable)
				2264	{
				2265	struct scm_cookie scm;
				2266	struct socket *sock = state->socket;
				2267	struct sock *sk = sock->sk;
				2268	struct unix_sock *u = unix_sk(sk);
				2269	int copied = 0;
				2270	int flags = state->flags;
				2271	int noblock = flags & MSG_DONTWAIT;
				2272	bool check_creds = false;
				2273	int target;
				2274	int err = 0;
				2275	long timeo;
				2276	int skip;
				2277	size_t size = state->size;
				2278	unsigned int last_len;
				2279
				2280	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
				2281	err = -EINVAL;
				2282	goto out;
				2283	}
				2284
				2285	if (unlikely(flags & MSG_OOB)) {
				2286	err = -EOPNOTSUPP;
				2287	goto out;
				2288	}
				2289
				2290	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
				2291	timeo = sock_rcvtimeo(sk, noblock);
				2292
				2293	memset(&scm, 0, sizeof(scm));
				2294
				2295	/* Lock the socket to prevent queue disordering
				2296	* while sleeps in memcpy_tomsg
				2297	*/
				2298	mutex_lock(&u->iolock);
				2299
				2300	if (flags & MSG_PEEK)
				2301	skip = sk_peek_offset(sk, flags);
				2302	else
				2303	skip = 0;
				2304
				2305	do {
				2306	int chunk;
				2307	bool drop_skb;
				2308	struct sk_buff skb, last;
				2309
				2310	unix_state_lock(sk);
				2311	if (sock_flag(sk, SOCK_DEAD)) {
				2312	err = -ECONNRESET;
				2313	goto unlock;
				2314	}
				2315	last = skb = skb_peek(&sk->sk_receive_queue);
				2316	last_len = last ? last->len : 0;
				2317	again:
				2318	if (skb == NULL) {
				2319	unix_sk(sk)->recursion_level = 0;
				2320	if (copied >= target)
				2321	goto unlock;
				2322
				2323	/*
				2324	* POSIX 1003.1g mandates this order.
				2325	*/
				2326
				2327	err = sock_error(sk);
				2328	if (err)
				2329	goto unlock;
				2330	if (sk->sk_shutdown & RCV_SHUTDOWN)
				2331	goto unlock;
				2332
				2333	unix_state_unlock(sk);
				2334	if (!timeo) {
				2335	err = -EAGAIN;
				2336	break;
				2337	}
				2338
				2339	mutex_unlock(&u->iolock);
				2340
				2341	timeo = unix_stream_data_wait(sk, timeo, last,
				2342	last_len, freezable);
				2343
				2344	if (signal_pending(current)) {
				2345	err = sock_intr_errno(timeo);
				2346	scm_destroy(&scm);
				2347	goto out;
				2348	}
				2349
				2350	mutex_lock(&u->iolock);
				2351	continue;
				2352	unlock:
				2353	unix_state_unlock(sk);
				2354	break;
				2355	}
				2356
				2357	while (skip >= unix_skb_len(skb)) {
				2358	skip -= unix_skb_len(skb);
				2359	last = skb;
				2360	last_len = skb->len;
				2361	skb = skb_peek_next(skb, &sk->sk_receive_queue);
				2362	if (!skb)
				2363	goto again;
				2364	}
				2365
				2366	unix_state_unlock(sk);
				2367
				2368	if (check_creds) {
				2369	/* Never glue messages from different writers */
				2370	if (!unix_skb_scm_eq(skb, &scm))
				2371	break;
				2372	} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
				2373	/* Copy credentials */
				2374	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
				2375	unix_set_secdata(&scm, skb);
				2376	check_creds = true;
				2377	}
				2378
				2379	/* Copy address just once */
				2380	if (state->msg && state->msg->msg_name) {
				2381	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
				2382	state->msg->msg_name);
				2383	unix_copy_addr(state->msg, skb->sk);
				2384	sunaddr = NULL;
				2385	}
				2386
				2387	chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
				2388	skb_get(skb);
				2389	chunk = state->recv_actor(skb, skip, chunk, state);
				2390	drop_skb = !unix_skb_len(skb);
				2391	/* skb is only safe to use if !drop_skb */
				2392	consume_skb(skb);
				2393	if (chunk < 0) {
				2394	if (copied == 0)
				2395	copied = -EFAULT;
				2396	break;
				2397	}
				2398	copied += chunk;
				2399	size -= chunk;
				2400
				2401	if (drop_skb) {
				2402	/* the skb was touched by a concurrent reader;
				2403	* we should not expect anything from this skb
				2404	* anymore and assume it invalid - we can be
				2405	* sure it was dropped from the socket queue
				2406	*
				2407	* let's report a short read
				2408	*/
				2409	err = 0;
				2410	break;
				2411	}
				2412
				2413	/* Mark read part of skb as used */
				2414	if (!(flags & MSG_PEEK)) {
				2415	UNIXCB(skb).consumed += chunk;
				2416
				2417	sk_peek_offset_bwd(sk, chunk);
				2418
				2419	if (UNIXCB(skb).fp)
				2420	unix_detach_fds(&scm, skb);
				2421
				2422	if (unix_skb_len(skb))
				2423	break;
				2424
				2425	skb_unlink(skb, &sk->sk_receive_queue);
				2426	consume_skb(skb);
				2427
				2428	if (scm.fp)
				2429	break;
				2430	} else {
				2431	/* It is questionable, see note in unix_dgram_recvmsg.
				2432	*/
				2433	if (UNIXCB(skb).fp)
				2434	scm.fp = scm_fp_dup(UNIXCB(skb).fp);
				2435
				2436	sk_peek_offset_fwd(sk, chunk);
				2437
				2438	if (UNIXCB(skb).fp)
				2439	break;
				2440
				2441	skip = 0;
				2442	last = skb;
				2443	last_len = skb->len;
				2444	unix_state_lock(sk);
				2445	skb = skb_peek_next(skb, &sk->sk_receive_queue);
				2446	if (skb)
				2447	goto again;
				2448	unix_state_unlock(sk);
				2449	break;
				2450	}
				2451	} while (size);
				2452
				2453	mutex_unlock(&u->iolock);
				2454	if (state->msg)
				2455	scm_recv(sock, state->msg, &scm, flags);
				2456	else
				2457	scm_destroy(&scm);
				2458	out:
				2459	return copied ? : err;
				2460	}
				2461
				2462	static int unix_stream_read_actor(struct sk_buff *skb,
				2463	int skip, int chunk,
				2464	struct unix_stream_read_state *state)
				2465	{
				2466	int ret;
				2467
				2468	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				2469	state->msg, chunk);
				2470	return ret ?: chunk;
				2471	}
				2472
				2473	static int unix_stream_recvmsg(struct socket sock, struct msghdr msg,
				2474	size_t size, int flags)
				2475	{
				2476	struct unix_stream_read_state state = {
				2477	.recv_actor = unix_stream_read_actor,
				2478	.socket = sock,
				2479	.msg = msg,
				2480	.size = size,
				2481	.flags = flags
				2482	};
				2483
				2484	return unix_stream_read_generic(&state, true);
				2485	}
				2486
				2487	static ssize_t skb_unix_socket_splice(struct sock *sk,
				2488	struct pipe_inode_info *pipe,
				2489	struct splice_pipe_desc *spd)
				2490	{
				2491	int ret;
				2492	struct unix_sock *u = unix_sk(sk);
				2493
				2494	mutex_unlock(&u->iolock);
				2495	ret = splice_to_pipe(pipe, spd);
				2496	mutex_lock(&u->iolock);
				2497
				2498	return ret;
				2499	}
				2500
				2501	static int unix_stream_splice_actor(struct sk_buff *skb,
				2502	int skip, int chunk,
				2503	struct unix_stream_read_state *state)
				2504	{
				2505	return skb_splice_bits(skb, state->socket->sk,
				2506	UNIXCB(skb).consumed + skip,
				2507	state->pipe, chunk, state->splice_flags,
				2508	skb_unix_socket_splice);
				2509	}
				2510
				2511	static ssize_t unix_stream_splice_read(struct socket sock, loff_t ppos,
				2512	struct pipe_inode_info *pipe,
				2513	size_t size, unsigned int flags)
				2514	{
				2515	struct unix_stream_read_state state = {
				2516	.recv_actor = unix_stream_splice_actor,
				2517	.socket = sock,
				2518	.pipe = pipe,
				2519	.size = size,
				2520	.splice_flags = flags,
				2521	};
				2522
				2523	if (unlikely(*ppos))
				2524	return -ESPIPE;
				2525
				2526	if (sock->file->f_flags & O_NONBLOCK \|\|
				2527	flags & SPLICE_F_NONBLOCK)
				2528	state.flags = MSG_DONTWAIT;
				2529
				2530	return unix_stream_read_generic(&state, false);
				2531	}
				2532
				2533	static int unix_shutdown(struct socket *sock, int mode)
				2534	{
				2535	struct sock *sk = sock->sk;
				2536	struct sock *other;
				2537
				2538	if (mode < SHUT_RD \|\| mode > SHUT_RDWR)
				2539	return -EINVAL;
				2540	/* This maps:
				2541	* SHUT_RD (0) -> RCV_SHUTDOWN (1)
				2542	* SHUT_WR (1) -> SEND_SHUTDOWN (2)
				2543	* SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
				2544	*/
				2545	++mode;
				2546
				2547	unix_state_lock(sk);
				2548	sk->sk_shutdown \|= mode;
				2549	other = unix_peer(sk);
				2550	if (other)
				2551	sock_hold(other);
				2552	unix_state_unlock(sk);
				2553	sk->sk_state_change(sk);
				2554
				2555	if (other &&
				2556	(sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET)) {
				2557
				2558	int peer_mode = 0;
				2559
				2560	if (mode&RCV_SHUTDOWN)
				2561	peer_mode \|= SEND_SHUTDOWN;
				2562	if (mode&SEND_SHUTDOWN)
				2563	peer_mode \|= RCV_SHUTDOWN;
				2564	unix_state_lock(other);
				2565	other->sk_shutdown \|= peer_mode;
				2566	unix_state_unlock(other);
				2567	other->sk_state_change(other);
				2568	if (peer_mode == SHUTDOWN_MASK)
				2569	sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
				2570	else if (peer_mode & RCV_SHUTDOWN)
				2571	sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
				2572	}
				2573	if (other)
				2574	sock_put(other);
				2575
				2576	return 0;
				2577	}
				2578
				2579	long unix_inq_len(struct sock *sk)
				2580	{
				2581	struct sk_buff *skb;
				2582	long amount = 0;
				2583
				2584	if (sk->sk_state == TCP_LISTEN)
				2585	return -EINVAL;
				2586
				2587	spin_lock(&sk->sk_receive_queue.lock);
				2588	if (sk->sk_type == SOCK_STREAM \|\|
				2589	sk->sk_type == SOCK_SEQPACKET) {
				2590	skb_queue_walk(&sk->sk_receive_queue, skb)
				2591	amount += unix_skb_len(skb);
				2592	} else {
				2593	skb = skb_peek(&sk->sk_receive_queue);
				2594	if (skb)
				2595	amount = skb->len;
				2596	}
				2597	spin_unlock(&sk->sk_receive_queue.lock);
				2598
				2599	return amount;
				2600	}
				2601	EXPORT_SYMBOL_GPL(unix_inq_len);
				2602
				2603	long unix_outq_len(struct sock *sk)
				2604	{
				2605	return sk_wmem_alloc_get(sk);
				2606	}
				2607	EXPORT_SYMBOL_GPL(unix_outq_len);
				2608
				2609	static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
				2610	{
				2611	struct sock *sk = sock->sk;
				2612	long amount = 0;
				2613	int err;
				2614
				2615	switch (cmd) {
				2616	case SIOCOUTQ:
				2617	amount = unix_outq_len(sk);
				2618	err = put_user(amount, (int __user *)arg);
				2619	break;
				2620	case SIOCINQ:
				2621	amount = unix_inq_len(sk);
				2622	if (amount < 0)
				2623	err = amount;
				2624	else
				2625	err = put_user(amount, (int __user *)arg);
				2626	break;
				2627	default:
				2628	err = -ENOIOCTLCMD;
				2629	break;
				2630	}
				2631	return err;
				2632	}
				2633
				2634	static unsigned int unix_poll(struct file file, struct socket sock, poll_table *wait)
				2635	{
				2636	struct sock *sk = sock->sk;
				2637	unsigned int mask;
				2638
				2639	sock_poll_wait(file, sk_sleep(sk), wait);
				2640	mask = 0;
				2641
				2642	/* exceptional events? */
				2643	if (sk->sk_err)
				2644	mask \|= POLLERR;
				2645	if (sk->sk_shutdown == SHUTDOWN_MASK)
				2646	mask \|= POLLHUP;
				2647	if (sk->sk_shutdown & RCV_SHUTDOWN)
				2648	mask \|= POLLRDHUP \| POLLIN \| POLLRDNORM;
				2649
				2650	/* readable? */
				2651	if (!skb_queue_empty(&sk->sk_receive_queue))
				2652	mask \|= POLLIN \| POLLRDNORM;
				2653
				2654	/* Connection-based need to check for termination and startup */
				2655	if ((sk->sk_type == SOCK_STREAM \|\| sk->sk_type == SOCK_SEQPACKET) &&
				2656	sk->sk_state == TCP_CLOSE)
				2657	mask \|= POLLHUP;
				2658
				2659	/*
				2660	* we set writable also when the other side has shut down the
				2661	* connection. This prevents stuck sockets.
				2662	*/
				2663	if (unix_writable(sk))
				2664	mask \|= POLLOUT \| POLLWRNORM \| POLLWRBAND;
				2665
				2666	return mask;
				2667	}
				2668
				2669	static unsigned int unix_dgram_poll(struct file file, struct socket sock,
				2670	poll_table *wait)
				2671	{
				2672	struct sock sk = sock->sk, other;
				2673	unsigned int mask, writable;
				2674
				2675	sock_poll_wait(file, sk_sleep(sk), wait);
				2676	mask = 0;
				2677
				2678	/* exceptional events? */
				2679	if (sk->sk_err \|\| !skb_queue_empty(&sk->sk_error_queue))
				2680	mask \|= POLLERR \|
				2681	(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
				2682
				2683	if (sk->sk_shutdown & RCV_SHUTDOWN)
				2684	mask \|= POLLRDHUP \| POLLIN \| POLLRDNORM;
				2685	if (sk->sk_shutdown == SHUTDOWN_MASK)
				2686	mask \|= POLLHUP;
				2687
				2688	/* readable? */
				2689	if (!skb_queue_empty(&sk->sk_receive_queue))
				2690	mask \|= POLLIN \| POLLRDNORM;
				2691
				2692	/* Connection-based need to check for termination and startup */
				2693	if (sk->sk_type == SOCK_SEQPACKET) {
				2694	if (sk->sk_state == TCP_CLOSE)
				2695	mask \|= POLLHUP;
				2696	/* connection hasn't started yet? */
				2697	if (sk->sk_state == TCP_SYN_SENT)
				2698	return mask;
				2699	}
				2700
				2701	/* No write status requested, avoid expensive OUT tests. */
				2702	if (!(poll_requested_events(wait) & (POLLWRBAND\|POLLWRNORM\|POLLOUT)))
				2703	return mask;
				2704
				2705	writable = unix_writable(sk);
				2706	if (writable) {
				2707	unix_state_lock(sk);
				2708
				2709	other = unix_peer(sk);
				2710	if (other && unix_peer(other) != sk &&
				2711	unix_recvq_full(other) &&
				2712	unix_dgram_peer_wake_me(sk, other))
				2713	writable = 0;
				2714
				2715	unix_state_unlock(sk);
				2716	}
				2717
				2718	if (writable)
				2719	mask \|= POLLOUT \| POLLWRNORM \| POLLWRBAND;
				2720	else
				2721	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				2722
				2723	return mask;
				2724	}
				2725
				2726	#ifdef CONFIG_PROC_FS
				2727
				2728	#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
				2729
				2730	#define get_bucket(x) ((x) >> BUCKET_SPACE)
				2731	#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
				2732	#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE \| (o))
				2733
				2734	static struct sock unix_from_bucket(struct seq_file seq, loff_t *pos)
				2735	{
				2736	unsigned long offset = get_offset(*pos);
				2737	unsigned long bucket = get_bucket(*pos);
				2738	struct sock *sk;
				2739	unsigned long count = 0;
				2740
				2741	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
				2742	if (sock_net(sk) != seq_file_net(seq))
				2743	continue;
				2744	if (++count == offset)
				2745	break;
				2746	}
				2747
				2748	return sk;
				2749	}
				2750
				2751	static struct sock unix_next_socket(struct seq_file seq,
				2752	struct sock *sk,
				2753	loff_t *pos)
				2754	{
				2755	unsigned long bucket;
				2756
				2757	while (sk > (struct sock *)SEQ_START_TOKEN) {
				2758	sk = sk_next(sk);
				2759	if (!sk)
				2760	goto next_bucket;
				2761	if (sock_net(sk) == seq_file_net(seq))
				2762	return sk;
				2763	}
				2764
				2765	do {
				2766	sk = unix_from_bucket(seq, pos);
				2767	if (sk)
				2768	return sk;
				2769
				2770	next_bucket:
				2771	bucket = get_bucket(*pos) + 1;
				2772	*pos = set_bucket_offset(bucket, 1);
				2773	} while (bucket < ARRAY_SIZE(unix_socket_table));
				2774
				2775	return NULL;
				2776	}
				2777
				2778	static void unix_seq_start(struct seq_file seq, loff_t *pos)
				2779	__acquires(unix_table_lock)
				2780	{
				2781	spin_lock(&unix_table_lock);
				2782
				2783	if (!*pos)
				2784	return SEQ_START_TOKEN;
				2785
				2786	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
				2787	return NULL;
				2788
				2789	return unix_next_socket(seq, NULL, pos);
				2790	}
				2791
				2792	static void unix_seq_next(struct seq_file seq, void v, loff_t pos)
				2793	{
				2794	++*pos;
				2795	return unix_next_socket(seq, v, pos);
				2796	}
				2797
				2798	static void unix_seq_stop(struct seq_file seq, void v)
				2799	__releases(unix_table_lock)
				2800	{
				2801	spin_unlock(&unix_table_lock);
				2802	}
				2803
				2804	static int unix_seq_show(struct seq_file seq, void v)
				2805	{
				2806
				2807	if (v == SEQ_START_TOKEN)
				2808	seq_puts(seq, "Num RefCount Protocol Flags Type St "
				2809	"Inode Path\n");
				2810	else {
				2811	struct sock *s = v;
				2812	struct unix_sock *u = unix_sk(s);
				2813	unix_state_lock(s);
				2814
				2815	seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
				2816	s,
				2817	atomic_read(&s->sk_refcnt),
				2818	0,
				2819	s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
				2820	s->sk_type,
				2821	s->sk_socket ?
				2822	(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
				2823	(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
				2824	sock_i_ino(s));
				2825
				2826	if (u->addr) {
				2827	int i, len;
				2828	seq_putc(seq, ' ');
				2829
				2830	i = 0;
				2831	len = u->addr->len - sizeof(short);
				2832	if (!UNIX_ABSTRACT(s))
				2833	len--;
				2834	else {
				2835	seq_putc(seq, '@');
				2836	i++;
				2837	}
				2838	for ( ; i < len; i++)
				2839	seq_putc(seq, u->addr->name->sun_path[i]);
				2840	}
				2841	unix_state_unlock(s);
				2842	seq_putc(seq, '\n');
				2843	}
				2844
				2845	return 0;
				2846	}
				2847
				2848	static const struct seq_operations unix_seq_ops = {
				2849	.start = unix_seq_start,
				2850	.next = unix_seq_next,
				2851	.stop = unix_seq_stop,
				2852	.show = unix_seq_show,
				2853	};
				2854
				2855	static int unix_seq_open(struct inode inode, struct file file)
				2856	{
				2857	return seq_open_net(inode, file, &unix_seq_ops,
				2858	sizeof(struct seq_net_private));
				2859	}
				2860
				2861	static const struct file_operations unix_seq_fops = {
				2862	.owner = THIS_MODULE,
				2863	.open = unix_seq_open,
				2864	.read = seq_read,
				2865	.llseek = seq_lseek,
				2866	.release = seq_release_net,
				2867	};
				2868
				2869	#endif
				2870
				2871	static const struct net_proto_family unix_family_ops = {
				2872	.family = PF_UNIX,
				2873	.create = unix_create,
				2874	.owner = THIS_MODULE,
				2875	};
				2876
				2877
				2878	static int __net_init unix_net_init(struct net *net)
				2879	{
				2880	int error = -ENOMEM;
				2881
				2882	net->unx.sysctl_max_dgram_qlen = 10;
				2883	if (unix_sysctl_register(net))
				2884	goto out;
				2885
				2886	#ifdef CONFIG_PROC_FS
				2887	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
				2888	unix_sysctl_unregister(net);
				2889	goto out;
				2890	}
				2891	#endif
				2892	error = 0;
				2893	out:
				2894	return error;
				2895	}
				2896
				2897	static void __net_exit unix_net_exit(struct net *net)
				2898	{
				2899	unix_sysctl_unregister(net);
				2900	remove_proc_entry("unix", net->proc_net);
				2901	}
				2902
				2903	static struct pernet_operations unix_net_ops = {
				2904	.init = unix_net_init,
				2905	.exit = unix_net_exit,
				2906	};
				2907
				2908	static int __init af_unix_init(void)
				2909	{
				2910	int rc = -1;
				2911
				2912	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
				2913
				2914	rc = proto_register(&unix_proto, 1);
				2915	if (rc != 0) {
				2916	pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
				2917	goto out;
				2918	}
				2919
				2920	sock_register(&unix_family_ops);
				2921	register_pernet_subsys(&unix_net_ops);
				2922	out:
				2923	return rc;
				2924	}
				2925
				2926	static void __exit af_unix_exit(void)
				2927	{
				2928	sock_unregister(PF_UNIX);
				2929	proto_unregister(&unix_proto);
				2930	unregister_pernet_subsys(&unix_net_ops);
				2931	}
				2932
				2933	/* Earlier than device_initcall() so that other drivers invoking
				2934	request_module() don't end up in a loop when modprobe tries
				2935	to use a UNIX socket. But later than subsys_initcall() because
				2936	we depend on stuff initialised there */
				2937	fs_initcall(af_unix_init);
				2938	module_exit(af_unix_exit);
				2939
				2940	MODULE_LICENSE("GPL");
				2941	MODULE_ALIAS_NETPROTO(PF_UNIX);