Blame - kernel/cgroup_pids.c - codeaurora/cp-linux

blob: b50d5a167fda7d2ae6902efd55e551efed780007 [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame]	1	/*
				2	* Process number limiting controller for cgroups.
				3	*
				4	* Used to allow a cgroup hierarchy to stop any new processes from fork()ing
				5	* after a certain limit is reached.
				6	*
				7	* Since it is trivial to hit the task limit without hitting any kmemcg limits
				8	* in place, PIDs are a fundamental resource. As such, PID exhaustion must be
				9	* preventable in the scope of a cgroup hierarchy by allowing resource limiting
				10	* of the number of tasks in a cgroup.
				11	*
				12	* In order to use the `pids` controller, set the maximum number of tasks in
				13	* pids.max (this is not available in the root cgroup for obvious reasons). The
				14	* number of processes currently in the cgroup is given by pids.current.
				15	* Organisational operations are not blocked by cgroup policies, so it is
				16	* possible to have pids.current > pids.max. However, it is not possible to
				17	* violate a cgroup policy through fork(). fork() will return -EAGAIN if forking
				18	* would cause a cgroup policy to be violated.
				19	*
				20	* To set a cgroup to have no limit, set pids.max to "max". This is the default
				21	* for all new cgroups (N.B. that PID limits are hierarchical, so the most
				22	* stringent limit in the hierarchy is followed).
				23	*
				24	* pids.current tracks all child cgroup hierarchies, so parent/pids.current is
				25	* a superset of parent/child/pids.current.
				26	*
				27	* Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com>
				28	*
				29	* This file is subject to the terms and conditions of version 2 of the GNU
				30	* General Public License. See the file COPYING in the main directory of the
				31	* Linux distribution for more details.
				32	*/
				33
				34	#include <linux/kernel.h>
				35	#include <linux/threads.h>
				36	#include <linux/atomic.h>
				37	#include <linux/cgroup.h>
				38	#include <linux/slab.h>
				39
				40	#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
				41	#define PIDS_MAX_STR "max"
				42
				43	struct pids_cgroup {
				44	struct cgroup_subsys_state css;
				45
				46	/*
				47	* Use 64-bit types so that we can safely represent "max" as
				48	* %PIDS_MAX = (%PID_MAX_LIMIT + 1).
				49	*/
				50	atomic64_t counter;
				51	int64_t limit;
				52	};
				53
				54	static struct pids_cgroup css_pids(struct cgroup_subsys_state css)
				55	{
				56	return container_of(css, struct pids_cgroup, css);
				57	}
				58
				59	static struct pids_cgroup parent_pids(struct pids_cgroup pids)
				60	{
				61	return css_pids(pids->css.parent);
				62	}
				63
				64	static struct cgroup_subsys_state *
				65	pids_css_alloc(struct cgroup_subsys_state *parent)
				66	{
				67	struct pids_cgroup *pids;
				68
				69	pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL);
				70	if (!pids)
				71	return ERR_PTR(-ENOMEM);
				72
				73	pids->limit = PIDS_MAX;
				74	atomic64_set(&pids->counter, 0);
				75	return &pids->css;
				76	}
				77
				/* pids_css_free - release the pids_cgroup that embeds @css. */
				static void pids_css_free(struct cgroup_subsys_state *css)
				{
					struct pids_cgroup *doomed = css_pids(css);

					kfree(doomed);
				}
				82
				83	/**
				84	* pids_cancel - uncharge the local pid count
				85	* @pids: the pid cgroup state
				86	* @num: the number of pids to cancel
				87	*
				88	* This function will WARN if the pid count goes under 0, because such a case is
				89	* a bug in the pids controller proper.
				90	*/
				91	static void pids_cancel(struct pids_cgroup *pids, int num)
				92	{
				93	/*
				94	* A negative count (or overflow for that matter) is invalid,
				95	* and indicates a bug in the `pids` controller proper.
				96	*/
				97	WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter));
				98	}
				99
				/**
				 * pids_uncharge - hierarchically uncharge the pid count
				 * @pids: the pid cgroup state
				 * @num: the number of pids to uncharge
				 *
				 * Cancels @num pids on @pids and on each ancestor, stopping before the
				 * first level for which parent_pids() evaluates false.
				 */
				static void pids_uncharge(struct pids_cgroup *pids, int num)
				{
					struct pids_cgroup *level = pids;

					while (parent_pids(level)) {
						pids_cancel(level, num);
						level = parent_pids(level);
					}
				}
				112
				113	/**
				114	* pids_charge - hierarchically charge the pid count
				115	* @pids: the pid cgroup state
				116	* @num: the number of pids to charge
				117	*
				118	* This function does not follow the pid limit set. It cannot fail and the new
				119	* pid count may exceed the limit. This is only used for reverting failed
				120	* attaches, where there is no other way out than violating the limit.
				121	*/
				122	static void pids_charge(struct pids_cgroup *pids, int num)
				123	{
				124	struct pids_cgroup *p;
				125
				126	for (p = pids; parent_pids(p); p = parent_pids(p))
				127	atomic64_add(num, &p->counter);
				128	}
				129
				130	/**
				131	* pids_try_charge - hierarchically try to charge the pid count
				132	* @pids: the pid cgroup state
				133	* @num: the number of pids to charge
				134	*
				135	* This function follows the set limit. It will fail if the charge would cause
				136	* the new value to exceed the hierarchical limit. Returns 0 if the charge
				137	* succeded, otherwise -EAGAIN.
				138	*/
				139	static int pids_try_charge(struct pids_cgroup *pids, int num)
				140	{
				141	struct pids_cgroup p, q;
				142
				143	for (p = pids; parent_pids(p); p = parent_pids(p)) {
				144	int64_t new = atomic64_add_return(num, &p->counter);
				145
				146	/*
				147	* Since new is capped to the maximum number of pid_t, if
				148	* p->limit is %PIDS_MAX then we know that this test will never
				149	* fail.
				150	*/
				151	if (new > p->limit)
				152	goto revert;
				153	}
				154
				155	return 0;
				156
				157	revert:
				158	for (q = pids; q != p; q = parent_pids(q))
				159	pids_cancel(q, num);
				160	pids_cancel(p, num);
				161
				162	return -EAGAIN;
				163	}
				164
				165	static int pids_can_attach(struct cgroup_taskset *tset)
				166	{
				167	struct task_struct *task;
				168	struct cgroup_subsys_state *dst_css;
				169
				170	cgroup_taskset_for_each(task, dst_css, tset) {
				171	struct pids_cgroup *pids = css_pids(dst_css);
				172	struct cgroup_subsys_state *old_css;
				173	struct pids_cgroup *old_pids;
				174
				175	/*
				176	* No need to pin @old_css between here and cancel_attach()
				177	* because cgroup core protects it from being freed before
				178	* the migration completes or fails.
				179	*/
				180	old_css = task_css(task, pids_cgrp_id);
				181	old_pids = css_pids(old_css);
				182
				183	pids_charge(pids, 1);
				184	pids_uncharge(old_pids, 1);
				185	}
				186
				187	return 0;
				188	}
				189
				190	static void pids_cancel_attach(struct cgroup_taskset *tset)
				191	{
				192	struct task_struct *task;
				193	struct cgroup_subsys_state *dst_css;
				194
				195	cgroup_taskset_for_each(task, dst_css, tset) {
				196	struct pids_cgroup *pids = css_pids(dst_css);
				197	struct cgroup_subsys_state *old_css;
				198	struct pids_cgroup *old_pids;
				199
				200	old_css = task_css(task, pids_cgrp_id);
				201	old_pids = css_pids(old_css);
				202
				203	pids_charge(old_pids, 1);
				204	pids_uncharge(pids, 1);
				205	}
				206	}
				207
				208	/*
				209	* task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
				210	* on threadgroup_change_begin() held by the copy_process().
				211	*/
				212	static int pids_can_fork(struct task_struct task, void *priv_p)
				213	{
				214	struct cgroup_subsys_state *css;
				215	struct pids_cgroup *pids;
				216
				217	css = task_css_check(current, pids_cgrp_id, true);
				218	pids = css_pids(css);
				219	return pids_try_charge(pids, 1);
				220	}
				221
				222	static void pids_cancel_fork(struct task_struct task, void priv)
				223	{
				224	struct cgroup_subsys_state *css;
				225	struct pids_cgroup *pids;
				226
				227	css = task_css_check(current, pids_cgrp_id, true);
				228	pids = css_pids(css);
				229	pids_uncharge(pids, 1);
				230	}
				231
				232	static void pids_free(struct task_struct *task)
				233	{
				234	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
				235
				236	pids_uncharge(pids, 1);
				237	}
				238
				239	static ssize_t pids_max_write(struct kernfs_open_file of, char buf,
				240	size_t nbytes, loff_t off)
				241	{
				242	struct cgroup_subsys_state *css = of_css(of);
				243	struct pids_cgroup *pids = css_pids(css);
				244	int64_t limit;
				245	int err;
				246
				247	buf = strstrip(buf);
				248	if (!strcmp(buf, PIDS_MAX_STR)) {
				249	limit = PIDS_MAX;
				250	goto set_limit;
				251	}
				252
				253	err = kstrtoll(buf, 0, &limit);
				254	if (err)
				255	return err;
				256
				257	if (limit < 0 \|\| limit >= PIDS_MAX)
				258	return -EINVAL;
				259
				260	set_limit:
				261	/*
				262	* Limit updates don't need to be mutex'd, since it isn't
				263	* critical that any racing fork()s follow the new limit.
				264	*/
				265	pids->limit = limit;
				266	return nbytes;
				267	}
				268
				269	static int pids_max_show(struct seq_file sf, void v)
				270	{
				271	struct cgroup_subsys_state *css = seq_css(sf);
				272	struct pids_cgroup *pids = css_pids(css);
				273	int64_t limit = pids->limit;
				274
				275	if (limit >= PIDS_MAX)
				276	seq_printf(sf, "%s\n", PIDS_MAX_STR);
				277	else
				278	seq_printf(sf, "%lld\n", limit);
				279
				280	return 0;
				281	}
				282
				283	static s64 pids_current_read(struct cgroup_subsys_state *css,
				284	struct cftype *cft)
				285	{
				286	struct pids_cgroup *pids = css_pids(css);
				287
				288	return atomic64_read(&pids->counter);
				289	}
				290
				291	static struct cftype pids_files[] = {
				292	{
				293	.name = "max",
				294	.write = pids_max_write,
				295	.seq_show = pids_max_show,
				296	.flags = CFTYPE_NOT_ON_ROOT,
				297	},
				298	{
				299	.name = "current",
				300	.read_s64 = pids_current_read,
				301	.flags = CFTYPE_NOT_ON_ROOT,
				302	},
				303	{ } /* terminate */
				304	};
				305
				306	struct cgroup_subsys pids_cgrp_subsys = {
				307	.css_alloc = pids_css_alloc,
				308	.css_free = pids_css_free,
				309	.can_attach = pids_can_attach,
				310	.cancel_attach = pids_cancel_attach,
				311	.can_fork = pids_can_fork,
				312	.cancel_fork = pids_cancel_fork,
				313	.free = pids_free,
				314	.legacy_cftypes = pids_files,
				315	.dfl_cftypes = pids_files,
				316	};