Blame - kernel/rcu/update.c - codeaurora/cp-linux

blob: 5f748c5a40f0756b2d6c97fa615cde551649db4a [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame]	1	/*
				2	* Read-Copy Update mechanism for mutual exclusion
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License as published by
				6	* the Free Software Foundation; either version 2 of the License, or
				7	* (at your option) any later version.
				8	*
				9	* This program is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, you can access it online at
				16	* http://www.gnu.org/licenses/gpl-2.0.html.
				17	*
				18	* Copyright IBM Corporation, 2001
				19	*
				20	* Authors: Dipankar Sarma <dipankar@in.ibm.com>
				21	* Manfred Spraul <manfred@colorfullife.com>
				22	*
				23	* Based on the original work by Paul McKenney <paulmck@us.ibm.com>
				24	* and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
				25	* Papers:
				26	* http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
				27	* http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
				28	*
				29	* For detailed explanation of Read-Copy Update mechanism see -
				30	* http://lse.sourceforge.net/locking/rcupdate.html
				31	*
				32	*/
				33	#include <linux/types.h>
				34	#include <linux/kernel.h>
				35	#include <linux/init.h>
				36	#include <linux/spinlock.h>
				37	#include <linux/smp.h>
				38	#include <linux/interrupt.h>
				39	#include <linux/sched.h>
				40	#include <linux/atomic.h>
				41	#include <linux/bitops.h>
				42	#include <linux/percpu.h>
				43	#include <linux/notifier.h>
				44	#include <linux/cpu.h>
				45	#include <linux/mutex.h>
				46	#include <linux/export.h>
				47	#include <linux/hardirq.h>
				48	#include <linux/delay.h>
				49	#include <linux/module.h>
				50	#include <linux/kthread.h>
				51	#include <linux/tick.h>
				52
				53	#define CREATE_TRACE_POINTS
				54
				55	#include "rcu.h"
				56
				57	MODULE_ALIAS("rcupdate");
				58	#ifdef MODULE_PARAM_PREFIX
				59	#undef MODULE_PARAM_PREFIX
				60	#endif
				61	#define MODULE_PARAM_PREFIX "rcupdate."
				62
				63	module_param(rcu_expedited, int, 0);
				64
				65	#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
				66	/**
				67	* rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
				68	*
				69	* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
				70	* RCU-sched read-side critical section. In absence of
				71	* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
				72	* critical section unless it can prove otherwise. Note that disabling
				73	* of preemption (including disabling irqs) counts as an RCU-sched
				74	* read-side critical section. This is useful for debug checks in functions
				75	* that required that they be called within an RCU-sched read-side
				76	* critical section.
				77	*
				78	* Check debug_lockdep_rcu_enabled() to prevent false positives during boot
				79	* and while lockdep is disabled.
				80	*
				81	* Note that if the CPU is in the idle loop from an RCU point of
				82	* view (ie: that we are in the section between rcu_idle_enter() and
				83	* rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
				84	* did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
				85	* that are in such a section, considering these as in extended quiescent
				86	* state, so such a CPU is effectively never in an RCU read-side critical
				87	* section regardless of what RCU primitives it invokes. This state of
				88	* affairs is required --- we need to keep an RCU-free window in idle
				89	* where the CPU may possibly enter into low power mode. This way we can
				90	* notice an extended quiescent state to other CPUs that started a grace
				91	* period. Otherwise we would delay any grace period as long as we run in
				92	* the idle task.
				93	*
				94	* Similarly, we avoid claiming an RCU-sched read lock held if the current
				95	* CPU is offline.
				96	*/
				97	int rcu_read_lock_sched_held(void)
				98	{
				99	int lockdep_opinion = 0;
				100
				101	if (!debug_lockdep_rcu_enabled())
				102	return 1;
				103	if (!rcu_is_watching())
				104	return 0;
				105	if (!rcu_lockdep_current_cpu_online())
				106	return 0;
				107	if (debug_locks)
				108	lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
				109	return lockdep_opinion \|\| preempt_count() != 0 \|\| irqs_disabled();
				110	}
				111	EXPORT_SYMBOL(rcu_read_lock_sched_held);
				112	#endif
				113
				114	#ifndef CONFIG_TINY_RCU
				115
				116	static atomic_t rcu_expedited_nesting =
				117	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
				118
				119	/*
				120	* Should normal grace-period primitives be expedited? Intended for
				121	* use within RCU. Note that this function takes the rcu_expedited
				122	* sysfs/boot variable into account as well as the rcu_expedite_gp()
				123	* nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
				124	* returns false is a -really- bad idea.
				125	*/
				126	bool rcu_gp_is_expedited(void)
				127	{
				128	return rcu_expedited \|\| atomic_read(&rcu_expedited_nesting);
				129	}
				130	EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);
				131
				132	/**
				133	* rcu_expedite_gp - Expedite future RCU grace periods
				134	*
				135	* After a call to this function, future calls to synchronize_rcu() and
				136	* friends act as the corresponding synchronize_rcu_expedited() function
				137	* had instead been called.
				138	*/
				139	void rcu_expedite_gp(void)
				140	{
				141	atomic_inc(&rcu_expedited_nesting);
				142	}
				143	EXPORT_SYMBOL_GPL(rcu_expedite_gp);
				144
				145	/**
				146	* rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
				147	*
				148	* Undo a prior call to rcu_expedite_gp(). If all prior calls to
				149	* rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
				150	* and if the rcu_expedited sysfs/boot parameter is not set, then all
				151	* subsequent calls to synchronize_rcu() and friends will return to
				152	* their normal non-expedited behavior.
				153	*/
				154	void rcu_unexpedite_gp(void)
				155	{
				156	atomic_dec(&rcu_expedited_nesting);
				157	}
				158	EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
				159
				160	#endif /* #ifndef CONFIG_TINY_RCU */
				161
				162	/*
				163	* Inform RCU of the end of the in-kernel boot sequence.
				164	*/
				165	void rcu_end_inkernel_boot(void)
				166	{
				167	if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
				168	rcu_unexpedite_gp();
				169	}
				170
				171	#ifdef CONFIG_PREEMPT_RCU
				172
				173	/*
				174	* Preemptible RCU implementation for rcu_read_lock().
				175	* Just increment ->rcu_read_lock_nesting, shared state will be updated
				176	* if we block.
				177	*/
				178	void __rcu_read_lock(void)
				179	{
				180	current->rcu_read_lock_nesting++;
				181	barrier(); /* critical section after entry code. */
				182	}
				183	EXPORT_SYMBOL_GPL(__rcu_read_lock);
				184
				185	/*
				186	* Preemptible RCU implementation for rcu_read_unlock().
				187	* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
				188	* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
				189	* invoke rcu_read_unlock_special() to clean up after a context switch
				190	* in an RCU read-side critical section and other special cases.
				191	*/
				192	void __rcu_read_unlock(void)
				193	{
				194	struct task_struct *t = current;
				195
				196	if (t->rcu_read_lock_nesting != 1) {
				197	--t->rcu_read_lock_nesting;
				198	} else {
				199	barrier(); /* critical section before exit code. */
				200	t->rcu_read_lock_nesting = INT_MIN;
				201	barrier(); /* assign before ->rcu_read_unlock_special load */
				202	if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
				203	rcu_read_unlock_special(t);
				204	barrier(); /* ->rcu_read_unlock_special load before assign */
				205	t->rcu_read_lock_nesting = 0;
				206	}
				207	#ifdef CONFIG_PROVE_LOCKING
				208	{
				209	int rrln = READ_ONCE(t->rcu_read_lock_nesting);
				210
				211	WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
				212	}
				213	#endif /* #ifdef CONFIG_PROVE_LOCKING */
				214	}
				215	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
				216
				217	#endif /* #ifdef CONFIG_PREEMPT_RCU */
				218
				219	#ifdef CONFIG_DEBUG_LOCK_ALLOC
				220	static struct lock_class_key rcu_lock_key;
				221	struct lockdep_map rcu_lock_map =
				222	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
				223	EXPORT_SYMBOL_GPL(rcu_lock_map);
				224
				225	static struct lock_class_key rcu_bh_lock_key;
				226	struct lockdep_map rcu_bh_lock_map =
				227	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
				228	EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
				229
				230	static struct lock_class_key rcu_sched_lock_key;
				231	struct lockdep_map rcu_sched_lock_map =
				232	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
				233	EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
				234
				235	static struct lock_class_key rcu_callback_key;
				236	struct lockdep_map rcu_callback_map =
				237	STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
				238	EXPORT_SYMBOL_GPL(rcu_callback_map);
				239
				240	int notrace debug_lockdep_rcu_enabled(void)
				241	{
				242	return rcu_scheduler_active && debug_locks &&
				243	current->lockdep_recursion == 0;
				244	}
				245	EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
				246
				247	/**
				248	* rcu_read_lock_held() - might we be in RCU read-side critical section?
				249	*
				250	* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
				251	* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
				252	* this assumes we are in an RCU read-side critical section unless it can
				253	* prove otherwise. This is useful for debug checks in functions that
				254	* require that they be called within an RCU read-side critical section.
				255	*
				256	* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
				257	* and while lockdep is disabled.
				258	*
				259	* Note that rcu_read_lock() and the matching rcu_read_unlock() must
				260	* occur in the same context, for example, it is illegal to invoke
				261	* rcu_read_unlock() in process context if the matching rcu_read_lock()
				262	* was invoked from within an irq handler.
				263	*
				264	* Note that rcu_read_lock() is disallowed if the CPU is either idle or
				265	* offline from an RCU perspective, so check for those as well.
				266	*/
				267	int rcu_read_lock_held(void)
				268	{
				269	if (!debug_lockdep_rcu_enabled())
				270	return 1;
				271	if (!rcu_is_watching())
				272	return 0;
				273	if (!rcu_lockdep_current_cpu_online())
				274	return 0;
				275	return lock_is_held(&rcu_lock_map);
				276	}
				277	EXPORT_SYMBOL_GPL(rcu_read_lock_held);
				278
				279	/**
				280	* rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
				281	*
				282	* Check for bottom half being disabled, which covers both the
				283	* CONFIG_PROVE_RCU and not cases. Note that if someone uses
				284	* rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
				285	* will show the situation. This is useful for debug checks in functions
				286	* that require that they be called within an RCU read-side critical
				287	* section.
				288	*
				289	* Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
				290	*
				291	* Note that rcu_read_lock() is disallowed if the CPU is either idle or
				292	* offline from an RCU perspective, so check for those as well.
				293	*/
				294	int rcu_read_lock_bh_held(void)
				295	{
				296	if (!debug_lockdep_rcu_enabled())
				297	return 1;
				298	if (!rcu_is_watching())
				299	return 0;
				300	if (!rcu_lockdep_current_cpu_online())
				301	return 0;
				302	return in_softirq() \|\| irqs_disabled();
				303	}
				304	EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
				305
				306	#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
				307
				308	/**
				309	* wakeme_after_rcu() - Callback function to awaken a task after grace period
				310	* @head: Pointer to rcu_head member within rcu_synchronize structure
				311	*
				312	* Awaken the corresponding task now that a grace period has elapsed.
				313	*/
				314	void wakeme_after_rcu(struct rcu_head *head)
				315	{
				316	struct rcu_synchronize *rcu;
				317
				318	rcu = container_of(head, struct rcu_synchronize, head);
				319	complete(&rcu->completion);
				320	}
				321	EXPORT_SYMBOL_GPL(wakeme_after_rcu);
				322
				323	void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
				324	struct rcu_synchronize *rs_array)
				325	{
				326	int i;
				327
				328	/* Initialize and register callbacks for each flavor specified. */
				329	for (i = 0; i < n; i++) {
				330	if (checktiny &&
				331	(crcu_array[i] == call_rcu \|\|
				332	crcu_array[i] == call_rcu_bh)) {
				333	might_sleep();
				334	continue;
				335	}
				336	init_rcu_head_on_stack(&rs_array[i].head);
				337	init_completion(&rs_array[i].completion);
				338	(crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
				339	}
				340
				341	/* Wait for all callbacks to be invoked. */
				342	for (i = 0; i < n; i++) {
				343	if (checktiny &&
				344	(crcu_array[i] == call_rcu \|\|
				345	crcu_array[i] == call_rcu_bh))
				346	continue;
				347	wait_for_completion(&rs_array[i].completion);
				348	destroy_rcu_head_on_stack(&rs_array[i].head);
				349	}
				350	}
				351	EXPORT_SYMBOL_GPL(__wait_rcu_gp);
				352
				353	#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
				354	void init_rcu_head(struct rcu_head *head)
				355	{
				356	debug_object_init(head, &rcuhead_debug_descr);
				357	}
				358
				359	void destroy_rcu_head(struct rcu_head *head)
				360	{
				361	debug_object_free(head, &rcuhead_debug_descr);
				362	}
				363
				364	/*
				365	* fixup_activate is called when:
				366	* - an active object is activated
				367	* - an unknown object is activated (might be a statically initialized object)
				368	* Activation is performed internally by call_rcu().
				369	*/
				370	static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
				371	{
				372	struct rcu_head *head = addr;
				373
				374	switch (state) {
				375
				376	case ODEBUG_STATE_NOTAVAILABLE:
				377	/*
				378	* This is not really a fixup. We just make sure that it is
				379	* tracked in the object tracker.
				380	*/
				381	debug_object_init(head, &rcuhead_debug_descr);
				382	debug_object_activate(head, &rcuhead_debug_descr);
				383	return 0;
				384	default:
				385	return 1;
				386	}
				387	}
				388
				389	/**
				390	* init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
				391	* @head: pointer to rcu_head structure to be initialized
				392	*
				393	* This function informs debugobjects of a new rcu_head structure that
				394	* has been allocated as an auto variable on the stack. This function
				395	* is not required for rcu_head structures that are statically defined or
				396	* that are dynamically allocated on the heap. This function has no
				397	* effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
				398	*/
				399	void init_rcu_head_on_stack(struct rcu_head *head)
				400	{
				401	debug_object_init_on_stack(head, &rcuhead_debug_descr);
				402	}
				403	EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
				404
				405	/**
				406	* destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
				407	* @head: pointer to rcu_head structure to be destroyed
				408	*
				409	* This function informs debugobjects that an on-stack rcu_head structure
				410	* is about to go out of scope. As with init_rcu_head_on_stack(), this
				411	* function is not required for rcu_head structures that are statically
				412	* defined or that are dynamically allocated on the heap. Also as with
				413	* init_rcu_head_on_stack(), this function has no effect for
				414	* !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
				415	*/
				416	void destroy_rcu_head_on_stack(struct rcu_head *head)
				417	{
				418	debug_object_free(head, &rcuhead_debug_descr);
				419	}
				420	EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
				421
				422	struct debug_obj_descr rcuhead_debug_descr = {
				423	.name = "rcu_head",
				424	.fixup_activate = rcuhead_fixup_activate,
				425	};
				426	EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
				427	#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
				428
				429	#if defined(CONFIG_TREE_RCU) \|\| defined(CONFIG_PREEMPT_RCU) \|\| defined(CONFIG_RCU_TRACE)
				430	void do_trace_rcu_torture_read(const char rcutorturename, struct rcu_head rhp,
				431	unsigned long secs,
				432	unsigned long c_old, unsigned long c)
				433	{
				434	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
				435	}
				436	EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
				437	#else
				438	#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
				439	do { } while (0)
				440	#endif
				441
				442	#ifdef CONFIG_RCU_STALL_COMMON
				443
				444	#ifdef CONFIG_PROVE_RCU
				445	#define RCU_STALL_DELAY_DELTA (5 * HZ)
				446	#else
				447	#define RCU_STALL_DELAY_DELTA 0
				448	#endif
				449
				450	int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
				451	static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
				452
				453	module_param(rcu_cpu_stall_suppress, int, 0644);
				454	module_param(rcu_cpu_stall_timeout, int, 0644);
				455
				456	int rcu_jiffies_till_stall_check(void)
				457	{
				458	int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
				459
				460	/*
				461	* Limit check must be consistent with the Kconfig limits
				462	* for CONFIG_RCU_CPU_STALL_TIMEOUT.
				463	*/
				464	if (till_stall_check < 3) {
				465	WRITE_ONCE(rcu_cpu_stall_timeout, 3);
				466	till_stall_check = 3;
				467	} else if (till_stall_check > 300) {
				468	WRITE_ONCE(rcu_cpu_stall_timeout, 300);
				469	till_stall_check = 300;
				470	}
				471	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
				472	}
				473
				474	void rcu_sysrq_start(void)
				475	{
				476	if (!rcu_cpu_stall_suppress)
				477	rcu_cpu_stall_suppress = 2;
				478	}
				479
				480	void rcu_sysrq_end(void)
				481	{
				482	if (rcu_cpu_stall_suppress == 2)
				483	rcu_cpu_stall_suppress = 0;
				484	}
				485
				486	static int rcu_panic(struct notifier_block this, unsigned long ev, void ptr)
				487	{
				488	rcu_cpu_stall_suppress = 1;
				489	return NOTIFY_DONE;
				490	}
				491
				492	static struct notifier_block rcu_panic_block = {
				493	.notifier_call = rcu_panic,
				494	};
				495
				496	static int __init check_cpu_stall_init(void)
				497	{
				498	atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
				499	return 0;
				500	}
				501	early_initcall(check_cpu_stall_init);
				502
				503	#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
				504
				505	#ifdef CONFIG_TASKS_RCU
				506
				507	/*
				508	* Simple variant of RCU whose quiescent states are voluntary context switch,
				509	* user-space execution, and idle. As such, grace periods can take one good
				510	* long time. There are no read-side primitives similar to rcu_read_lock()
				511	* and rcu_read_unlock() because this implementation is intended to get
				512	* the system into a safe state for some of the manipulations involved in
				513	* tracing and the like. Finally, this implementation does not support
				514	* high call_rcu_tasks() rates from multiple CPUs. If this is required,
				515	* per-CPU callback lists will be needed.
				516	*/
				517
				518	/* Global list of callbacks and associated lock. */
				519	static struct rcu_head *rcu_tasks_cbs_head;
				520	static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
				521	static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
				522	static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
				523
				524	/* Track exiting tasks in order to allow them to be waited for. */
				525	DEFINE_SRCU(tasks_rcu_exit_srcu);
				526
				527	/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
				528	static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
				529	module_param(rcu_task_stall_timeout, int, 0644);
				530
				531	static void rcu_spawn_tasks_kthread(void);
				532
				533	/*
				534	* Post an RCU-tasks callback. First call must be from process context
				535	* after the scheduler is fully operational.
				536	*/
				537	void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
				538	{
				539	unsigned long flags;
				540	bool needwake;
				541
				542	rhp->next = NULL;
				543	rhp->func = func;
				544	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
				545	needwake = !rcu_tasks_cbs_head;
				546	*rcu_tasks_cbs_tail = rhp;
				547	rcu_tasks_cbs_tail = &rhp->next;
				548	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
				549	if (needwake) {
				550	rcu_spawn_tasks_kthread();
				551	wake_up(&rcu_tasks_cbs_wq);
				552	}
				553	}
				554	EXPORT_SYMBOL_GPL(call_rcu_tasks);
				555
				556	/**
				557	* synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
				558	*
				559	* Control will return to the caller some time after a full rcu-tasks
				560	* grace period has elapsed, in other words after all currently
				561	* executing rcu-tasks read-side critical sections have elapsed. These
				562	* read-side critical sections are delimited by calls to schedule(),
				563	* cond_resched_rcu_qs(), idle execution, userspace execution, calls
				564	* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
				565	*
				566	* This is a very specialized primitive, intended only for a few uses in
				567	* tracing and other situations requiring manipulation of function
				568	* preambles and profiling hooks. The synchronize_rcu_tasks() function
				569	* is not (yet) intended for heavy use from multiple CPUs.
				570	*
				571	* Note that this guarantee implies further memory-ordering guarantees.
				572	* On systems with more than one CPU, when synchronize_rcu_tasks() returns,
				573	* each CPU is guaranteed to have executed a full memory barrier since the
				574	* end of its last RCU-tasks read-side critical section whose beginning
				575	* preceded the call to synchronize_rcu_tasks(). In addition, each CPU
				576	* having an RCU-tasks read-side critical section that extends beyond
				577	* the return from synchronize_rcu_tasks() is guaranteed to have executed
				578	* a full memory barrier after the beginning of synchronize_rcu_tasks()
				579	* and before the beginning of that RCU-tasks read-side critical section.
				580	* Note that these guarantees include CPUs that are offline, idle, or
				581	* executing in user mode, as well as CPUs that are executing in the kernel.
				582	*
				583	* Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
				584	* to its caller on CPU B, then both CPU A and CPU B are guaranteed
				585	* to have executed a full memory barrier during the execution of
				586	* synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
				587	* (but again only if the system has more than one CPU).
				588	*/
				589	void synchronize_rcu_tasks(void)
				590	{
				591	/* Complain if the scheduler has not started. */
				592	RCU_LOCKDEP_WARN(!rcu_scheduler_active,
				593	"synchronize_rcu_tasks called too soon");
				594
				595	/* Wait for the grace period. */
				596	wait_rcu_gp(call_rcu_tasks);
				597	}
				598	EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
				599
				600	/**
				601	* rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
				602	*
				603	* Although the current implementation is guaranteed to wait, it is not
				604	* obligated to, for example, if there are no pending callbacks.
				605	*/
				606	void rcu_barrier_tasks(void)
				607	{
				608	/* There is only one callback queue, so this is easy. ;-) */
				609	synchronize_rcu_tasks();
				610	}
				611	EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
				612
				613	/* See if tasks are still holding out, complain if so. */
				614	static void check_holdout_task(struct task_struct *t,
				615	bool needreport, bool *firstreport)
				616	{
				617	int cpu;
				618
				619	if (!READ_ONCE(t->rcu_tasks_holdout) \|\|
				620	t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) \|\|
				621	!READ_ONCE(t->on_rq) \|\|
				622	(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
				623	!is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
				624	WRITE_ONCE(t->rcu_tasks_holdout, false);
				625	list_del_init(&t->rcu_tasks_holdout_list);
				626	put_task_struct(t);
				627	return;
				628	}
				629	if (!needreport)
				630	return;
				631	if (*firstreport) {
				632	pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
				633	*firstreport = false;
				634	}
				635	cpu = task_cpu(t);
				636	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
				637	t, ".I"[is_idle_task(t)],
				638	"N."[cpu < 0 \|\| !tick_nohz_full_cpu(cpu)],
				639	t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
				640	t->rcu_tasks_idle_cpu, cpu);
				641	sched_show_task(t);
				642	}
				643
				644	/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
				645	static int __noreturn rcu_tasks_kthread(void *arg)
				646	{
				647	unsigned long flags;
				648	struct task_struct g, t;
				649	unsigned long lastreport;
				650	struct rcu_head *list;
				651	struct rcu_head *next;
				652	LIST_HEAD(rcu_tasks_holdouts);
				653
				654	/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
				655	housekeeping_affine(current);
				656
				657	/*
				658	* Each pass through the following loop makes one check for
				659	* newly arrived callbacks, and, if there are some, waits for
				660	* one RCU-tasks grace period and then invokes the callbacks.
				661	* This loop is terminated by the system going down. ;-)
				662	*/
				663	for (;;) {
				664
				665	/* Pick up any new callbacks. */
				666	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
				667	list = rcu_tasks_cbs_head;
				668	rcu_tasks_cbs_head = NULL;
				669	rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
				670	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
				671
				672	/* If there were none, wait a bit and start over. */
				673	if (!list) {
				674	wait_event_interruptible(rcu_tasks_cbs_wq,
				675	rcu_tasks_cbs_head);
				676	if (!rcu_tasks_cbs_head) {
				677	WARN_ON(signal_pending(current));
				678	schedule_timeout_interruptible(HZ/10);
				679	}
				680	continue;
				681	}
				682
				683	/*
				684	* Wait for all pre-existing t->on_rq and t->nvcsw
				685	* transitions to complete. Invoking synchronize_sched()
				686	* suffices because all these transitions occur with
				687	* interrupts disabled. Without this synchronize_sched(),
				688	* a read-side critical section that started before the
				689	* grace period might be incorrectly seen as having started
				690	* after the grace period.
				691	*
				692	* This synchronize_sched() also dispenses with the
				693	* need for a memory barrier on the first store to
				694	* ->rcu_tasks_holdout, as it forces the store to happen
				695	* after the beginning of the grace period.
				696	*/
				697	synchronize_sched();
				698
				699	/*
				700	* There were callbacks, so we need to wait for an
				701	* RCU-tasks grace period. Start off by scanning
				702	* the task list for tasks that are not already
				703	* voluntarily blocked. Mark these tasks and make
				704	* a list of them in rcu_tasks_holdouts.
				705	*/
				706	rcu_read_lock();
				707	for_each_process_thread(g, t) {
				708	if (t != current && READ_ONCE(t->on_rq) &&
				709	!is_idle_task(t)) {
				710	get_task_struct(t);
				711	t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
				712	WRITE_ONCE(t->rcu_tasks_holdout, true);
				713	list_add(&t->rcu_tasks_holdout_list,
				714	&rcu_tasks_holdouts);
				715	}
				716	}
				717	rcu_read_unlock();
				718
				719	/*
				720	* Wait for tasks that are in the process of exiting.
				721	* This does only part of the job, ensuring that all
				722	* tasks that were previously exiting reach the point
				723	* where they have disabled preemption, allowing the
				724	* later synchronize_sched() to finish the job.
				725	*/
				726	synchronize_srcu(&tasks_rcu_exit_srcu);
				727
				728	/*
				729	* Each pass through the following loop scans the list
				730	* of holdout tasks, removing any that are no longer
				731	* holdouts. When the list is empty, we are done.
				732	*/
				733	lastreport = jiffies;
				734	while (!list_empty(&rcu_tasks_holdouts)) {
				735	bool firstreport;
				736	bool needreport;
				737	int rtst;
				738	struct task_struct *t1;
				739
				740	schedule_timeout_interruptible(HZ);
				741	rtst = READ_ONCE(rcu_task_stall_timeout);
				742	needreport = rtst > 0 &&
				743	time_after(jiffies, lastreport + rtst);
				744	if (needreport)
				745	lastreport = jiffies;
				746	firstreport = true;
				747	WARN_ON(signal_pending(current));
				748	list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
				749	rcu_tasks_holdout_list) {
				750	check_holdout_task(t, needreport, &firstreport);
				751	cond_resched();
				752	}
				753	}
				754
				755	/*
				756	* Because ->on_rq and ->nvcsw are not guaranteed
				757	* to have a full memory barriers prior to them in the
				758	* schedule() path, memory reordering on other CPUs could
				759	* cause their RCU-tasks read-side critical sections to
				760	* extend past the end of the grace period. However,
				761	* because these ->nvcsw updates are carried out with
				762	* interrupts disabled, we can use synchronize_sched()
				763	* to force the needed ordering on all such CPUs.
				764	*
				765	* This synchronize_sched() also confines all
				766	* ->rcu_tasks_holdout accesses to be within the grace
				767	* period, avoiding the need for memory barriers for
				768	* ->rcu_tasks_holdout accesses.
				769	*
				770	* In addition, this synchronize_sched() waits for exiting
				771	* tasks to complete their final preempt_disable() region
				772	* of execution, cleaning up after the synchronize_srcu()
				773	* above.
				774	*/
				775	synchronize_sched();
				776
				777	/* Invoke the callbacks. */
				778	while (list) {
				779	next = list->next;
				780	local_bh_disable();
				781	list->func(list);
				782	local_bh_enable();
				783	list = next;
				784	cond_resched();
				785	}
				786	schedule_timeout_uninterruptible(HZ/10);
				787	}
				788	}
				789
				790	/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
				791	static void rcu_spawn_tasks_kthread(void)
				792	{
				793	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
				794	static struct task_struct *rcu_tasks_kthread_ptr;
				795	struct task_struct *t;
				796
				797	if (READ_ONCE(rcu_tasks_kthread_ptr)) {
				798	smp_mb(); /* Ensure caller sees full kthread. */
				799	return;
				800	}
				801	mutex_lock(&rcu_tasks_kthread_mutex);
				802	if (rcu_tasks_kthread_ptr) {
				803	mutex_unlock(&rcu_tasks_kthread_mutex);
				804	return;
				805	}
				806	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
				807	BUG_ON(IS_ERR(t));
				808	smp_mb(); /* Ensure others see full kthread. */
				809	WRITE_ONCE(rcu_tasks_kthread_ptr, t);
				810	mutex_unlock(&rcu_tasks_kthread_mutex);
				811	}
				812
				813	#endif /* #ifdef CONFIG_TASKS_RCU */
				814
				815	#ifdef CONFIG_PROVE_RCU
				816
				817	/*
				818	* Early boot self test parameters, one for each flavor
				819	*/
				820	static bool rcu_self_test;
				821	static bool rcu_self_test_bh;
				822	static bool rcu_self_test_sched;
				823
				824	module_param(rcu_self_test, bool, 0444);
				825	module_param(rcu_self_test_bh, bool, 0444);
				826	module_param(rcu_self_test_sched, bool, 0444);
				827
				828	static int rcu_self_test_counter;
				829
				830	static void test_callback(struct rcu_head *r)
				831	{
				832	rcu_self_test_counter++;
				833	pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
				834	}
				835
				836	static void early_boot_test_call_rcu(void)
				837	{
				838	static struct rcu_head head;
				839
				840	call_rcu(&head, test_callback);
				841	}
				842
				843	static void early_boot_test_call_rcu_bh(void)
				844	{
				845	static struct rcu_head head;
				846
				847	call_rcu_bh(&head, test_callback);
				848	}
				849
				850	static void early_boot_test_call_rcu_sched(void)
				851	{
				852	static struct rcu_head head;
				853
				854	call_rcu_sched(&head, test_callback);
				855	}
				856
				857	void rcu_early_boot_tests(void)
				858	{
				859	pr_info("Running RCU self tests\n");
				860
				861	if (rcu_self_test)
				862	early_boot_test_call_rcu();
				863	if (rcu_self_test_bh)
				864	early_boot_test_call_rcu_bh();
				865	if (rcu_self_test_sched)
				866	early_boot_test_call_rcu_sched();
				867	}
				868
				869	static int rcu_verify_early_boot_tests(void)
				870	{
				871	int ret = 0;
				872	int early_boot_test_counter = 0;
				873
				874	if (rcu_self_test) {
				875	early_boot_test_counter++;
				876	rcu_barrier();
				877	}
				878	if (rcu_self_test_bh) {
				879	early_boot_test_counter++;
				880	rcu_barrier_bh();
				881	}
				882	if (rcu_self_test_sched) {
				883	early_boot_test_counter++;
				884	rcu_barrier_sched();
				885	}
				886
				887	if (rcu_self_test_counter != early_boot_test_counter) {
				888	WARN_ON(1);
				889	ret = -1;
				890	}
				891
				892	return ret;
				893	}
				894	late_initcall(rcu_verify_early_boot_tests);
				895	#else
				896	void rcu_early_boot_tests(void) {}
				897	#endif /* CONFIG_PROVE_RCU */