Blame - kernel/time/posix-cpu-timers.c - codeaurora/cp-linux

blob: 80016b329d94427d83abfb28988b2d268846a0e4 [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame]	1	/*
				2	* Implement CPU time clocks for the POSIX clock interface.
				3	*/
				4
				5	#include <linux/sched.h>
				6	#include <linux/posix-timers.h>
				7	#include <linux/errno.h>
				8	#include <linux/math64.h>
				9	#include <asm/uaccess.h>
				10	#include <linux/kernel_stat.h>
				11	#include <trace/events/timer.h>
				12	#include <linux/random.h>
				13	#include <linux/tick.h>
				14	#include <linux/workqueue.h>
				15
				16	/*
				17	* Called after updating RLIMIT_CPU to run cpu timer and update
				18	* tsk->signal->cputime_expires expiration cache if necessary. Needs
				19	* siglock protection since other code may update expiration cache as
				20	* well.
				21	*/
				22	void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
				23	{
				24	cputime_t cputime = secs_to_cputime(rlim_new);
				25
				26	spin_lock_irq(&task->sighand->siglock);
				27	set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
				28	spin_unlock_irq(&task->sighand->siglock);
				29	}
				30
				31	static int check_clock(const clockid_t which_clock)
				32	{
				33	int error = 0;
				34	struct task_struct *p;
				35	const pid_t pid = CPUCLOCK_PID(which_clock);
				36
				37	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
				38	return -EINVAL;
				39
				40	if (pid == 0)
				41	return 0;
				42
				43	rcu_read_lock();
				44	p = find_task_by_vpid(pid);
				45	if (!p \|\| !(CPUCLOCK_PERTHREAD(which_clock) ?
				46	same_thread_group(p, current) : has_group_leader_pid(p))) {
				47	error = -EINVAL;
				48	}
				49	rcu_read_unlock();
				50
				51	return error;
				52	}
				53
				54	static inline unsigned long long
				55	timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
				56	{
				57	unsigned long long ret;
				58
				59	ret = 0; /* high half always zero when .cpu used */
				60	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
				61	ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
				62	} else {
				63	ret = cputime_to_expires(timespec_to_cputime(tp));
				64	}
				65	return ret;
				66	}
				67
				68	static void sample_to_timespec(const clockid_t which_clock,
				69	unsigned long long expires,
				70	struct timespec *tp)
				71	{
				72	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
				73	*tp = ns_to_timespec(expires);
				74	else
				75	cputime_to_timespec((__force cputime_t)expires, tp);
				76	}
				77
				78	/*
				79	* Update expiry time from increment, and increase overrun count,
				80	* given the current clock sample.
				81	*/
				82	static void bump_cpu_timer(struct k_itimer *timer,
				83	unsigned long long now)
				84	{
				85	int i;
				86	unsigned long long delta, incr;
				87
				88	if (timer->it.cpu.incr == 0)
				89	return;
				90
				91	if (now < timer->it.cpu.expires)
				92	return;
				93
				94	incr = timer->it.cpu.incr;
				95	delta = now + incr - timer->it.cpu.expires;
				96
				97	/* Don't use (incr2 < delta), incr2 might overflow. */
				98	for (i = 0; incr < delta - incr; i++)
				99	incr = incr << 1;
				100
				101	for (; i >= 0; incr >>= 1, i--) {
				102	if (delta < incr)
				103	continue;
				104
				105	timer->it.cpu.expires += incr;
				106	timer->it_overrun += 1 << i;
				107	delta -= incr;
				108	}
				109	}
				110
				111	/**
				112	* task_cputime_zero - Check a task_cputime struct for all zero fields.
				113	*
				114	* @cputime: The struct to compare.
				115	*
				116	* Checks @cputime to see if all fields are zero. Returns true if all fields
				117	* are zero, false if any field is nonzero.
				118	*/
				119	static inline int task_cputime_zero(const struct task_cputime *cputime)
				120	{
				121	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
				122	return 1;
				123	return 0;
				124	}
				125
				126	static inline unsigned long long prof_ticks(struct task_struct *p)
				127	{
				128	cputime_t utime, stime;
				129
				130	task_cputime(p, &utime, &stime);
				131
				132	return cputime_to_expires(utime + stime);
				133	}
				134	static inline unsigned long long virt_ticks(struct task_struct *p)
				135	{
				136	cputime_t utime;
				137
				138	task_cputime(p, &utime, NULL);
				139
				140	return cputime_to_expires(utime);
				141	}
				142
				143	static int
				144	posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
				145	{
				146	int error = check_clock(which_clock);
				147	if (!error) {
				148	tp->tv_sec = 0;
				149	tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
				150	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
				151	/*
				152	* If sched_clock is using a cycle counter, we
				153	* don't have any idea of its true resolution
				154	* exported, but it is much more than 1s/HZ.
				155	*/
				156	tp->tv_nsec = 1;
				157	}
				158	}
				159	return error;
				160	}
				161
				162	static int
				163	posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
				164	{
				165	/*
				166	* You can never reset a CPU clock, but we check for other errors
				167	* in the call before failing with EPERM.
				168	*/
				169	int error = check_clock(which_clock);
				170	if (error == 0) {
				171	error = -EPERM;
				172	}
				173	return error;
				174	}
				175
				176
				177	/*
				178	* Sample a per-thread clock for the given task.
				179	*/
				180	static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
				181	unsigned long long *sample)
				182	{
				183	switch (CPUCLOCK_WHICH(which_clock)) {
				184	default:
				185	return -EINVAL;
				186	case CPUCLOCK_PROF:
				187	*sample = prof_ticks(p);
				188	break;
				189	case CPUCLOCK_VIRT:
				190	*sample = virt_ticks(p);
				191	break;
				192	case CPUCLOCK_SCHED:
				193	*sample = task_sched_runtime(p);
				194	break;
				195	}
				196	return 0;
				197	}
				198
				199	/*
				200	* Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
				201	* to avoid race conditions with concurrent updates to cputime.
				202	*/
				203	static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
				204	{
				205	u64 curr_cputime;
				206	retry:
				207	curr_cputime = atomic64_read(cputime);
				208	if (sum_cputime > curr_cputime) {
				209	if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
				210	goto retry;
				211	}
				212	}
				213
				214	static void update_gt_cputime(struct task_cputime_atomic cputime_atomic, struct task_cputime sum)
				215	{
				216	__update_gt_cputime(&cputime_atomic->utime, sum->utime);
				217	__update_gt_cputime(&cputime_atomic->stime, sum->stime);
				218	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
				219	}
				220
				221	/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
				222	static inline void sample_cputime_atomic(struct task_cputime *times,
				223	struct task_cputime_atomic *atomic_times)
				224	{
				225	times->utime = atomic64_read(&atomic_times->utime);
				226	times->stime = atomic64_read(&atomic_times->stime);
				227	times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
				228	}
				229
				230	void thread_group_cputimer(struct task_struct tsk, struct task_cputime times)
				231	{
				232	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
				233	struct task_cputime sum;
				234
				235	/* Check if cputimer isn't running. This is accessed without locking. */
				236	if (!READ_ONCE(cputimer->running)) {
				237	/*
				238	* The POSIX timer interface allows for absolute time expiry
				239	* values through the TIMER_ABSTIME flag, therefore we have
				240	* to synchronize the timer to the clock every time we start it.
				241	*/
				242	thread_group_cputime(tsk, &sum);
				243	update_gt_cputime(&cputimer->cputime_atomic, &sum);
				244
				245	/*
				246	* We're setting cputimer->running without a lock. Ensure
				247	* this only gets written to in one operation. We set
				248	* running after update_gt_cputime() as a small optimization,
				249	* but barriers are not required because update_gt_cputime()
				250	* can handle concurrent updates.
				251	*/
				252	WRITE_ONCE(cputimer->running, true);
				253	}
				254	sample_cputime_atomic(times, &cputimer->cputime_atomic);
				255	}
				256
				257	/*
				258	* Sample a process (thread group) clock for the given group_leader task.
				259	* Must be called with task sighand lock held for safe while_each_thread()
				260	* traversal.
				261	*/
				262	static int cpu_clock_sample_group(const clockid_t which_clock,
				263	struct task_struct *p,
				264	unsigned long long *sample)
				265	{
				266	struct task_cputime cputime;
				267
				268	switch (CPUCLOCK_WHICH(which_clock)) {
				269	default:
				270	return -EINVAL;
				271	case CPUCLOCK_PROF:
				272	thread_group_cputime(p, &cputime);
				273	*sample = cputime_to_expires(cputime.utime + cputime.stime);
				274	break;
				275	case CPUCLOCK_VIRT:
				276	thread_group_cputime(p, &cputime);
				277	*sample = cputime_to_expires(cputime.utime);
				278	break;
				279	case CPUCLOCK_SCHED:
				280	thread_group_cputime(p, &cputime);
				281	*sample = cputime.sum_exec_runtime;
				282	break;
				283	}
				284	return 0;
				285	}
				286
				287	static int posix_cpu_clock_get_task(struct task_struct *tsk,
				288	const clockid_t which_clock,
				289	struct timespec *tp)
				290	{
				291	int err = -EINVAL;
				292	unsigned long long rtn;
				293
				294	if (CPUCLOCK_PERTHREAD(which_clock)) {
				295	if (same_thread_group(tsk, current))
				296	err = cpu_clock_sample(which_clock, tsk, &rtn);
				297	} else {
				298	if (tsk == current \|\| thread_group_leader(tsk))
				299	err = cpu_clock_sample_group(which_clock, tsk, &rtn);
				300	}
				301
				302	if (!err)
				303	sample_to_timespec(which_clock, rtn, tp);
				304
				305	return err;
				306	}
				307
				308
				309	static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
				310	{
				311	const pid_t pid = CPUCLOCK_PID(which_clock);
				312	int err = -EINVAL;
				313
				314	if (pid == 0) {
				315	/*
				316	* Special case constant value for our own clocks.
				317	* We don't have to do any lookup to find ourselves.
				318	*/
				319	err = posix_cpu_clock_get_task(current, which_clock, tp);
				320	} else {
				321	/*
				322	* Find the given PID, and validate that the caller
				323	* should be able to see it.
				324	*/
				325	struct task_struct *p;
				326	rcu_read_lock();
				327	p = find_task_by_vpid(pid);
				328	if (p)
				329	err = posix_cpu_clock_get_task(p, which_clock, tp);
				330	rcu_read_unlock();
				331	}
				332
				333	return err;
				334	}
				335
				336
				337	/*
				338	* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
				339	* This is called from sys_timer_create() and do_cpu_nanosleep() with the
				340	* new timer already all-zeros initialized.
				341	*/
				342	static int posix_cpu_timer_create(struct k_itimer *new_timer)
				343	{
				344	int ret = 0;
				345	const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
				346	struct task_struct *p;
				347
				348	if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
				349	return -EINVAL;
				350
				351	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
				352
				353	rcu_read_lock();
				354	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
				355	if (pid == 0) {
				356	p = current;
				357	} else {
				358	p = find_task_by_vpid(pid);
				359	if (p && !same_thread_group(p, current))
				360	p = NULL;
				361	}
				362	} else {
				363	if (pid == 0) {
				364	p = current->group_leader;
				365	} else {
				366	p = find_task_by_vpid(pid);
				367	if (p && !has_group_leader_pid(p))
				368	p = NULL;
				369	}
				370	}
				371	new_timer->it.cpu.task = p;
				372	if (p) {
				373	get_task_struct(p);
				374	} else {
				375	ret = -EINVAL;
				376	}
				377	rcu_read_unlock();
				378
				379	return ret;
				380	}
				381
				382	/*
				383	* Clean up a CPU-clock timer that is about to be destroyed.
				384	* This is called from timer deletion with the timer already locked.
				385	* If we return TIMER_RETRY, it's necessary to release the timer's lock
				386	* and try again. (This happens when the timer is in the middle of firing.)
				387	*/
				388	static int posix_cpu_timer_del(struct k_itimer *timer)
				389	{
				390	int ret = 0;
				391	unsigned long flags;
				392	struct sighand_struct *sighand;
				393	struct task_struct *p = timer->it.cpu.task;
				394
				395	WARN_ON_ONCE(p == NULL);
				396
				397	/*
				398	* Protect against sighand release/switch in exit/exec and process/
				399	* thread timer list entry concurrent read/writes.
				400	*/
				401	sighand = lock_task_sighand(p, &flags);
				402	if (unlikely(sighand == NULL)) {
				403	/*
				404	* We raced with the reaping of the task.
				405	* The deletion should have cleared us off the list.
				406	*/
				407	WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
				408	} else {
				409	if (timer->it.cpu.firing)
				410	ret = TIMER_RETRY;
				411	else
				412	list_del(&timer->it.cpu.entry);
				413
				414	unlock_task_sighand(p, &flags);
				415	}
				416
				417	if (!ret)
				418	put_task_struct(p);
				419
				420	return ret;
				421	}
				422
				423	static void cleanup_timers_list(struct list_head *head)
				424	{
				425	struct cpu_timer_list timer, next;
				426
				427	list_for_each_entry_safe(timer, next, head, entry)
				428	list_del_init(&timer->entry);
				429	}
				430
				431	/*
				432	* Clean out CPU timers still ticking when a thread exited. The task
				433	* pointer is cleared, and the expiry time is replaced with the residual
				434	* time for later timer_gettime calls to return.
				435	* This must be called with the siglock held.
				436	*/
				437	static void cleanup_timers(struct list_head *head)
				438	{
				439	cleanup_timers_list(head);
				440	cleanup_timers_list(++head);
				441	cleanup_timers_list(++head);
				442	}
				443
				444	/*
				445	* These are both called with the siglock held, when the current thread
				446	* is being reaped. When the final (leader) thread in the group is reaped,
				447	* posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
				448	*/
				449	void posix_cpu_timers_exit(struct task_struct *tsk)
				450	{
				451	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
				452	sizeof(unsigned long long));
				453	cleanup_timers(tsk->cpu_timers);
				454
				455	}
				456	void posix_cpu_timers_exit_group(struct task_struct *tsk)
				457	{
				458	cleanup_timers(tsk->signal->cpu_timers);
				459	}
				460
				461	static inline int expires_gt(cputime_t expires, cputime_t new_exp)
				462	{
				463	return expires == 0 \|\| expires > new_exp;
				464	}
				465
				466	/*
				467	* Insert the timer on the appropriate list before any timers that
				468	* expire later. This must be called with the sighand lock held.
				469	*/
				470	static void arm_timer(struct k_itimer *timer)
				471	{
				472	struct task_struct *p = timer->it.cpu.task;
				473	struct list_head head, listpos;
				474	struct task_cputime *cputime_expires;
				475	struct cpu_timer_list *const nt = &timer->it.cpu;
				476	struct cpu_timer_list *next;
				477
				478	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
				479	head = p->cpu_timers;
				480	cputime_expires = &p->cputime_expires;
				481	} else {
				482	head = p->signal->cpu_timers;
				483	cputime_expires = &p->signal->cputime_expires;
				484	}
				485	head += CPUCLOCK_WHICH(timer->it_clock);
				486
				487	listpos = head;
				488	list_for_each_entry(next, head, entry) {
				489	if (nt->expires < next->expires)
				490	break;
				491	listpos = &next->entry;
				492	}
				493	list_add(&nt->entry, listpos);
				494
				495	if (listpos == head) {
				496	unsigned long long exp = nt->expires;
				497
				498	/*
				499	* We are the new earliest-expiring POSIX 1.b timer, hence
				500	* need to update expiration cache. Take into account that
				501	* for process timers we share expiration cache with itimers
				502	* and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
				503	*/
				504
				505	switch (CPUCLOCK_WHICH(timer->it_clock)) {
				506	case CPUCLOCK_PROF:
				507	if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
				508	cputime_expires->prof_exp = expires_to_cputime(exp);
				509	break;
				510	case CPUCLOCK_VIRT:
				511	if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
				512	cputime_expires->virt_exp = expires_to_cputime(exp);
				513	break;
				514	case CPUCLOCK_SCHED:
				515	if (cputime_expires->sched_exp == 0 \|\|
				516	cputime_expires->sched_exp > exp)
				517	cputime_expires->sched_exp = exp;
				518	break;
				519	}
				520	}
				521	}
				522
				523	/*
				524	* The timer is locked, fire it and arrange for its reload.
				525	*/
				526	static void cpu_timer_fire(struct k_itimer *timer)
				527	{
				528	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
				529	/*
				530	* User don't want any signal.
				531	*/
				532	timer->it.cpu.expires = 0;
				533	} else if (unlikely(timer->sigq == NULL)) {
				534	/*
				535	* This a special case for clock_nanosleep,
				536	* not a normal timer from sys_timer_create.
				537	*/
				538	wake_up_process(timer->it_process);
				539	timer->it.cpu.expires = 0;
				540	} else if (timer->it.cpu.incr == 0) {
				541	/*
				542	* One-shot timer. Clear it as soon as it's fired.
				543	*/
				544	posix_timer_event(timer, 0);
				545	timer->it.cpu.expires = 0;
				546	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
				547	/*
				548	* The signal did not get queued because the signal
				549	* was ignored, so we won't get any callback to
				550	* reload the timer. But we need to keep it
				551	* ticking in case the signal is deliverable next time.
				552	*/
				553	posix_cpu_timer_schedule(timer);
				554	}
				555	}
				556
				557	/*
				558	* Sample a process (thread group) timer for the given group_leader task.
				559	* Must be called with task sighand lock held for safe while_each_thread()
				560	* traversal.
				561	*/
				562	static int cpu_timer_sample_group(const clockid_t which_clock,
				563	struct task_struct *p,
				564	unsigned long long *sample)
				565	{
				566	struct task_cputime cputime;
				567
				568	thread_group_cputimer(p, &cputime);
				569	switch (CPUCLOCK_WHICH(which_clock)) {
				570	default:
				571	return -EINVAL;
				572	case CPUCLOCK_PROF:
				573	*sample = cputime_to_expires(cputime.utime + cputime.stime);
				574	break;
				575	case CPUCLOCK_VIRT:
				576	*sample = cputime_to_expires(cputime.utime);
				577	break;
				578	case CPUCLOCK_SCHED:
				579	*sample = cputime.sum_exec_runtime;
				580	break;
				581	}
				582	return 0;
				583	}
				584
				#ifdef CONFIG_NO_HZ_FULL
				/* Work item body: just calls tick_nohz_full_kick_all(). */
				static void nohz_kick_work_fn(struct work_struct *work)
				{
					tick_nohz_full_kick_all();
				}

				static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);

				/*
				 * We need the IPIs to be sent from sane process context.
				 * The posix cpu timers are always set with irqs disabled,
				 * so the kick is deferred to a work item.
				 */
				static void posix_cpu_timer_kick_nohz(void)
				{
					if (!context_tracking_is_enabled())
						return;

					schedule_work(&nohz_kick_work);
				}

				/*
				 * Returns true only when @tsk has no cached per-thread expiry and the
				 * thread group's cputimer is not running.
				 */
				bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
				{
					if (!task_cputime_zero(&tsk->cputime_expires))
						return false;

					/* Check if cputimer is running. This is accessed without locking. */
					return !READ_ONCE(tsk->signal->cputimer.running);
				}
				#else
				/* Without CONFIG_NO_HZ_FULL there is nothing to kick. */
				static inline void posix_cpu_timer_kick_nohz(void) { }
				#endif
				617
				618	/*
				619	* Guts of sys_timer_settime for CPU timers.
				620	* This is called with the timer locked and interrupts disabled.
				621	* If we return TIMER_RETRY, it's necessary to release the timer's lock
				622	* and try again. (This happens when the timer is in the middle of firing.)
				623	*/
				624	static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
				625	struct itimerspec new, struct itimerspec old)
				626	{
				627	unsigned long flags;
				628	struct sighand_struct *sighand;
				629	struct task_struct *p = timer->it.cpu.task;
				630	unsigned long long old_expires, new_expires, old_incr, val;
				631	int ret;
				632
				633	WARN_ON_ONCE(p == NULL);
				634
				635	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
				636
				637	/*
				638	* Protect against sighand release/switch in exit/exec and p->cpu_timers
				639	* and p->signal->cpu_timers read/write in arm_timer()
				640	*/
				641	sighand = lock_task_sighand(p, &flags);
				642	/*
				643	* If p has just been reaped, we can no
				644	* longer get any information about it at all.
				645	*/
				646	if (unlikely(sighand == NULL)) {
				647	return -ESRCH;
				648	}
				649
				650	/*
				651	* Disarm any old timer after extracting its expiry time.
				652	*/
				653	WARN_ON_ONCE(!irqs_disabled());
				654
				655	ret = 0;
				656	old_incr = timer->it.cpu.incr;
				657	old_expires = timer->it.cpu.expires;
				658	if (unlikely(timer->it.cpu.firing)) {
				659	timer->it.cpu.firing = -1;
				660	ret = TIMER_RETRY;
				661	} else
				662	list_del_init(&timer->it.cpu.entry);
				663
				664	/*
				665	* We need to sample the current value to convert the new
				666	* value from to relative and absolute, and to convert the
				667	* old value from absolute to relative. To set a process
				668	* timer, we need a sample to balance the thread expiry
				669	* times (in arm_timer). With an absolute time, we must
				670	* check if it's already passed. In short, we need a sample.
				671	*/
				672	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
				673	cpu_clock_sample(timer->it_clock, p, &val);
				674	} else {
				675	cpu_timer_sample_group(timer->it_clock, p, &val);
				676	}
				677
				678	if (old) {
				679	if (old_expires == 0) {
				680	old->it_value.tv_sec = 0;
				681	old->it_value.tv_nsec = 0;
				682	} else {
				683	/*
				684	* Update the timer in case it has
				685	* overrun already. If it has,
				686	* we'll report it as having overrun
				687	* and with the next reloaded timer
				688	* already ticking, though we are
				689	* swallowing that pending
				690	* notification here to install the
				691	* new setting.
				692	*/
				693	bump_cpu_timer(timer, val);
				694	if (val < timer->it.cpu.expires) {
				695	old_expires = timer->it.cpu.expires - val;
				696	sample_to_timespec(timer->it_clock,
				697	old_expires,
				698	&old->it_value);
				699	} else {
				700	old->it_value.tv_nsec = 1;
				701	old->it_value.tv_sec = 0;
				702	}
				703	}
				704	}
				705
				706	if (unlikely(ret)) {
				707	/*
				708	* We are colliding with the timer actually firing.
				709	* Punt after filling in the timer's old value, and
				710	* disable this firing since we are already reporting
				711	* it as an overrun (thanks to bump_cpu_timer above).
				712	*/
				713	unlock_task_sighand(p, &flags);
				714	goto out;
				715	}
				716
				717	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
				718	new_expires += val;
				719	}
				720
				721	/*
				722	* Install the new expiry time (or zero).
				723	* For a timer with no notification action, we don't actually
				724	* arm the timer (we'll just fake it for timer_gettime).
				725	*/
				726	timer->it.cpu.expires = new_expires;
				727	if (new_expires != 0 && val < new_expires) {
				728	arm_timer(timer);
				729	}
				730
				731	unlock_task_sighand(p, &flags);
				732	/*
				733	* Install the new reload setting, and
				734	* set up the signal and overrun bookkeeping.
				735	*/
				736	timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
				737	&new->it_interval);
				738
				739	/*
				740	* This acts as a modification timestamp for the timer,
				741	* so any automatic reload attempt will punt on seeing
				742	* that we have reset the timer manually.
				743	*/
				744	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
				745	~REQUEUE_PENDING;
				746	timer->it_overrun_last = 0;
				747	timer->it_overrun = -1;
				748
				749	if (new_expires != 0 && !(val < new_expires)) {
				750	/*
				751	* The designated time already passed, so we notify
				752	* immediately, even if the thread never runs to
				753	* accumulate more time on this clock.
				754	*/
				755	cpu_timer_fire(timer);
				756	}
				757
				758	ret = 0;
				759	out:
				760	if (old) {
				761	sample_to_timespec(timer->it_clock,
				762	old_incr, &old->it_interval);
				763	}
				764	if (!ret)
				765	posix_cpu_timer_kick_nohz();
				766	return ret;
				767	}
				768
				769	static void posix_cpu_timer_get(struct k_itimer timer, struct itimerspec itp)
				770	{
				771	unsigned long long now;
				772	struct task_struct *p = timer->it.cpu.task;
				773
				774	WARN_ON_ONCE(p == NULL);
				775
				776	/*
				777	* Easy part: convert the reload time.
				778	*/
				779	sample_to_timespec(timer->it_clock,
				780	timer->it.cpu.incr, &itp->it_interval);
				781
				782	if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
				783	itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
				784	return;
				785	}
				786
				787	/*
				788	* Sample the clock to take the difference with the expiry time.
				789	*/
				790	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
				791	cpu_clock_sample(timer->it_clock, p, &now);
				792	} else {
				793	struct sighand_struct *sighand;
				794	unsigned long flags;
				795
				796	/*
				797	* Protect against sighand release/switch in exit/exec and
				798	* also make timer sampling safe if it ends up calling
				799	* thread_group_cputime().
				800	*/
				801	sighand = lock_task_sighand(p, &flags);
				802	if (unlikely(sighand == NULL)) {
				803	/*
				804	* The process has been reaped.
				805	* We can't even collect a sample any more.
				806	* Call the timer disarmed, nothing else to do.
				807	*/
				808	timer->it.cpu.expires = 0;
				809	sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
				810	&itp->it_value);
				811	return;
				812	} else {
				813	cpu_timer_sample_group(timer->it_clock, p, &now);
				814	unlock_task_sighand(p, &flags);
				815	}
				816	}
				817
				818	if (now < timer->it.cpu.expires) {
				819	sample_to_timespec(timer->it_clock,
				820	timer->it.cpu.expires - now,
				821	&itp->it_value);
				822	} else {
				823	/*
				824	* The timer should have expired already, but the firing
				825	* hasn't taken place yet. Say it's just about to expire.
				826	*/
				827	itp->it_value.tv_nsec = 1;
				828	itp->it_value.tv_sec = 0;
				829	}
				830	}
				831
				832	static unsigned long long
				833	check_timers_list(struct list_head *timers,
				834	struct list_head *firing,
				835	unsigned long long curr)
				836	{
				837	int maxfire = 20;
				838
				839	while (!list_empty(timers)) {
				840	struct cpu_timer_list *t;
				841
				842	t = list_first_entry(timers, struct cpu_timer_list, entry);
				843
				844	if (!--maxfire \|\| curr < t->expires)
				845	return t->expires;
				846
				847	t->firing = 1;
				848	list_move_tail(&t->entry, firing);
				849	}
				850
				851	return 0;
				852	}
				853
				854	/*
				855	* Check for any per-thread CPU timers that have fired and move them off
				856	* the tsk->cpu_timers[N] list onto the firing list. Here we update the
				857	* tsk->it_*_expires values to reflect the remaining thread CPU timers.
				858	*/
				859	static void check_thread_timers(struct task_struct *tsk,
				860	struct list_head *firing)
				861	{
				862	struct list_head *timers = tsk->cpu_timers;
				863	struct signal_struct *const sig = tsk->signal;
				864	struct task_cputime *tsk_expires = &tsk->cputime_expires;
				865	unsigned long long expires;
				866	unsigned long soft;
				867
				868	/*
				869	* If cputime_expires is zero, then there are no active
				870	* per thread CPU timers.
				871	*/
				872	if (task_cputime_zero(&tsk->cputime_expires))
				873	return;
				874
				875	expires = check_timers_list(timers, firing, prof_ticks(tsk));
				876	tsk_expires->prof_exp = expires_to_cputime(expires);
				877
				878	expires = check_timers_list(++timers, firing, virt_ticks(tsk));
				879	tsk_expires->virt_exp = expires_to_cputime(expires);
				880
				881	tsk_expires->sched_exp = check_timers_list(++timers, firing,
				882	tsk->se.sum_exec_runtime);
				883
				884	/*
				885	* Check for the special case thread timers.
				886	*/
				887	soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
				888	if (soft != RLIM_INFINITY) {
				889	unsigned long hard =
				890	READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
				891
				892	if (hard != RLIM_INFINITY &&
				893	tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
				894	/*
				895	* At the hard limit, we just die.
				896	* No need to calculate anything else now.
				897	*/
				898	__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
				899	return;
				900	}
				901	if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
				902	/*
				903	* At the soft limit, send a SIGXCPU every second.
				904	*/
				905	if (soft < hard) {
				906	soft += USEC_PER_SEC;
				907	sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
				908	}
				909	printk(KERN_INFO
				910	"RT Watchdog Timeout: %s[%d]\n",
				911	tsk->comm, task_pid_nr(tsk));
				912	__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
				913	}
				914	}
				915	}
				916
				917	static inline void stop_process_timers(struct signal_struct *sig)
				918	{
				919	struct thread_group_cputimer *cputimer = &sig->cputimer;
				920
				921	/* Turn off cputimer->running. This is done without locking. */
				922	WRITE_ONCE(cputimer->running, false);
				923	}
				924
				925	static u32 onecputick;
				926
				927	static void check_cpu_itimer(struct task_struct tsk, struct cpu_itimer it,
				928	unsigned long long *expires,
				929	unsigned long long cur_time, int signo)
				930	{
				931	if (!it->expires)
				932	return;
				933
				934	if (cur_time >= it->expires) {
				935	if (it->incr) {
				936	it->expires += it->incr;
				937	it->error += it->incr_error;
				938	if (it->error >= onecputick) {
				939	it->expires -= cputime_one_jiffy;
				940	it->error -= onecputick;
				941	}
				942	} else {
				943	it->expires = 0;
				944	}
				945
				946	trace_itimer_expire(signo == SIGPROF ?
				947	ITIMER_PROF : ITIMER_VIRTUAL,
				948	tsk->signal->leader_pid, cur_time);
				949	__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
				950	}
				951
				952	if (it->expires && (!expires \|\| it->expires < expires)) {
				953	*expires = it->expires;
				954	}
				955	}
				956
				957	/*
				958	* Check for any per-thread CPU timers that have fired and move them
				959	* off the tsk->*_timers list onto the firing list. Per-thread timers
				960	* have already been taken off.
				961	*/
				962	static void check_process_timers(struct task_struct *tsk,
				963	struct list_head *firing)
				964	{
				965	struct signal_struct *const sig = tsk->signal;
				966	unsigned long long utime, ptime, virt_expires, prof_expires;
				967	unsigned long long sum_sched_runtime, sched_expires;
				968	struct list_head *timers = sig->cpu_timers;
				969	struct task_cputime cputime;
				970	unsigned long soft;
				971
				972	/*
				973	* If cputimer is not running, then there are no active
				974	* process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
				975	*/
				976	if (!READ_ONCE(tsk->signal->cputimer.running))
				977	return;
				978
				979	/*
				980	* Signify that a thread is checking for process timers.
				981	* Write access to this field is protected by the sighand lock.
				982	*/
				983	sig->cputimer.checking_timer = true;
				984
				985	/*
				986	* Collect the current process totals.
				987	*/
				988	thread_group_cputimer(tsk, &cputime);
				989	utime = cputime_to_expires(cputime.utime);
				990	ptime = utime + cputime_to_expires(cputime.stime);
				991	sum_sched_runtime = cputime.sum_exec_runtime;
				992
				993	prof_expires = check_timers_list(timers, firing, ptime);
				994	virt_expires = check_timers_list(++timers, firing, utime);
				995	sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
				996
				997	/*
				998	* Check for the special case process timers.
				999	*/
				1000	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
				1001	SIGPROF);
				1002	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
				1003	SIGVTALRM);
				1004	soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
				1005	if (soft != RLIM_INFINITY) {
				1006	unsigned long psecs = cputime_to_secs(ptime);
				1007	unsigned long hard =
				1008	READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
				1009	cputime_t x;
				1010	if (psecs >= hard) {
				1011	/*
				1012	* At the hard limit, we just die.
				1013	* No need to calculate anything else now.
				1014	*/
				1015	__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
				1016	return;
				1017	}
				1018	if (psecs >= soft) {
				1019	/*
				1020	* At the soft limit, send a SIGXCPU every second.
				1021	*/
				1022	__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
				1023	if (soft < hard) {
				1024	soft++;
				1025	sig->rlim[RLIMIT_CPU].rlim_cur = soft;
				1026	}
				1027	}
				1028	x = secs_to_cputime(soft);
				1029	if (!prof_expires \|\| x < prof_expires) {
				1030	prof_expires = x;
				1031	}
				1032	}
				1033
				1034	sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
				1035	sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
				1036	sig->cputime_expires.sched_exp = sched_expires;
				1037	if (task_cputime_zero(&sig->cputime_expires))
				1038	stop_process_timers(sig);
				1039
				1040	sig->cputimer.checking_timer = false;
				1041	}
				1042
				1043	/*
				1044	* This is called from the signal code (via do_schedule_next_timer)
				1045	* when the last timer signal was delivered and we have to reload the timer.
				1046	*/
				1047	void posix_cpu_timer_schedule(struct k_itimer *timer)
				1048	{
				1049	struct sighand_struct *sighand;
				1050	unsigned long flags;
				1051	struct task_struct *p = timer->it.cpu.task;
				1052	unsigned long long now;
				1053
				1054	WARN_ON_ONCE(p == NULL);
				1055
				1056	/*
				1057	* Fetch the current sample and update the timer's expiry time.
				1058	*/
				1059	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
				1060	cpu_clock_sample(timer->it_clock, p, &now);
				1061	bump_cpu_timer(timer, now);
				1062	if (unlikely(p->exit_state))
				1063	goto out;
				1064
				1065	/* Protect timer list r/w in arm_timer() */
				1066	sighand = lock_task_sighand(p, &flags);
				1067	if (!sighand)
				1068	goto out;
				1069	} else {
				1070	/*
				1071	* Protect arm_timer() and timer sampling in case of call to
				1072	* thread_group_cputime().
				1073	*/
				1074	sighand = lock_task_sighand(p, &flags);
				1075	if (unlikely(sighand == NULL)) {
				1076	/*
				1077	* The process has been reaped.
				1078	* We can't even collect a sample any more.
				1079	*/
				1080	timer->it.cpu.expires = 0;
				1081	goto out;
				1082	} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
				1083	unlock_task_sighand(p, &flags);
				1084	/* Optimizations: if the process is dying, no need to rearm */
				1085	goto out;
				1086	}
				1087	cpu_timer_sample_group(timer->it_clock, p, &now);
				1088	bump_cpu_timer(timer, now);
				1089	/* Leave the sighand locked for the call below. */
				1090	}
				1091
				1092	/*
				1093	* Now re-arm for the new expiry time.
				1094	*/
				1095	WARN_ON_ONCE(!irqs_disabled());
				1096	arm_timer(timer);
				1097	unlock_task_sighand(p, &flags);
				1098
				1099	/* Kick full dynticks CPUs in case they need to tick on the new timer */
				1100	posix_cpu_timer_kick_nohz();
				1101	out:
				1102	timer->it_overrun_last = timer->it_overrun;
				1103	timer->it_overrun = -1;
				1104	++timer->it_requeue_pending;
				1105	}
				1106
				1107	/**
				1108	* task_cputime_expired - Compare two task_cputime entities.
				1109	*
				1110	* @sample: The task_cputime structure to be checked for expiration.
				1111	* @expires: Expiration times, against which @sample will be checked.
				1112	*
				1113	* Checks @sample against @expires to see if any field of @sample has expired.
				1114	* Returns true if any field of the former is greater than the corresponding
				1115	* field of the latter if the latter field is set. Otherwise returns false.
				1116	*/
				1117	static inline int task_cputime_expired(const struct task_cputime *sample,
				1118	const struct task_cputime *expires)
				1119	{
				1120	if (expires->utime && sample->utime >= expires->utime)
				1121	return 1;
				1122	if (expires->stime && sample->utime + sample->stime >= expires->stime)
				1123	return 1;
				1124	if (expires->sum_exec_runtime != 0 &&
				1125	sample->sum_exec_runtime >= expires->sum_exec_runtime)
				1126	return 1;
				1127	return 0;
				1128	}
				1129
				1130	/**
				1131	* fastpath_timer_check - POSIX CPU timers fast path.
				1132	*
				1133	* @tsk: The task (thread) being checked.
				1134	*
				1135	* Check the task and thread group timers. If both are zero (there are no
				1136	* timers set) return false. Otherwise snapshot the task and thread group
				1137	* timers and compare them with the corresponding expiration times. Return
				1138	* true if a timer has expired, else return false.
				1139	*/
				1140	static inline int fastpath_timer_check(struct task_struct *tsk)
				1141	{
				1142	struct signal_struct *sig;
				1143
				1144	if (!task_cputime_zero(&tsk->cputime_expires)) {
				1145	struct task_cputime task_sample;
				1146
				1147	task_cputime(tsk, &task_sample.utime, &task_sample.stime);
				1148	task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
				1149	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
				1150	return 1;
				1151	}
				1152
				1153	sig = tsk->signal;
				1154	/*
				1155	* Check if thread group timers expired when the cputimer is
				1156	* running and no other thread in the group is already checking
				1157	* for thread group cputimers. These fields are read without the
				1158	* sighand lock. However, this is fine because this is meant to
				1159	* be a fastpath heuristic to determine whether we should try to
				1160	* acquire the sighand lock to check/handle timers.
				1161	*
				1162	* In the worst case scenario, if 'running' or 'checking_timer' gets
				1163	* set but the current thread doesn't see the change yet, we'll wait
				1164	* until the next thread in the group gets a scheduler interrupt to
				1165	* handle the timer. This isn't an issue in practice because these
				1166	* types of delays with signals actually getting sent are expected.
				1167	*/
				1168	if (READ_ONCE(sig->cputimer.running) &&
				1169	!READ_ONCE(sig->cputimer.checking_timer)) {
				1170	struct task_cputime group_sample;
				1171
				1172	sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
				1173
				1174	if (task_cputime_expired(&group_sample, &sig->cputime_expires))
				1175	return 1;
				1176	}
				1177
				1178	return 0;
				1179	}
				1180
				1181	/*
				1182	* This is called from the timer interrupt handler. The irq handler has
				1183	* already updated our counts. We need to check if any timers fire now.
				1184	* Interrupts are disabled.
				1185	*/
				1186	void run_posix_cpu_timers(struct task_struct *tsk)
				1187	{
				1188	LIST_HEAD(firing);
				1189	struct k_itimer timer, next;
				1190	unsigned long flags;
				1191
				1192	WARN_ON_ONCE(!irqs_disabled());
				1193
				1194	/*
				1195	* The fast path checks that there are no expired thread or thread
				1196	* group timers. If that's so, just return.
				1197	*/
				1198	if (!fastpath_timer_check(tsk))
				1199	return;
				1200
				1201	if (!lock_task_sighand(tsk, &flags))
				1202	return;
				1203	/*
				1204	* Here we take off tsk->signal->cpu_timers[N] and
				1205	* tsk->cpu_timers[N] all the timers that are firing, and
				1206	* put them on the firing list.
				1207	*/
				1208	check_thread_timers(tsk, &firing);
				1209
				1210	check_process_timers(tsk, &firing);
				1211
				1212	/*
				1213	* We must release these locks before taking any timer's lock.
				1214	* There is a potential race with timer deletion here, as the
				1215	* siglock now protects our private firing list. We have set
				1216	* the firing flag in each timer, so that a deletion attempt
				1217	* that gets the timer lock before we do will give it up and
				1218	* spin until we've taken care of that timer below.
				1219	*/
				1220	unlock_task_sighand(tsk, &flags);
				1221
				1222	/*
				1223	* Now that all the timers on our list have the firing flag,
				1224	* no one will touch their list entries but us. We'll take
				1225	* each timer's lock before clearing its firing flag, so no
				1226	* timer call will interfere.
				1227	*/
				1228	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
				1229	int cpu_firing;
				1230
				1231	spin_lock(&timer->it_lock);
				1232	list_del_init(&timer->it.cpu.entry);
				1233	cpu_firing = timer->it.cpu.firing;
				1234	timer->it.cpu.firing = 0;
				1235	/*
				1236	* The firing flag is -1 if we collided with a reset
				1237	* of the timer, which already reported this
				1238	* almost-firing as an overrun. So don't generate an event.
				1239	*/
				1240	if (likely(cpu_firing >= 0))
				1241	cpu_timer_fire(timer);
				1242	spin_unlock(&timer->it_lock);
				1243	}
				1244	}
				1245
				1246	/*
				1247	* Set one of the process-wide special case CPU timers or RLIMIT_CPU.
				1248	* The tsk->sighand->siglock must be held by the caller.
				1249	*/
				1250	void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
				1251	cputime_t newval, cputime_t oldval)
				1252	{
				1253	unsigned long long now;
				1254
				1255	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
				1256	cpu_timer_sample_group(clock_idx, tsk, &now);
				1257
				1258	if (oldval) {
				1259	/*
				1260	* We are setting itimer. The *oldval is absolute and we update
				1261	* it to be relative, *newval argument is relative and we update
				1262	* it to be absolute.
				1263	*/
				1264	if (*oldval) {
				1265	if (*oldval <= now) {
				1266	/* Just about to fire. */
				1267	*oldval = cputime_one_jiffy;
				1268	} else {
				1269	*oldval -= now;
				1270	}
				1271	}
				1272
				1273	if (!*newval)
				1274	goto out;
				1275	*newval += now;
				1276	}
				1277
				1278	/*
				1279	* Update expiration cache if we are the earliest timer, or eventually
				1280	* RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
				1281	*/
				1282	switch (clock_idx) {
				1283	case CPUCLOCK_PROF:
				1284	if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
				1285	tsk->signal->cputime_expires.prof_exp = *newval;
				1286	break;
				1287	case CPUCLOCK_VIRT:
				1288	if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
				1289	tsk->signal->cputime_expires.virt_exp = *newval;
				1290	break;
				1291	}
				1292	out:
				1293	posix_cpu_timer_kick_nohz();
				1294	}
				1295
				1296	static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
				1297	struct timespec rqtp, struct itimerspec it)
				1298	{
				1299	struct k_itimer timer;
				1300	int error;
				1301
				1302	/*
				1303	* Set up a temporary timer and then wait for it to go off.
				1304	*/
				1305	memset(&timer, 0, sizeof timer);
				1306	spin_lock_init(&timer.it_lock);
				1307	timer.it_clock = which_clock;
				1308	timer.it_overrun = -1;
				1309	error = posix_cpu_timer_create(&timer);
				1310	timer.it_process = current;
				1311	if (!error) {
				1312	static struct itimerspec zero_it;
				1313
				1314	memset(it, 0, sizeof *it);
				1315	it->it_value = *rqtp;
				1316
				1317	spin_lock_irq(&timer.it_lock);
				1318	error = posix_cpu_timer_set(&timer, flags, it, NULL);
				1319	if (error) {
				1320	spin_unlock_irq(&timer.it_lock);
				1321	return error;
				1322	}
				1323
				1324	while (!signal_pending(current)) {
				1325	if (timer.it.cpu.expires == 0) {
				1326	/*
				1327	* Our timer fired and was reset, below
				1328	* deletion can not fail.
				1329	*/
				1330	posix_cpu_timer_del(&timer);
				1331	spin_unlock_irq(&timer.it_lock);
				1332	return 0;
				1333	}
				1334
				1335	/*
				1336	* Block until cpu_timer_fire (or a signal) wakes us.
				1337	*/
				1338	__set_current_state(TASK_INTERRUPTIBLE);
				1339	spin_unlock_irq(&timer.it_lock);
				1340	schedule();
				1341	spin_lock_irq(&timer.it_lock);
				1342	}
				1343
				1344	/*
				1345	* We were interrupted by a signal.
				1346	*/
				1347	sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
				1348	error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
				1349	if (!error) {
				1350	/*
				1351	* Timer is now unarmed, deletion can not fail.
				1352	*/
				1353	posix_cpu_timer_del(&timer);
				1354	}
				1355	spin_unlock_irq(&timer.it_lock);
				1356
				1357	while (error == TIMER_RETRY) {
				1358	/*
				1359	* We need to handle case when timer was or is in the
				1360	* middle of firing. In other cases we already freed
				1361	* resources.
				1362	*/
				1363	spin_lock_irq(&timer.it_lock);
				1364	error = posix_cpu_timer_del(&timer);
				1365	spin_unlock_irq(&timer.it_lock);
				1366	}
				1367
				1368	if ((it->it_value.tv_sec \| it->it_value.tv_nsec) == 0) {
				1369	/*
				1370	* It actually did fire already.
				1371	*/
				1372	return 0;
				1373	}
				1374
				1375	error = -ERESTART_RESTARTBLOCK;
				1376	}
				1377
				1378	return error;
				1379	}
				1380
				1381	static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
				1382
				1383	static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
				1384	struct timespec rqtp, struct timespec __user rmtp)
				1385	{
				1386	struct restart_block *restart_block = &current->restart_block;
				1387	struct itimerspec it;
				1388	int error;
				1389
				1390	/*
				1391	* Diagnose required errors first.
				1392	*/
				1393	if (CPUCLOCK_PERTHREAD(which_clock) &&
				1394	(CPUCLOCK_PID(which_clock) == 0 \|\|
				1395	CPUCLOCK_PID(which_clock) == current->pid))
				1396	return -EINVAL;
				1397
				1398	error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);
				1399
				1400	if (error == -ERESTART_RESTARTBLOCK) {
				1401
				1402	if (flags & TIMER_ABSTIME)
				1403	return -ERESTARTNOHAND;
				1404	/*
				1405	* Report back to the user the time still remaining.
				1406	*/
				1407	if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
				1408	return -EFAULT;
				1409
				1410	restart_block->fn = posix_cpu_nsleep_restart;
				1411	restart_block->nanosleep.clockid = which_clock;
				1412	restart_block->nanosleep.rmtp = rmtp;
				1413	restart_block->nanosleep.expires = timespec_to_ns(rqtp);
				1414	}
				1415	return error;
				1416	}
				1417
				1418	static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
				1419	{
				1420	clockid_t which_clock = restart_block->nanosleep.clockid;
				1421	struct timespec t;
				1422	struct itimerspec it;
				1423	int error;
				1424
				1425	t = ns_to_timespec(restart_block->nanosleep.expires);
				1426
				1427	error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
				1428
				1429	if (error == -ERESTART_RESTARTBLOCK) {
				1430	struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
				1431	/*
				1432	* Report back to the user the time still remaining.
				1433	*/
				1434	if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
				1435	return -EFAULT;
				1436
				1437	restart_block->nanosleep.expires = timespec_to_ns(&t);
				1438	}
				1439	return error;
				1440
				1441	}
				1442
				1443	#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
				1444	#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
				1445
				1446	static int process_cpu_clock_getres(const clockid_t which_clock,
				1447	struct timespec *tp)
				1448	{
				1449	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
				1450	}
				1451	static int process_cpu_clock_get(const clockid_t which_clock,
				1452	struct timespec *tp)
				1453	{
				1454	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
				1455	}
				1456	static int process_cpu_timer_create(struct k_itimer *timer)
				1457	{
				1458	timer->it_clock = PROCESS_CLOCK;
				1459	return posix_cpu_timer_create(timer);
				1460	}
				1461	static int process_cpu_nsleep(const clockid_t which_clock, int flags,
				1462	struct timespec *rqtp,
				1463	struct timespec __user *rmtp)
				1464	{
				1465	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
				1466	}
				1467	static long process_cpu_nsleep_restart(struct restart_block *restart_block)
				1468	{
				1469	return -EINVAL;
				1470	}
				1471	static int thread_cpu_clock_getres(const clockid_t which_clock,
				1472	struct timespec *tp)
				1473	{
				1474	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
				1475	}
				1476	static int thread_cpu_clock_get(const clockid_t which_clock,
				1477	struct timespec *tp)
				1478	{
				1479	return posix_cpu_clock_get(THREAD_CLOCK, tp);
				1480	}
				1481	static int thread_cpu_timer_create(struct k_itimer *timer)
				1482	{
				1483	timer->it_clock = THREAD_CLOCK;
				1484	return posix_cpu_timer_create(timer);
				1485	}
				1486
				1487	struct k_clock clock_posix_cpu = {
				1488	.clock_getres = posix_cpu_clock_getres,
				1489	.clock_set = posix_cpu_clock_set,
				1490	.clock_get = posix_cpu_clock_get,
				1491	.timer_create = posix_cpu_timer_create,
				1492	.nsleep = posix_cpu_nsleep,
				1493	.nsleep_restart = posix_cpu_nsleep_restart,
				1494	.timer_set = posix_cpu_timer_set,
				1495	.timer_del = posix_cpu_timer_del,
				1496	.timer_get = posix_cpu_timer_get,
				1497	};
				1498
				1499	static __init int init_posix_cpu_timers(void)
				1500	{
				1501	struct k_clock process = {
				1502	.clock_getres = process_cpu_clock_getres,
				1503	.clock_get = process_cpu_clock_get,
				1504	.timer_create = process_cpu_timer_create,
				1505	.nsleep = process_cpu_nsleep,
				1506	.nsleep_restart = process_cpu_nsleep_restart,
				1507	};
				1508	struct k_clock thread = {
				1509	.clock_getres = thread_cpu_clock_getres,
				1510	.clock_get = thread_cpu_clock_get,
				1511	.timer_create = thread_cpu_timer_create,
				1512	};
				1513	struct timespec ts;
				1514
				1515	posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
				1516	posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
				1517
				1518	cputime_to_timespec(cputime_one_jiffy, &ts);
				1519	onecputick = ts.tv_nsec;
				1520	WARN_ON(ts.tv_sec != 0);
				1521
				1522	return 0;
				1523	}
				1524	__initcall(init_posix_cpu_timers);