mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
prlimit: do not grab the tasklist_lock
Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning.

The tasklist_lock was grabbed to protect tsk->sighand from disappearing (becoming NULL). tsk->signal was already protected by holding a reference to tsk.

update_rlimit_cpu() assumed tsk->sighand != NULL. With this commit, it attempts to lock_task_sighand(). However, this means that update_rlimit_cpu() can fail. This only happens when a task is exiting. Note that during exec, sighand may *change*, but it will not be NULL.

Prior to this commit, do_prlimit() ensured that update_rlimit_cpu() would not fail by read-locking the tasklist_lock and checking tsk->sighand != NULL.

If update_rlimit_cpu() fails, there may be other tasks that are not exiting that share tsk->signal. However, the group_leader is the last task to be released, so if we cannot update_rlimit_cpu(group_leader), then the entire process is exiting.

The only other caller of update_rlimit_cpu() is selinux_bprm_committing_creds(). It has tsk == current, so update_rlimit_cpu() cannot fail (current->sighand cannot disappear until current exits).

This change resulted in a 14% speedup on a microbenchmark where parents kill and wait on their children, and children call getpriority(), setpriority(), and getrlimit().

Signed-off-by: Barret Rhoden <brho@google.com>
Link: https://lkml.kernel.org/r/20220106172041.522167-4-brho@google.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
This commit is contained in:
committed by
Eric W. Biederman
parent
c57bef0287
commit
18c91bb2d8
@@ -253,7 +253,7 @@ void posix_cpu_timers_exit_group(struct task_struct *task);
|
|||||||
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
|
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
|
||||||
u64 *newval, u64 *oldval);
|
u64 *newval, u64 *oldval);
|
||||||
|
|
||||||
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
|
int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
|
||||||
|
|
||||||
void posixtimer_rearm(struct kernel_siginfo *info);
|
void posixtimer_rearm(struct kernel_siginfo *info);
|
||||||
#endif
|
#endif
|
||||||
|
25
kernel/sys.c
25
kernel/sys.c
@@ -1441,13 +1441,7 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource,
|
|||||||
return -EPERM;
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* protect tsk->signal and tsk->sighand from disappearing */
|
/* Holding a refcount on tsk protects tsk->signal from disappearing. */
|
||||||
read_lock(&tasklist_lock);
|
|
||||||
if (!tsk->sighand) {
|
|
||||||
retval = -ESRCH;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
rlim = tsk->signal->rlim + resource;
|
rlim = tsk->signal->rlim + resource;
|
||||||
task_lock(tsk->group_leader);
|
task_lock(tsk->group_leader);
|
||||||
if (new_rlim) {
|
if (new_rlim) {
|
||||||
@@ -1476,10 +1470,19 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource,
|
|||||||
*/
|
*/
|
||||||
if (!retval && new_rlim && resource == RLIMIT_CPU &&
|
if (!retval && new_rlim && resource == RLIMIT_CPU &&
|
||||||
new_rlim->rlim_cur != RLIM_INFINITY &&
|
new_rlim->rlim_cur != RLIM_INFINITY &&
|
||||||
IS_ENABLED(CONFIG_POSIX_TIMERS))
|
IS_ENABLED(CONFIG_POSIX_TIMERS)) {
|
||||||
update_rlimit_cpu(tsk, new_rlim->rlim_cur);
|
/*
|
||||||
out:
|
* update_rlimit_cpu can fail if the task is exiting, but there
|
||||||
read_unlock(&tasklist_lock);
|
* may be other tasks in the thread group that are not exiting,
|
||||||
|
* and they need their cpu timers adjusted.
|
||||||
|
*
|
||||||
|
* The group_leader is the last task to be released, so if we
|
||||||
|
* cannot update_rlimit_cpu on it, then the entire process is
|
||||||
|
* exiting and we do not need to update at all.
|
||||||
|
*/
|
||||||
|
update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur);
|
||||||
|
}
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -34,14 +34,20 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
|
|||||||
* tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
|
* tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
|
||||||
* necessary. Needs siglock protection since other code may update the
|
* necessary. Needs siglock protection since other code may update the
|
||||||
* expiration cache as well.
|
* expiration cache as well.
|
||||||
|
*
|
||||||
|
* Returns 0 on success, -ESRCH on failure. Can fail if the task is exiting and
|
||||||
|
* we cannot lock_task_sighand. Cannot fail if task is current.
|
||||||
*/
|
*/
|
||||||
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
|
int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
|
||||||
{
|
{
|
||||||
u64 nsecs = rlim_new * NSEC_PER_SEC;
|
u64 nsecs = rlim_new * NSEC_PER_SEC;
|
||||||
|
unsigned long irq_fl;
|
||||||
|
|
||||||
spin_lock_irq(&task->sighand->siglock);
|
if (!lock_task_sighand(task, &irq_fl))
|
||||||
|
return -ESRCH;
|
||||||
set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
|
set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
|
||||||
spin_unlock_irq(&task->sighand->siglock);
|
unlock_task_sighand(task, &irq_fl);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user