mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 04:33:26 +02:00
When switching from kthreads to vhost_tasks two bugs were added:
1. The vhost worker tasks's now show up as processes so scripts doing
ps or ps a would not incorrectly detect the vhost task as another
process. 2. kthreads disabled freeze by setting PF_NOFREEZE, but
vhost tasks's didn't disable or add support for them.
To fix both bugs, this switches the vhost task to be thread in the
process that does the VHOST_SET_OWNER ioctl, and has vhost_worker call
get_signal to support SIGKILL/SIGSTOP and freeze signals. Note that
SIGKILL/STOP support is required because CLONE_THREAD requires
CLONE_SIGHAND which requires those 2 signals to be supported.
This is a modified version of the patch written by Mike Christie
<michael.christie@oracle.com> which was a modified version of patch
originally written by Linus.
Much of what depended upon PF_IO_WORKER now depends on PF_USER_WORKER.
Including ignoring signals, setting up the register state, and having
get_signal return instead of calling do_group_exit.
Tidied up the vhost_task abstraction so that the definition of
vhost_task only needs to be visible inside of vhost_task.c. Making
it easier to review the code and tell what needs to be done where.
As part of this the main loop has been moved from vhost_worker into
vhost_task_fn. vhost_worker now returns true if work was done.
The main loop has been updated to call get_signal which handles
SIGSTOP, freezing, and collects the message that tells the thread to
exit as part of process exit. This collection clears
__fatal_signal_pending. This collection is not guaranteed to
clear signal_pending() so clear that explicitly so the schedule()
sleeps.
For now the vhost thread continues to exist and run work until the
last file descriptor is closed and the release function is called as
part of freeing struct file. To avoid hangs in the coredump
rendezvous and when killing threads in a multi-threaded exec. The
coredump code and de_thread have been modified to ignore vhost threads.
Remvoing the special case for exec appears to require teaching
vhost_dev_flush how to directly complete transactions in case
the vhost thread is no longer running.
Removing the special case for coredump rendezvous requires either the
above fix needed for exec or moving the coredump rendezvous into
get_signal.
Fixes: 6e890c5d50
("vhost: use vhost_tasks for worker threads")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Co-developed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
192 lines
5.0 KiB
C
192 lines
5.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_SCHED_TASK_H
|
|
#define _LINUX_SCHED_TASK_H
|
|
|
|
/*
|
|
* Interface between the scheduler and various task lifetime (fork()/exit())
|
|
* functionality:
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
struct task_struct;
|
|
struct rusage;
|
|
union thread_union;
|
|
struct css_set;
|
|
|
|
/* All the bits taken by the old clone syscall. */
|
|
#define CLONE_LEGACY_FLAGS 0xffffffffULL
|
|
|
|
struct kernel_clone_args {
|
|
u64 flags;
|
|
int __user *pidfd;
|
|
int __user *child_tid;
|
|
int __user *parent_tid;
|
|
const char *name;
|
|
int exit_signal;
|
|
u32 kthread:1;
|
|
u32 io_thread:1;
|
|
u32 user_worker:1;
|
|
u32 no_files:1;
|
|
unsigned long stack;
|
|
unsigned long stack_size;
|
|
unsigned long tls;
|
|
pid_t *set_tid;
|
|
/* Number of elements in *set_tid */
|
|
size_t set_tid_size;
|
|
int cgroup;
|
|
int idle;
|
|
int (*fn)(void *);
|
|
void *fn_arg;
|
|
struct cgroup *cgrp;
|
|
struct css_set *cset;
|
|
};
|
|
|
|
/*
|
|
* This serializes "schedule()" and also protects
|
|
* the run-queue from deletions/modifications (but
|
|
* _adding_ to the beginning of the run-queue has
|
|
* a separate lock).
|
|
*/
|
|
extern rwlock_t tasklist_lock;
|
|
extern spinlock_t mmlist_lock;
|
|
|
|
extern union thread_union init_thread_union;
|
|
extern struct task_struct init_task;
|
|
|
|
extern int lockdep_tasklist_lock_is_held(void);
|
|
|
|
extern asmlinkage void schedule_tail(struct task_struct *prev);
|
|
extern void init_idle(struct task_struct *idle, int cpu);
|
|
|
|
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
|
|
extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
|
|
extern void sched_post_fork(struct task_struct *p);
|
|
extern void sched_dead(struct task_struct *p);
|
|
|
|
void __noreturn do_task_dead(void);
|
|
void __noreturn make_task_dead(int signr);
|
|
|
|
extern void mm_cache_init(void);
|
|
extern void proc_caches_init(void);
|
|
|
|
extern void fork_init(void);
|
|
|
|
extern void release_task(struct task_struct * p);
|
|
|
|
extern int copy_thread(struct task_struct *, const struct kernel_clone_args *);
|
|
|
|
extern void flush_thread(void);
|
|
|
|
#ifdef CONFIG_HAVE_EXIT_THREAD
|
|
extern void exit_thread(struct task_struct *tsk);
|
|
#else
|
|
static inline void exit_thread(struct task_struct *tsk)
|
|
{
|
|
}
|
|
#endif
|
|
extern __noreturn void do_group_exit(int);
|
|
|
|
extern void exit_files(struct task_struct *);
|
|
extern void exit_itimers(struct task_struct *);
|
|
|
|
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
|
|
struct task_struct *copy_process(struct pid *pid, int trace, int node,
|
|
struct kernel_clone_args *args);
|
|
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
|
|
struct task_struct *fork_idle(int);
|
|
extern pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
|
|
unsigned long flags);
|
|
extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
|
|
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
|
|
int kernel_wait(pid_t pid, int *stat);
|
|
|
|
extern void free_task(struct task_struct *tsk);
|
|
|
|
/* sched_exec is called by processes performing an exec */
|
|
#ifdef CONFIG_SMP
|
|
extern void sched_exec(void);
|
|
#else
|
|
#define sched_exec() {}
|
|
#endif
|
|
|
|
static inline struct task_struct *get_task_struct(struct task_struct *t)
|
|
{
|
|
refcount_inc(&t->usage);
|
|
return t;
|
|
}
|
|
|
|
extern void __put_task_struct(struct task_struct *t);
|
|
|
|
static inline void put_task_struct(struct task_struct *t)
|
|
{
|
|
if (refcount_dec_and_test(&t->usage))
|
|
__put_task_struct(t);
|
|
}
|
|
|
|
static inline void put_task_struct_many(struct task_struct *t, int nr)
|
|
{
|
|
if (refcount_sub_and_test(nr, &t->usage))
|
|
__put_task_struct(t);
|
|
}
|
|
|
|
void put_task_struct_rcu_user(struct task_struct *task);
|
|
|
|
/* Free all architecture-specific resources held by a thread. */
|
|
void release_thread(struct task_struct *dead_task);
|
|
|
|
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
|
extern int arch_task_struct_size __read_mostly;
|
|
#else
|
|
# define arch_task_struct_size (sizeof(struct task_struct))
|
|
#endif
|
|
|
|
#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
|
/*
|
|
* If an architecture has not declared a thread_struct whitelist we
|
|
* must assume something there may need to be copied to userspace.
|
|
*/
|
|
static inline void arch_thread_struct_whitelist(unsigned long *offset,
|
|
unsigned long *size)
|
|
{
|
|
*offset = 0;
|
|
/* Handle dynamically sized thread_struct. */
|
|
*size = arch_task_struct_size - offsetof(struct task_struct, thread);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_VMAP_STACK
|
|
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
|
|
{
|
|
return t->stack_vm_area;
|
|
}
|
|
#else
|
|
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
|
|
{
|
|
return NULL;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
|
|
* subscriptions and synchronises with wait4(). Also used in procfs. Also
|
|
* pins the final release of task.io_context. Also protects ->cpuset and
|
|
* ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
|
|
*
|
|
* Nests both inside and outside of read_lock(&tasklist_lock).
|
|
* It must not be nested with write_lock_irq(&tasklist_lock),
|
|
* neither inside nor outside.
|
|
*/
|
|
static inline void task_lock(struct task_struct *p)
|
|
{
|
|
spin_lock(&p->alloc_lock);
|
|
}
|
|
|
|
static inline void task_unlock(struct task_struct *p)
|
|
{
|
|
spin_unlock(&p->alloc_lock);
|
|
}
|
|
|
|
#endif /* _LINUX_SCHED_TASK_H */
|