seccomp: Support atomic "addfd + send reply"

Alban Crequy reported a race condition userspace faces when we want to
add some fds and make the syscall return them[1] using seccomp notify.

The problem is that currently two different ioctl() calls are needed by
the process handling the syscalls (agent) for another userspace process
(target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and
SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible
for the agent to do the first ioctl to add a file descriptor but the
target is interrupted (EINTR) before the agent does the second ioctl()
call.

This patch adds a flag to the ADDFD ioctl() so it adds the fd and
returns that value atomically to the target program, as suggested by
Kees Cook[2]. This is done by simply allowing
seccomp_do_user_notification() to add the fd and return it in this case.
Therefore, in this case the target wakes up from the wait in
seccomp_do_user_notification() either to interrupt the syscall or to add
the fd and return it.

This "allocate an fd and return" functionality is useful for syscalls
that return a file descriptor only, like connect(2). Other syscalls that
return a file descriptor but not as return value (or return more than
one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not
work with this flag.

This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and
SECCOMP_IOCTL_NOTIF_SEND into an atomic opteration. The notification's
return value, nor error can be set by the user. Upon successful invocation
of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND
flag, the notifying process's errno will be 0, and the return value will
be the file descriptor number that was installed.

[1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/
[2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/

Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me
This commit is contained in:
Rodrigo Campos
2021-05-17 12:39:07 -07:00
committed by Kees Cook
parent ddc4739169
commit 0ae71c7720
3 changed files with 58 additions and 6 deletions

View File

@@ -107,6 +107,7 @@ struct seccomp_knotif {
* installing process should allocate the fd as normal.
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed.
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
* @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0).
* @completion: Indicates that the installing process has completed fd
@@ -118,6 +119,7 @@ struct seccomp_kaddfd {
struct file *file;
int fd;
unsigned int flags;
__u32 ioctl_flags;
union {
bool setfd;
@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++;
}
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
int fd;
/*
* Remove the notification, and reset the list pointers, indicating
* that it has been handled.
*/
list_del_init(&addfd->list);
if (!addfd->setfd)
addfd->ret = receive_fd(addfd->file, addfd->flags);
fd = receive_fd(addfd->file, addfd->flags);
else
addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
addfd->flags);
fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
addfd->ret = fd;
if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
/* If we fail reset and return an error to the notifier */
if (fd < 0) {
n->state = SECCOMP_NOTIFY_SENT;
} else {
/* Return the FD we just added */
n->flags = 0;
n->error = 0;
n->val = fd;
}
}
/*
* Mark the notification as completed. From this point, addfd mem
* might be invalidated and we can't safely read it anymore.
*/
complete(&addfd->completion);
}
@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
struct seccomp_kaddfd, list);
/* Check if we were woken up by a addfd message */
if (addfd)
seccomp_handle_addfd(addfd);
seccomp_handle_addfd(addfd, &n);
} while (n.state != SECCOMP_NOTIFY_REPLIED);
@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (addfd.newfd_flags & ~O_CLOEXEC)
return -EINVAL;
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
return -EINVAL;
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (!kaddfd.file)
return -EBADF;
kaddfd.ioctl_flags = addfd.flags;
kaddfd.flags = addfd.newfd_flags;
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
kaddfd.fd = addfd.newfd;
@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
goto out_unlock;
}
if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
/*
* Disallow queuing an atomic addfd + send reply while there are
* some addfd requests still to process.
*
* There is no clear reason to support it and allows us to keep
* the loop on the other side straight-forward.
*/
if (!list_empty(&knotif->addfd)) {
ret = -EBUSY;
goto out_unlock;
}
/* Allow exactly only one reply */
knotif->state = SECCOMP_NOTIFY_REPLIED;
}
list_add(&kaddfd.list, &knotif->addfd);
complete(&knotif->ready);
mutex_unlock(&filter->notify_lock);