mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 12:43:29 +02:00
Introduce a new bind mount property to allow idmapping mounts. The MOUNT_ATTR_IDMAP flag can be set via the new mount_setattr() syscall together with a file descriptor referring to a user namespace. The user namespace referenced by the namespace file descriptor will be attached to the bind mount. All interactions with the filesystem going through that mount will be mapped according to the mapping specified in the user namespace attached to it. Using user namespaces to mark mounts means we can reuse all the existing infrastructure in the kernel for handling idmappings and can also use this for permission checking to allow unprivileged users to create idmapped mounts in the future. Idmapping a mount is decoupled from the caller's user and mount namespace. This means idmapped mounts can be created in the initial user namespace, which is an important use-case for systemd-homed, portable usb-sticks between systems, sharing data between the initial user namespace and unprivileged containers, and other use-cases that have been brought up. For example, assume a home directory where all files are owned by uid and gid 1000 and the home directory is brought to a new laptop where the user has id 12345. The system administrator can simply create a mount of this home directory with a mapping of 1000:12345:1 and other mappings to indicate the ids should be kept. (With this it is e.g. also possible to create idmapped mounts on the host with an identity mapping 1:1:100000 where the root user is not mapped. A user with root access that e.g. has been pivot rooted into such a mount on the host will not be able to execute, read, write, or create files as root.) Given that mapping a mount is decoupled from the caller's user namespace, a sufficiently privileged process such as a container manager can set up an idmapped mount for the container and the container can simply pivot root to it. There's no need for the container to do anything.
The mount will appear correctly mapped independent of the user namespace the container uses. This means we don't need to mark a mount as idmappable. In order to create an idmapped mount the caller must currently be privileged in the user namespace of the superblock the mount belongs to. Once a mount has been idmapped we don't allow it to change its mapping. This keeps permission checking and life-cycle management simple. Users wanting to change the idmapping can always create a new detached mount with a different idmapping. Link: https://lore.kernel.org/r/20210121131959.646623-36-christian.brauner@ubuntu.com Cc: Christoph Hellwig <hch@lst.de> Cc: David Howells <dhowells@redhat.com> Cc: Mauricio Vásquez Bernal <mauricio@kinvolk.io> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
125 lines
4.0 KiB
C
125 lines
4.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
 *
 * Definitions for mount interface. This describes the in-kernel
 * linked list of mounted filesystems.
 *
 * Author:  Marco van Wieringen <mvw@planets.elm.net>
 *
 */
|
|
#ifndef _LINUX_MOUNT_H
|
|
#define _LINUX_MOUNT_H
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/list.h>
|
|
#include <linux/nodemask.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/seqlock.h>
|
|
#include <linux/atomic.h>
|
|
|
|
/*
 * Forward declarations of types this header refers to by pointer only.
 * struct user_namespace is named by struct vfsmount and mnt_user_ns().
 */
struct super_block;
struct vfsmount;
struct dentry;
struct mnt_namespace;
struct fs_context;
struct user_namespace;
|
|
|
|
/*
 * Mount flag bits (MNT_*). Every flag below is a distinct single bit, so
 * flags can be OR-ed together into the masks defined further down.
 */
#define MNT_NOSUID	0x01
#define MNT_NODEV	0x02
#define MNT_NOEXEC	0x04
#define MNT_NOATIME	0x08
#define MNT_NODIRATIME	0x10
#define MNT_RELATIME	0x20
#define MNT_READONLY	0x40	/* does the user want this to be r/o? */
#define MNT_NOSYMFOLLOW	0x80

#define MNT_SHRINKABLE	0x100
#define MNT_WRITE_HOLD	0x200

#define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
#define MNT_UNBINDABLE	0x2000	/* if the vfsmount is an unbindable mount */
/*
 * MNT_SHARED_MASK is the set of flags that should be cleared when a
 * mount becomes shared.  Currently, this is only the flag that says a
 * mount cannot be bind mounted, since this is how we create a mount
 * that shares events with another mount.  If you add a new MNT_*
 * flag, consider how it interacts with shared mounts.
 */
#define MNT_SHARED_MASK	(MNT_UNBINDABLE)

/* Union of the eight low flag bits (0x01..0x80). */
#define MNT_USER_SETTABLE_MASK  (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
				 | MNT_READONLY | MNT_NOSYMFOLLOW)

/* The three atime-related flag bits. */
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME)

/*
 * Several flags named here (MNT_INTERNAL, MNT_DOOMED, MNT_SYNC_UMOUNT,
 * MNT_MARKED, MNT_CURSOR) are defined later in this list.  That is fine:
 * a macro body is only expanded where the macro is used, by which point
 * all of them are defined.
 */
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
			    MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
			    MNT_CURSOR)

#define MNT_INTERNAL	0x4000

#define MNT_LOCK_ATIME		0x040000
#define MNT_LOCK_NOEXEC		0x080000
#define MNT_LOCK_NOSUID		0x100000
#define MNT_LOCK_NODEV		0x200000
#define MNT_LOCK_READONLY	0x400000
#define MNT_LOCKED		0x800000
#define MNT_DOOMED		0x1000000
#define MNT_SYNC_UMOUNT		0x2000000
#define MNT_MARKED		0x4000000
#define MNT_UMOUNT		0x8000000
#define MNT_CURSOR		0x10000000
|
|
|
|
/*
 * struct vfsmount - the part of a mount exposed through this header.
 * @mnt_root:   root of the mounted tree
 * @mnt_sb:     pointer to superblock
 * @mnt_flags:  NOTE(review): presumably a bitmask of the MNT_* flags
 *              defined in this header - confirm against users
 * @mnt_userns: user namespace pointer; mnt_user_ns() reads it with
 *              smp_load_acquire()
 */
struct vfsmount {
	struct dentry *mnt_root;	/* root of the mounted tree */
	struct super_block *mnt_sb;	/* pointer to superblock */
	int mnt_flags;
	struct user_namespace *mnt_userns;
} __randomize_layout;
|
|
|
|
static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
|
|
{
|
|
/* Pairs with smp_store_release() in do_idmap_mount(). */
|
|
return smp_load_acquire(&mnt->mnt_userns);
|
|
}
|
|
|
|
/*
 * Declarations only: none of the functions or variables below are defined
 * in this header.
 */
struct file; /* forward dec */
struct path;

extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(const struct path *path);
extern bool __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);

extern struct vfsmount *clone_private_mount(const struct path *path);
extern int __mnt_want_write(struct vfsmount *mnt);
extern void __mnt_drop_write(struct vfsmount *mnt);

struct file_system_type;
extern struct vfsmount *fc_mount(struct fs_context *fc);
extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
				      int flags, const char *name,
				      void *data);
extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
				     struct file_system_type *type,
				     const char *name, void *data);

extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);

extern dev_t name_to_dev_t(const char *name);

extern unsigned int sysctl_mount_max;

extern bool path_is_mountpoint(const struct path *path);

extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
|
|
|
|
#endif /* _LINUX_MOUNT_H */
|