mirror of
https://github.com/tbsdtv/linux_media.git
synced 2025-07-23 04:33:26 +02:00
Merge tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux
Pull splice updates from Jens Axboe:

"This kills off ITER_PIPE to avoid a race between truncate, iov_iter_revert() on the pipe and an as-yet incomplete DMA to a bio with unpinned/unref'ed pages from an O_DIRECT splice read. This causes memory corruption.

Instead, we either use (a) filemap_splice_read(), which invokes the buffered file reading code and splices from the pagecache into the pipe; (b) copy_splice_read(), which bulk-allocates a buffer, reads into it and then pushes the filled pages into the pipe; or (c) handle it in filesystem-specific code.

Summary:

- Rename direct_splice_read() to copy_splice_read()
- Simplify the calculations for the number of pages to be reclaimed in copy_splice_read()
- Turn do_splice_to() into a helper, vfs_splice_read(), so that it can be used by overlayfs and coda to perform the checks on the lower fs
- Make vfs_splice_read() jump to copy_splice_read() to handle direct-I/O and DAX
- Provide shmem with its own splice_read to handle non-existent pages in the pagecache. We don't want a ->read_folio() as we don't want to populate holes, but filemap_get_pages() requires it
- Provide overlayfs with its own splice_read to call down to a lower layer as overlayfs doesn't provide ->read_folio()
- Provide coda with its own splice_read to call down to a lower layer as coda doesn't provide ->read_folio()
- Direct ->splice_read to copy_splice_read() in tty, procfs, kernfs and random files as they just copy to the output buffer and don't splice pages
- Provide wrappers for afs, ceph, ecryptfs, ext4, f2fs, nfs, ntfs3, ocfs2, orangefs, xfs and zonefs to do locking and/or revalidation
- Make cifs use filemap_splice_read()
- Replace pointers to generic_file_splice_read() with pointers to filemap_splice_read() as DIO and DAX are handled in the caller; filesystems can still provide their own alternate ->splice_read() op
- Remove generic_file_splice_read()
- Remove ITER_PIPE and its paraphernalia as generic_file_splice_read was the only user"

* tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux: (31 commits)
  splice: kdoc for filemap_splice_read() and copy_splice_read()
  iov_iter: Kill ITER_PIPE
  splice: Remove generic_file_splice_read()
  splice: Use filemap_splice_read() instead of generic_file_splice_read()
  cifs: Use filemap_splice_read()
  trace: Convert trace/seq to use copy_splice_read()
  zonefs: Provide a splice-read wrapper
  xfs: Provide a splice-read wrapper
  orangefs: Provide a splice-read wrapper
  ocfs2: Provide a splice-read wrapper
  ntfs3: Provide a splice-read wrapper
  nfs: Provide a splice-read wrapper
  f2fs: Provide a splice-read wrapper
  ext4: Provide a splice-read wrapper
  ecryptfs: Provide a splice-read wrapper
  ceph: Provide a splice-read wrapper
  afs: Provide a splice-read wrapper
  9p: Add splice_read wrapper
  net: Make sock_splice_read() use copy_splice_read() by default
  tty, proc, kernfs, random: Use copy_splice_read()
  ...
This commit is contained in:
31
mm/filemap.c
31
mm/filemap.c
@@ -2693,8 +2693,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
if (unlikely(iocb->ki_pos >= i_size_read(inode)))
|
||||
break;
|
||||
|
||||
error = filemap_get_pages(iocb, iter->count, &fbatch,
|
||||
iov_iter_is_pipe(iter));
|
||||
error = filemap_get_pages(iocb, iter->count, &fbatch, false);
|
||||
if (error < 0)
|
||||
break;
|
||||
|
||||
@@ -2878,9 +2877,24 @@ size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
|
||||
return spliced;
|
||||
}
|
||||
|
||||
/*
|
||||
* Splice folios from the pagecache of a buffered (ie. non-O_DIRECT) file into
|
||||
* a pipe.
|
||||
/**
|
||||
* filemap_splice_read - Splice data from a file's pagecache into a pipe
|
||||
* @in: The file to read from
|
||||
* @ppos: Pointer to the file position to read from
|
||||
* @pipe: The pipe to splice into
|
||||
* @len: The amount to splice
|
||||
* @flags: The SPLICE_F_* flags
|
||||
*
|
||||
* This function gets folios from a file's pagecache and splices them into the
|
||||
* pipe. Readahead will be called as necessary to fill more folios. This may
|
||||
* be used for blockdevs also.
|
||||
*
|
||||
* Return: On success, the number of bytes read will be returned and *@ppos
|
||||
* will be updated if appropriate; 0 will be returned if there is no more data
|
||||
* to be read; -EAGAIN will be returned if the pipe had no space, and some
|
||||
* other negative error code will be returned on error. A short read may occur
|
||||
* if the pipe has insufficient space, we reach the end of the data or we hit a
|
||||
* hole.
|
||||
*/
|
||||
ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
|
||||
struct pipe_inode_info *pipe,
|
||||
@@ -2893,6 +2907,9 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
|
||||
bool writably_mapped;
|
||||
int i, error = 0;
|
||||
|
||||
if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))
|
||||
return 0;
|
||||
|
||||
init_sync_kiocb(&iocb, in);
|
||||
iocb.ki_pos = *ppos;
|
||||
|
||||
@@ -2906,7 +2923,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
|
||||
do {
|
||||
cond_resched();
|
||||
|
||||
if (*ppos >= i_size_read(file_inode(in)))
|
||||
if (*ppos >= i_size_read(in->f_mapping->host))
|
||||
break;
|
||||
|
||||
iocb.ki_pos = *ppos;
|
||||
@@ -2922,7 +2939,7 @@ ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
|
||||
* part of the page is not copied back to userspace (unless
|
||||
* another truncate extends the file - this is desired though).
|
||||
*/
|
||||
isize = i_size_read(file_inode(in));
|
||||
isize = i_size_read(in->f_mapping->host);
|
||||
if (unlikely(*ppos >= isize))
|
||||
break;
|
||||
end_offset = min_t(loff_t, isize, *ppos + len);
|
||||
|
134
mm/shmem.c
134
mm/shmem.c
@@ -2731,6 +2731,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
return retval ? retval : error;
|
||||
}
|
||||
|
||||
/*
 * ->get() hook of zero_pipe_buf_ops.
 *
 * Takes no reference on anything and unconditionally reports success.
 */
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	return true;
}
|
||||
|
||||
/*
 * ->release() hook of zero_pipe_buf_ops.
 *
 * Deliberately a no-op: the matching ->get() hook takes no reference, so
 * there is nothing to drop here.
 */
static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
}
|
||||
|
||||
/*
 * ->try_steal() hook of zero_pipe_buf_ops.
 *
 * Always refuses: the page behind these buffers is never handed over to the
 * caller.
 */
static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	return false;
}
|
||||
|
||||
static const struct pipe_buf_operations zero_pipe_buf_ops = {
|
||||
.release = zero_pipe_buf_release,
|
||||
.try_steal = zero_pipe_buf_try_steal,
|
||||
.get = zero_pipe_buf_get,
|
||||
};
|
||||
|
||||
static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
|
||||
loff_t fpos, size_t size)
|
||||
{
|
||||
size_t offset = fpos & ~PAGE_MASK;
|
||||
|
||||
size = min_t(size_t, size, PAGE_SIZE - offset);
|
||||
|
||||
if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
|
||||
struct pipe_buffer *buf = pipe_head_buf(pipe);
|
||||
|
||||
*buf = (struct pipe_buffer) {
|
||||
.ops = &zero_pipe_buf_ops,
|
||||
.page = ZERO_PAGE(0),
|
||||
.offset = offset,
|
||||
.len = size,
|
||||
};
|
||||
pipe->head++;
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
 * Splice data from a shmem file into a pipe.
 *
 * @in:    file to read from
 * @ppos:  file position to start at; advanced by the number of bytes spliced
 * @pipe:  pipe to splice into
 * @len:   maximum number of bytes to splice
 * @flags: SPLICE_F_* flags (not consulted by this function)
 *
 * Each iteration looks up the folio at *@ppos with shmem_get_folio(SGP_READ).
 * When a folio is returned it is spliced with splice_folio_into_pipe(); when
 * the lookup succeeds without a folio, a zero-page buffer is spliced instead
 * rather than a new page being instantiated in the file.  Both paths are
 * limited to the bytes that lie below the current i_size.
 *
 * Return: the number of bytes spliced if that is non-zero; otherwise 0, or a
 * negative error code (-EIO for a hwpoisoned folio, or whatever
 * shmem_get_folio() returned, except that -EINVAL is reported as 0).
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio = NULL;
	size_t total_spliced = 0, used, npages, n, part;
	loff_t isize;
	int error = 0;

	/* Work out how much data we can actually add into the pipe */
	used = pipe_occupancy(pipe->head, pipe->tail);
	npages = max_t(ssize_t, pipe->max_usage - used, 0);
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		if (*ppos >= i_size_read(inode))
			break;

		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
					SGP_READ);
		if (error) {
			/* -EINVAL is not reported to the caller; just stop. */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			/* The reference is dropped after the loop. */
			if (folio_test_hwpoison(folio)) {
				error = -EIO;
				break;
			}
		}

		/*
		 * i_size must be checked after we know the pages are Uptodate.
		 *
		 * Checking i_size after the lookup allows us to calculate the
		 * correct value for "part", which means the zero-filled part
		 * of the page is not copied back to userspace (unless another
		 * truncate extends the file - this is desired though).
		 */
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		part = min_t(loff_t, isize - *ppos, len);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping))
				flush_dcache_folio(folio);
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can
			 * now splice it into the pipe.
			 */
			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
			folio_put(folio);
			folio = NULL;
		} else {
			/*
			 * No folio at this index: supply zeroes, but only up
			 * to i_size ("part"), not the full remaining request
			 * ("len"), so that nothing beyond EOF is spliced and
			 * *ppos is never pushed past the end of the file.
			 */
			n = splice_zeropage_into_pipe(pipe, *ppos, part);
		}

		if (!n)
			break;
		len -= n;
		total_spliced += n;
		*ppos += n;
		in->f_ra.prev_pos = *ppos;
		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
			break;

		cond_resched();
	} while (len);

	/* Only reached with a folio held on the hwpoison exit path. */
	if (folio)
		folio_put(folio);

	file_accessed(in);
	return total_spliced ? total_spliced : error;
}
|
||||
|
||||
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
@@ -3971,7 +4103,7 @@ static const struct file_operations shmem_file_operations = {
|
||||
.read_iter = shmem_file_read_iter,
|
||||
.write_iter = generic_file_write_iter,
|
||||
.fsync = noop_fsync,
|
||||
.splice_read = generic_file_splice_read,
|
||||
.splice_read = shmem_file_splice_read,
|
||||
.splice_write = iter_file_splice_write,
|
||||
.fallocate = shmem_fallocate,
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user