From fcced95b6ba2a507a83b8b3e0358a8ac16b13e35 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:11 +0530 Subject: [PATCH 01/19] ext2/dax: Fix ext2_setsize when len is page aligned PAGE_ALIGN(x) macro gives the next highest value which is multiple of pagesize. But if x is already page aligned then it simply returns x. So, if x passed is 0 in dax_zero_range() function, that means the length gets passed as 0 to ->iomap_begin(). In ext2 it then calls ext2_get_blocks -> max_blocks as 0 and hits bug_on here in ext2_get_blocks(). BUG_ON(maxblocks == 0); Instead we should be calling dax_truncate_page() here which takes care of it. i.e. it only calls dax_zero_range if the offset is not page/block aligned. This can be easily triggered with following on fsdax mounted pmem device. dd if=/dev/zero of=file count=1 bs=512 truncate -s 0 file [79.525838] EXT2-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your own risk [79.529376] ext2 filesystem being mounted at /mnt1/test supports timestamps until 2038 (0x7fffffff) [93.793207] ------------[ cut here ]------------ [93.795102] kernel BUG at fs/ext2/inode.c:637! [93.796904] invalid opcode: 0000 [#1] PREEMPT SMP PTI [93.798659] CPU: 0 PID: 1192 Comm: truncate Not tainted 6.3.0-rc2-xfstests-00056-g131086faa369 #139 [93.806459] RIP: 0010:ext2_get_blocks.constprop.0+0x524/0x610 <...> [93.835298] Call Trace: [93.836253] [93.837103] ? lock_acquire+0xf8/0x110 [93.838479] ? d_lookup+0x69/0xd0 [93.839779] ext2_iomap_begin+0xa7/0x1c0 [93.841154] iomap_iter+0xc7/0x150 [93.842425] dax_zero_range+0x6e/0xa0 [93.843813] ext2_setsize+0x176/0x1b0 [93.845164] ext2_setattr+0x151/0x200 [93.846467] notify_change+0x341/0x4e0 [93.847805] ? lock_acquire+0xf8/0x110 [93.849143] ? do_truncate+0x74/0xe0 [93.850452] ? 
do_truncate+0x84/0xe0 [93.851739] do_truncate+0x84/0xe0 [93.852974] do_sys_ftruncate+0x2b4/0x2f0 [93.854404] do_syscall_64+0x3f/0x90 [93.855789] entry_SYSCALL_64_after_hwframe+0x72/0xdc CC: stable@vger.kernel.org Fixes: 2aa3048e03d3 ("iomap: switch iomap_zero_range to use iomap_iter") Reviewed-by: Darrick J. Wong Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: <046a58317f29d9603d1068b2bbae47c2332c17ae.1682069716.git.ritesh.list@gmail.com> --- fs/ext2/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 26f135e7ffce..dc76147e7b07 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1259,9 +1259,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); if (IS_DAX(inode)) - error = dax_zero_range(inode, newsize, - PAGE_ALIGN(newsize) - newsize, NULL, - &ext2_iomap_ops); + error = dax_truncate_page(inode, newsize, NULL, + &ext2_iomap_ops); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); From 31b2ebc0929e964f4edfbfa7129d43f7e3c17165 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:12 +0530 Subject: [PATCH 02/19] fs/buffer.c: Add generic_buffers_fsync*() implementation Some of the higher layers like iomap takes inode_lock() when calling generic_write_sync(). Also writeback already happens from other paths without inode lock, so it's difficult to say that we really need sync_mapping_buffers() to take any inode locking here. Having said that, let's add generic_buffers_fsync/_noflush() implementation in buffer.c with no inode_lock/unlock() for now so that filesystems like ext2 and ext4's nojournal mode can use it. Ext4 when got converted to iomap for direct-io already copied it's own variant of __generic_file_fsync() without lock. This patch adds generic_buffers_fsync() & generic_buffers_fsync_noflush() implementations for use in filesystems like ext2 & ext4 respectively. 
Later we can review other filesystems as well to see if we can make generic_buffers_fsync/_noflush() which does not take any inode_lock() as the default path. Tested-by: Disha Goel Reviewed-by: Christoph Hellwig Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: --- fs/buffer.c | 70 +++++++++++++++++++++++++++++++++++++ include/linux/buffer_head.h | 4 +++ 2 files changed, 74 insertions(+) diff --git a/fs/buffer.c b/fs/buffer.c index a7fc561758b1..00cad2658a07 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -592,6 +592,76 @@ int sync_mapping_buffers(struct address_space *mapping) } EXPORT_SYMBOL(sync_mapping_buffers); +/** + * generic_buffers_fsync_noflush - generic buffer fsync implementation + * for simple filesystems with no inode lock + * + * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) + * @datasync: only synchronize essential metadata if true + * + * This is a generic implementation of the fsync method for simple + * filesystems which track all non-inode metadata in the buffers list + * hanging off the address_space structure. 
+ */ +int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, + bool datasync) +{ + struct inode *inode = file->f_mapping->host; + int err; + int ret; + + err = file_write_and_wait_range(file, start, end); + if (err) + return err; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY_ALL)) + goto out; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + + err = sync_inode_metadata(inode, 1); + if (ret == 0) + ret = err; + +out: + /* check and advance again to catch errors after syncing out buffers */ + err = file_check_and_advance_wb_err(file); + if (ret == 0) + ret = err; + return ret; +} +EXPORT_SYMBOL(generic_buffers_fsync_noflush); + +/** + * generic_buffers_fsync - generic buffer fsync implementation + * for simple filesystems with no inode lock + * + * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) + * @datasync: only synchronize essential metadata if true + * + * This is a generic implementation of the fsync method for simple + * filesystems which track all non-inode metadata in the buffers list + * hanging off the address_space structure. This also makes sure that + * a device cache flush operation is called at the end. + */ +int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, + bool datasync) +{ + struct inode *inode = file->f_mapping->host; + int ret; + + ret = generic_buffers_fsync_noflush(file, start, end, datasync); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev); + return ret; +} +EXPORT_SYMBOL(generic_buffers_fsync); + /* * Called when we've recently written block `bblock', and it is known that * `bblock' was for a buffer_boundary() buffer. 
This means that the block at diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1520793c72da..1bd73cefd311 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,6 +217,10 @@ int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); +int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, + bool datasync); +int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, + bool datasync); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) From 5b5b4ff8f92daec4475318c0ec4cb4ed43de9eb6 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:13 +0530 Subject: [PATCH 03/19] ext4: Use generic_buffers_fsync_noflush() implementation ext4 when got converted to iomap for dio, it copied __generic_file_fsync implementation to avoid taking inode_lock in order to avoid any deadlock (since iomap takes an inode_lock while calling generic_write_sync()). The previous patch already added generic_buffers_fsync*() which does not take any inode_lock(). Hence kill the redundant code and use generic_buffers_fsync_noflush() function instead. 
Tested-by: Disha Goel Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: --- fs/ext4/fsync.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index f65fdb27ce14..9cd71d76622d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_jbd2.h" @@ -78,21 +79,13 @@ static int ext4_sync_parent(struct inode *inode) return ret; } -static int ext4_fsync_nojournal(struct inode *inode, bool datasync, - bool *needs_barrier) +static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, + int datasync, bool *needs_barrier) { - int ret, err; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY_ALL)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = sync_inode_metadata(inode, 1); - if (!ret) - ret = err; + struct inode *inode = file->f_inode; + int ret; + ret = generic_buffers_fsync_noflush(file, start, end, datasync); if (!ret) ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) @@ -148,6 +141,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) goto out; } + if (!sbi->s_journal) { + ret = ext4_fsync_nojournal(file, start, end, datasync, + &needs_barrier); + if (needs_barrier) + goto issue_flush; + goto out; + } + ret = file_write_and_wait_range(file, start, end); if (ret) goto out; @@ -157,11 +158,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * Metadata is in the journal, we wait for proper transaction to * commit here. 
*/ - if (!sbi->s_journal) - ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier); - else - ret = ext4_fsync_journal(inode, datasync, &needs_barrier); + ret = ext4_fsync_journal(inode, datasync, &needs_barrier); +issue_flush: if (needs_barrier) { err = blkdev_issue_flush(inode->i_sb->s_bdev); if (!ret) From d05307042500e3f2c06fc2f9e76e8db31d61c7c0 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:14 +0530 Subject: [PATCH 04/19] ext2: Use generic_buffers_fsync() implementation Next patch converts ext2 to use iomap interface for DIO. iomap layer can call generic_write_sync() -> ext2_fsync() from iomap_dio_complete while still holding the inode_lock(). Now writeback from other paths doesn't need inode_lock(). It seems there is also no need of an inode_lock() for sync_mapping_buffers(). It uses it's own mapping->private_lock for it's buffer list handling. Hence this patch is in preparation to move ext2 to iomap. This uses generic_buffers_fsync() which does not take any inode_lock() in ext2_fsync(). Tested-by: Disha Goel Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: <76d206a464574ff91db25bc9e43479b51ca7e307.1682069716.git.ritesh.list@gmail.com> --- fs/ext2/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 6b4bebe982ca..749163787139 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -153,7 +154,7 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct super_block *sb = file->f_mapping->host->i_sb; - ret = generic_file_fsync(file, start, end, datasync); + ret = generic_buffers_fsync(file, start, end, datasync); if (ret == -EIO) /* We don't really know where the IO error happened... 
*/ ext2_error(sb, __func__, From fb5de4358e1aa4753dce73c4dc1aca73ff39cedd Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:15 +0530 Subject: [PATCH 05/19] ext2: Move direct-io to use iomap This patch converts ext2 direct-io path to iomap interface. - This also takes care of DIO_SKIP_HOLES part in which we return -ENOTBLK from ext2_iomap_begin(), in case the write is done on a hole. - This falls back to buffered-io in case of DIO_SKIP_HOLES or in case of a partial write or if any error is detected in ext2_iomap_end(). We try to return -ENOTBLK in such cases. - For any unaligned or extending DIO writes, we pass IOMAP_DIO_FORCE_WAIT flag to ensure synchronous writes. - For extending writes we set IOMAP_F_DIRTY in ext2_iomap_begin because otherwise with dsync writes on devices that support FUA, generic_write_sync won't be called and we might miss inode metadata updates. - Since ext2 now already uses the _nolock variant of sync write, there is no inode lock problem with iomap in this patch. 
- ext2_iomap_ops are now being shared by DIO, DAX & fiemap path Tested-by: Disha Goel Reviewed-by: Christoph Hellwig Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: <610b672a52f2a7ff6dc550fd14d0f995806232a5.1682069716.git.ritesh.list@gmail.com> --- fs/ext2/ext2.h | 1 + fs/ext2/file.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext2/inode.c | 53 ++++++++++++++-------- 3 files changed, 150 insertions(+), 19 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8244366862e4..d0531d4ef499 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -754,6 +754,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_evict_inode(struct inode *); +void ext2_write_failed(struct address_space *mapping, loff_t to); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *); extern int ext2_getattr (struct mnt_idmap *, const struct path *, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 749163787139..98add36c1a59 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -162,12 +162,124 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + + inode_lock_shared(inode); + ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0); + inode_unlock_shared(inode); + + return ret; +} + +static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + loff_t pos = iocb->ki_pos; + struct inode *inode = file_inode(iocb->ki_filp); + + if (error) + goto out; + + /* + * If we are extending the file, we have to update 
i_size here before + * page cache gets invalidated in iomap_dio_rw(). This prevents racing + * buffered reads from zeroing out too much from page cache pages. + * Note that all extending writes always happens synchronously with + * inode lock held by ext2_dio_write_iter(). So it is safe to update + * inode size here for extending file writes. + */ + pos += size; + if (pos > i_size_read(inode)) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } +out: + return error; +} + +static const struct iomap_dio_ops ext2_dio_write_ops = { + .end_io = ext2_dio_write_end_io, +}; + +static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + unsigned int flags = 0; + unsigned long blocksize = inode->i_sb->s_blocksize; + loff_t offset = iocb->ki_pos; + loff_t count = iov_iter_count(from); + + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out_unlock; + + ret = kiocb_modified(iocb); + if (ret) + goto out_unlock; + + /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */ + if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) || + (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize))) + flags |= IOMAP_DIO_FORCE_WAIT; + + ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops, + flags, NULL, 0); + + /* ENOTBLK is magic return value for fallback to buffered-io */ + if (ret == -ENOTBLK) + ret = 0; + + if (ret < 0 && ret != -EIOCBQUEUED) + ext2_write_failed(inode->i_mapping, offset + count); + + /* handle case for partial write and for fallback to buffered write */ + if (ret >= 0 && iov_iter_count(from)) { + loff_t pos, endbyte; + ssize_t status; + int ret2; + + iocb->ki_flags &= ~IOCB_DIRECT; + pos = iocb->ki_pos; + status = generic_perform_write(iocb, from); + if (unlikely(status < 0)) { + ret = status; + goto out_unlock; + } + + iocb->ki_pos += status; + ret += status; + 
endbyte = pos + status - 1; + ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, + endbyte); + if (!ret2) + invalidate_mapping_pages(inode->i_mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + if (ret > 0) + generic_write_sync(iocb, ret); + } + +out_unlock: + inode_unlock(inode); + return ret; +} + static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { #ifdef CONFIG_FS_DAX if (IS_DAX(iocb->ki_filp->f_mapping->host)) return ext2_dax_read_iter(iocb, to); #endif + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_read_iter(iocb, to); + return generic_file_read_iter(iocb, to); } @@ -177,6 +289,9 @@ static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_DAX(iocb->ki_filp->f_mapping->host)) return ext2_dax_write_iter(iocb, from); #endif + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_write_iter(iocb, from); + return generic_file_write_iter(iocb, from); } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index dc76147e7b07..75983215c7a1 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -56,7 +56,7 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) static void ext2_truncate_blocks(struct inode *inode, loff_t offset); -static void ext2_write_failed(struct address_space *mapping, loff_t to) +void ext2_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; @@ -809,9 +809,27 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, bool new = false, boundary = false; u32 bno; int ret; + bool create = flags & IOMAP_WRITE; + + /* + * For writes that could fill holes inside i_size on a + * DIO_SKIP_HOLES filesystem we forbid block creations: only + * overwrites are permitted. 
+ */ + if ((flags & IOMAP_DIRECT) && + (first_block << blkbits) < i_size_read(inode)) + create = 0; + + /* + * Writes that span EOF might trigger an IO size update on completion, + * so consider them to be dirty for the purposes of O_DSYNC even if + * there is no other metadata changes pending or have been made here. + */ + if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode)) + iomap->flags |= IOMAP_F_DIRTY; ret = ext2_get_blocks(inode, first_block, max_blocks, - &bno, &new, &boundary, flags & IOMAP_WRITE); + &bno, &new, &boundary, create); if (ret < 0) return ret; @@ -823,6 +841,12 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->bdev = inode->i_sb->s_bdev; if (ret == 0) { + /* + * Switch to buffered-io for writing to holes in a non-extent + * based filesystem to avoid stale data exposure problem. + */ + if (!create && (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) + return -ENOTBLK; iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; iomap->length = 1 << blkbits; @@ -844,6 +868,13 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { + /* + * Switch to buffered-io in case of any error. + * Blocks allocated can be used by the buffered-io path. 
+ */ + if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE) && written == 0) + return -ENOTBLK; + if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) @@ -908,22 +939,6 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,ext2_get_block); } -static ssize_t -ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - size_t count = iov_iter_count(iter); - loff_t offset = iocb->ki_pos; - ssize_t ret; - - ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); - if (ret < 0 && iov_iter_rw(iter) == WRITE) - ext2_write_failed(mapping, offset + count); - return ret; -} - static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -946,7 +961,7 @@ const struct address_space_operations ext2_aops = { .write_begin = ext2_write_begin, .write_end = ext2_write_end, .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, + .direct_IO = noop_direct_IO, .writepages = ext2_writepages, .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, From 6e335cd789bee7a7111c4fe6d46b1d63cde81511 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:17 +0530 Subject: [PATCH 06/19] ext2: Add direct-io trace points This patch adds the trace point to ext2 direct-io apis in fs/ext2/file.c Here is how the output looks like a.out-467865 [006] 6758.170968: ext2_dio_write_begin: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT|WRITE aio 1 ret 0 a.out-467865 [006] 6758.171061: ext2_dio_write_end: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 0 flags DIRECT|WRITE aio 1 ret -529 kworker/3:153-444162 [003] 6758.171252: ext2_dio_write_endio: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT|WRITE aio 1 ret 0 a.out-468222 [001] 6761.628924: ext2_dio_read_begin: dev 7:12 ino 0xe 
isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 1 ret 0 a.out-468222 [001] 6761.629063: ext2_dio_read_end: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 0 flags DIRECT aio 1 ret -529 a.out-468428 [005] 6763.937454: ext2_dio_write_begin: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0 a.out-468428 [005] 6763.937829: ext2_dio_write_endio: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0 a.out-468428 [005] 6763.937847: ext2_dio_write_end: dev 7:12 ino 0xe isize 0x1000 pos 0x1000 len 0 flags DIRECT aio 0 ret 4096 a.out-468609 [000] 6765.702878: ext2_dio_read_begin: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0 a.out-468609 [000] 6765.703243: ext2_dio_read_end: dev 7:12 ino 0xe isize 0x1000 pos 0x1000 len 0 flags DIRECT aio 0 ret 4096 Reported-and-tested-by: Disha Goel [Need to add CFLAGS_trace for fixing unable to find trace file problem] Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: --- fs/ext2/Makefile | 5 ++- fs/ext2/file.c | 10 +++++- fs/ext2/trace.c | 6 ++++ fs/ext2/trace.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 fs/ext2/trace.c create mode 100644 fs/ext2/trace.h diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index 311479d864a7..8860948ef9ca 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -6,7 +6,10 @@ obj-$(CONFIG_EXT2_FS) += ext2.o ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o + ioctl.o namei.o super.o symlink.o trace.o + +# For tracepoints to include our trace.h from tracepoint infrastructure +CFLAGS_trace.o := -I$(src) ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 98add36c1a59..7a32f202908e 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -29,6 +29,7 @@ #include "ext2.h" #include "xattr.h" #include "acl.h" +#include 
"trace.h" #ifdef CONFIG_FS_DAX static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -168,9 +169,11 @@ static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file->f_mapping->host; ssize_t ret; + trace_ext2_dio_read_begin(iocb, to, 0); inode_lock_shared(inode); ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0); inode_unlock_shared(inode); + trace_ext2_dio_read_end(iocb, to, ret); return ret; } @@ -198,6 +201,7 @@ static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size, mark_inode_dirty(inode); } out: + trace_ext2_dio_write_endio(iocb, size, error); return error; } @@ -214,7 +218,9 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) unsigned long blocksize = inode->i_sb->s_blocksize; loff_t offset = iocb->ki_pos; loff_t count = iov_iter_count(from); + ssize_t status = 0; + trace_ext2_dio_write_begin(iocb, from, 0); inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) @@ -242,7 +248,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) /* handle case for partial write and for fallback to buffered write */ if (ret >= 0 && iov_iter_count(from)) { loff_t pos, endbyte; - ssize_t status; int ret2; iocb->ki_flags &= ~IOCB_DIRECT; @@ -268,6 +273,9 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) out_unlock: inode_unlock(inode); + if (status) + trace_ext2_dio_write_buff_end(iocb, from, status); + trace_ext2_dio_write_end(iocb, from, ret); return ret; } diff --git a/fs/ext2/trace.c b/fs/ext2/trace.c new file mode 100644 index 000000000000..b01cdf6526fd --- /dev/null +++ b/fs/ext2/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "ext2.h" +#include + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/fs/ext2/trace.h b/fs/ext2/trace.h new file mode 100644 index 000000000000..7d230e13576e --- /dev/null +++ b/fs/ext2/trace.h @@ -0,0 +1,94 @@ +// 
SPDX-License-Identifier: GPL-2.0 + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ext2 + +#if !defined(_EXT2_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _EXT2_TRACE_H + +#include + +DECLARE_EVENT_CLASS(ext2_dio_class, + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), + TP_ARGS(iocb, iter, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(size_t, count) + __field(int, ki_flags) + __field(bool, aio) + __field(ssize_t, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->count = iov_iter_count(iter); + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zu flags %s aio %d ret %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->count, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#define DEFINE_DIO_RW_EVENT(name) \ +DEFINE_EVENT(ext2_dio_class, name, \ + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), \ + TP_ARGS(iocb, iter, ret)) +DEFINE_DIO_RW_EVENT(ext2_dio_write_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_write_end); +DEFINE_DIO_RW_EVENT(ext2_dio_write_buff_end); +DEFINE_DIO_RW_EVENT(ext2_dio_read_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_read_end); + +TRACE_EVENT(ext2_dio_write_endio, + TP_PROTO(struct kiocb *iocb, ssize_t size, int ret), + TP_ARGS(iocb, size, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(ssize_t, size) + __field(int, ki_flags) + __field(bool, aio) + __field(int, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = 
file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->size = size; + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zd flags %s aio %d ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->size, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#endif /* _EXT2_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include From 8f1dca19b1e11785f42e70da796942154f63aef9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 11 Jan 2023 15:21:52 -0500 Subject: [PATCH 07/19] ext2_rename(): set_link and delete_entry may fail Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 12 ++++++------ fs/ext2/namei.c | 29 ++++++++++------------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 4a6955a0a116..a3c77ea5a7de 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -606,8 +606,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, if (de->rec_len == 0) { ext2_error(inode->i_sb, __func__, "zero-length directory entry"); - err = -EIO; - goto out; + return -EIO; } pde = de; de = ext2_next_entry(de); @@ -617,7 +616,10 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); - BUG_ON(err); + if (err) { + unlock_page(page); + return err; + } if (pde) pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; @@ -625,9 +627,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, inode->i_ctime = inode->i_mtime = current_time(inode); 
EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); - err = ext2_handle_dirsync(inode); -out: - return err; + return ext2_handle_dirsync(inode); } /* diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 7f5dfa87cc95..dafdd2d41876 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -335,18 +335,16 @@ static int ext2_rename (struct mnt_idmap * idmap, err = dquot_initialize(old_dir); if (err) - goto out; + return err; err = dquot_initialize(new_dir); if (err) - goto out; + return err; old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page, &old_page_addr); - if (IS_ERR(old_de)) { - err = PTR_ERR(old_de); - goto out; - } + if (IS_ERR(old_de)) + return PTR_ERR(old_de); if (S_ISDIR(old_inode->i_mode)) { err = -EIO; @@ -394,27 +392,20 @@ static int ext2_rename (struct mnt_idmap * idmap, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); - ext2_delete_entry(old_de, old_page, old_page_addr); - - if (dir_de) { - if (old_dir != new_dir) { + err = ext2_delete_entry(old_de, old_page, old_page_addr); + if (!err && dir_de) { + if (old_dir != new_dir) err = ext2_set_link(old_inode, dir_de, dir_page, dir_page_addr, new_dir, false); - } - ext2_put_page(dir_page, dir_page_addr); inode_dec_link_count(old_dir); } - -out_old: - ext2_put_page(old_page, old_page_addr); -out: - return err; - out_dir: if (dir_de) ext2_put_page(dir_page, dir_page_addr); - goto out_old; +out_old: + ext2_put_page(old_page, old_page_addr); + return err; } const struct inode_operations ext2_dir_inode_operations = { From 86008392695bd7a910b9d1452c828f9c7d1a6a1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 13 Dec 2022 20:26:27 -0500 Subject: [PATCH 08/19] ext2: use offset_in_page() instead of open-coding it as subtraction Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. 
De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a3c77ea5a7de..5141ec6a6b51 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -240,7 +240,7 @@ ext2_validate_entry(char *base, unsigned offset, unsigned mask) break; p = ext2_next_entry(p); } - return (char *)p - base; + return offset_in_page(p); } static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) @@ -465,8 +465,7 @@ int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, struct page *page, void *page_addr, struct inode *inode, bool update_times) { - loff_t pos = page_offset(page) + - (char *) de - (char *) page_addr; + loff_t pos = page_offset(page) + offset_in_page(de); unsigned len = ext2_rec_len_from_disk(de->rec_len); int err; @@ -556,8 +555,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - pos = page_offset(page) + - (char *)de - (char *)page_addr; + pos = page_offset(page) + offset_in_page(de); err = ext2_prepare_chunk(page, pos, rec_len); if (err) goto out_unlock; @@ -594,8 +592,8 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, char *kaddr) { struct inode *inode = page->mapping->host; - unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char *)dir - kaddr) + + unsigned from = offset_in_page(dir) & ~(ext2_chunk_size(inode)-1); + unsigned to = offset_in_page(dir) + ext2_rec_len_from_disk(dir->rec_len); loff_t pos; ext2_dirent * pde = NULL; @@ -612,7 +610,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, de = ext2_next_entry(de); } if (pde) - from = (char *)pde - kaddr; + from = offset_in_page(pde); pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); From 46022375abe8160b6c952a2ca0ea7988be6b888d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 13 Dec 2022 20:07:28 -0500 Subject: 
[PATCH 09/19] ext2_get_page(): saner type We need to pass to caller both the page reference and pointer to the first byte in the now-mapped page. The former always has the same type, the latter varies from caller to caller. So make it void *ext2_get_page(...., struct page **page) rather than struct page *ext2_get_page(..., void **page_addr) and avoid the casts... Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 5141ec6a6b51..75c8f8037a40 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -186,23 +186,25 @@ fail: * NOTE: ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page() * and should be treated as a call to ext2_get_page() for nesting purposes. */ -static struct page * ext2_get_page(struct inode *dir, unsigned long n, - int quiet, void **page_addr) +static void *ext2_get_page(struct inode *dir, unsigned long n, + int quiet, struct page **page) { struct address_space *mapping = dir->i_mapping; struct folio *folio = read_mapping_folio(mapping, n, NULL); + void *page_addr; if (IS_ERR(folio)) - return &folio->page; - *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); + return ERR_CAST(folio); + page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); if (unlikely(!folio_test_checked(folio))) { - if (!ext2_check_page(&folio->page, quiet, *page_addr)) + if (!ext2_check_page(&folio->page, quiet, page_addr)) goto fail; } - return &folio->page; + *page = &folio->page; + return page_addr; fail: - ext2_put_page(&folio->page, *page_addr); + ext2_put_page(&folio->page, page_addr); return ERR_PTR(-EIO); } @@ -271,16 +273,17 @@ ext2_readdir(struct file *file, struct dir_context *ctx) EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE); for ( ; n < npages; n++, offset = 0) { - char 
*kaddr, *limit; ext2_dirent *de; - struct page *page = ext2_get_page(inode, n, 0, (void **)&kaddr); + struct page *page; + char *kaddr = ext2_get_page(inode, n, 0, &page); + char *limit; - if (IS_ERR(page)) { + if (IS_ERR(kaddr)) { ext2_error(sb, __func__, "bad page in #%lu", inode->i_ino); ctx->pos += PAGE_SIZE - offset; - return PTR_ERR(page); + return PTR_ERR(kaddr); } if (unlikely(need_revalidate)) { if (offset) { @@ -362,9 +365,10 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, n = start; do { char *kaddr; - page = ext2_get_page(dir, n, 0, &page_addr); - if (IS_ERR(page)) - return ERR_CAST(page); + + page_addr = ext2_get_page(dir, n, 0, &page); + if (IS_ERR(page_addr)) + return ERR_CAST(page_addr); kaddr = page_addr; de = (ext2_dirent *) kaddr; @@ -418,13 +422,11 @@ found: struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa) { - void *page_addr; - struct page *page = ext2_get_page(dir, 0, 0, &page_addr); + void *page_addr = ext2_get_page(dir, 0, 0, p); ext2_dirent *de = NULL; - if (!IS_ERR(page)) { + if (!IS_ERR(page_addr)) { de = ext2_next_entry((ext2_dirent *) page_addr); - *p = page; *pa = page_addr; } return de; @@ -513,10 +515,9 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) char *kaddr; char *dir_end; - page = ext2_get_page(dir, n, 0, &page_addr); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; + page_addr = ext2_get_page(dir, n, 0, &page); + if (IS_ERR(page_addr)) + return PTR_ERR(page_addr); lock_page(page); kaddr = page_addr; dir_end = kaddr + ext2_last_byte(dir, n); @@ -577,7 +578,6 @@ got_it: /* OFFSET_CACHE */ out_put: ext2_put_page(page, page_addr); -out: return err; out_unlock: unlock_page(page); @@ -682,9 +682,9 @@ int ext2_empty_dir (struct inode * inode) for (i = 0; i < npages; i++) { char *kaddr; ext2_dirent * de; - page = ext2_get_page(inode, i, 0, &page_addr); + page_addr = ext2_get_page(inode, i, 0, &page); - if (IS_ERR(page)) + if (IS_ERR(page_addr)) return 0; kaddr 
= page_addr; From 91f646fb971f4401216a2dff5c568bcbce79a923 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 13 Dec 2022 20:14:50 -0500 Subject: [PATCH 10/19] ext2_put_page(): accept any pointer within the page eliminates the need to keep the pointer to the first byte within the page if we are guaranteed to have pointers to some byte in the same page at hand. Don't backport without commit 88d7b12068b9 ("highmem: round down the address passed to kunmap_flush_on_unmap()"). Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 38 +++++++++++++++++--------------------- fs/ext2/namei.c | 8 ++++---- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 75c8f8037a40..31b2aab94da0 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -299,7 +299,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) if (de->rec_len == 0) { ext2_error(sb, __func__, "zero-length directory entry"); - ext2_put_page(page, kaddr); + ext2_put_page(page, de); return -EIO; } if (de->inode) { @@ -311,7 +311,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit(ctx, de->name, de->name_len, le32_to_cpu(de->inode), d_type)) { - ext2_put_page(page, kaddr); + ext2_put_page(page, de); return 0; } } @@ -377,14 +377,14 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, if (de->rec_len == 0) { ext2_error(dir->i_sb, __func__, "zero-length directory entry"); - ext2_put_page(page, page_addr); + ext2_put_page(page, de); goto out; } if (ext2_match(namelen, name, de)) goto found; de = ext2_next_entry(de); } - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); if (++n >= npages) n = 0; @@ -443,7 +443,7 @@ int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino) return PTR_ERR(de); *ino = le32_to_cpu(de->inode); - ext2_put_page(page, page_addr); + ext2_put_page(page, de); return 0; } @@ -499,7 +499,6 @@ int ext2_add_link 
(struct dentry *dentry, struct inode *inode) unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; struct page *page = NULL; - void *page_addr = NULL; ext2_dirent * de; unsigned long npages = dir_pages(dir); unsigned long n; @@ -515,11 +514,10 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) char *kaddr; char *dir_end; - page_addr = ext2_get_page(dir, n, 0, &page); - if (IS_ERR(page_addr)) - return PTR_ERR(page_addr); + kaddr = ext2_get_page(dir, n, 0, &page); + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); lock_page(page); - kaddr = page_addr; dir_end = kaddr + ext2_last_byte(dir, n); de = (ext2_dirent *)kaddr; kaddr += PAGE_SIZE - reclen; @@ -550,7 +548,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) de = (ext2_dirent *) ((char *) de + rec_len); } unlock_page(page); - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); } BUG(); return -EINVAL; @@ -577,7 +575,7 @@ got_it: err = ext2_handle_dirsync(dir); /* OFFSET_CACHE */ out_put: - ext2_put_page(page, page_addr); + ext2_put_page(page, de); return err; out_unlock: unlock_page(page); @@ -675,19 +673,17 @@ fail: */ int ext2_empty_dir (struct inode * inode) { - void *page_addr = NULL; - struct page *page = NULL; + struct page *page; + char *kaddr; unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { - char *kaddr; - ext2_dirent * de; - page_addr = ext2_get_page(inode, i, 0, &page); + ext2_dirent *de; - if (IS_ERR(page_addr)) + kaddr = ext2_get_page(inode, i, 0, &page); + if (IS_ERR(kaddr)) return 0; - kaddr = page_addr; de = (ext2_dirent *)kaddr; kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); @@ -713,12 +709,12 @@ int ext2_empty_dir (struct inode * inode) } de = ext2_next_entry(de); } - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); } return 1; not_empty: - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); return 0; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 
dafdd2d41876..50105d50c48a 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -288,7 +288,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) } err = ext2_delete_entry (de, page, page_addr); - ext2_put_page(page, page_addr); + ext2_put_page(page, de); if (err) goto out; @@ -370,7 +370,7 @@ static int ext2_rename (struct mnt_idmap * idmap, } err = ext2_set_link(new_dir, new_de, new_page, page_addr, old_inode, true); - ext2_put_page(new_page, page_addr); + ext2_put_page(new_page, new_de); if (err) goto out_dir; new_inode->i_ctime = current_time(new_inode); @@ -402,9 +402,9 @@ static int ext2_rename (struct mnt_idmap * idmap, } out_dir: if (dir_de) - ext2_put_page(dir_page, dir_page_addr); + ext2_put_page(dir_page, dir_de); out_old: - ext2_put_page(old_page, old_page_addr); + ext2_put_page(old_page, old_de); return err; } From dae42837ba6dd441e4a569996d5f62986ffe01ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 13 Dec 2022 20:31:39 -0500 Subject: [PATCH 11/19] ext2_{set_link,delete_entry}(): don't bother with page_addr ext2_set_link() simply doesn't use it anymore and ext2_delete_entry() can easily obtain it from the directory entry pointer... Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. 
De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 11 +++++------ fs/ext2/ext2.h | 6 ++---- fs/ext2/namei.c | 9 ++++----- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 31b2aab94da0..8cf91a7bbbb9 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -464,8 +464,7 @@ static int ext2_handle_dirsync(struct inode *dir) } int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, void *page_addr, struct inode *inode, - bool update_times) + struct page *page, struct inode *inode, bool update_times) { loff_t pos = page_offset(page) + offset_in_page(de); unsigned len = ext2_rec_len_from_disk(de->rec_len); @@ -586,16 +585,16 @@ out_unlock: * ext2_delete_entry deletes a directory entry by merging it with the * previous entry. Page is up-to-date. */ -int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, - char *kaddr) +int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page) { struct inode *inode = page->mapping->host; + char *kaddr = (char *)((unsigned long)dir & PAGE_MASK); unsigned from = offset_in_page(dir) & ~(ext2_chunk_size(inode)-1); unsigned to = offset_in_page(dir) + ext2_rec_len_from_disk(dir->rec_len); loff_t pos; - ext2_dirent * pde = NULL; - ext2_dirent * de = (ext2_dirent *) (kaddr + from); + ext2_dirent *pde = NULL; + ext2_dirent *de = (ext2_dirent *)(kaddr + from); int err; while ((char*)de < (char*)dir) { diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d0531d4ef499..391edf57b944 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -731,13 +731,11 @@ extern int ext2_inode_by_name(struct inode *dir, extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, struct page **, void **res_page_addr); -extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page, - char *kaddr); +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page 
*page); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, void *page_addr, struct inode *inode, - bool update_times); + struct page *page, struct inode *inode, bool update_times); static inline void ext2_put_page(struct page *page, void *page_addr) { kunmap_local(page_addr); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 50105d50c48a..3e7f895ac2da 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -287,7 +287,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) goto out; } - err = ext2_delete_entry (de, page, page_addr); + err = ext2_delete_entry(de, page); ext2_put_page(page, de); if (err) goto out; @@ -368,8 +368,7 @@ static int ext2_rename (struct mnt_idmap * idmap, err = PTR_ERR(new_de); goto out_dir; } - err = ext2_set_link(new_dir, new_de, new_page, page_addr, - old_inode, true); + err = ext2_set_link(new_dir, new_de, new_page, old_inode, true); ext2_put_page(new_page, new_de); if (err) goto out_dir; @@ -392,11 +391,11 @@ static int ext2_rename (struct mnt_idmap * idmap, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); - err = ext2_delete_entry(old_de, old_page, old_page_addr); + err = ext2_delete_entry(old_de, old_page); if (!err && dir_de) { if (old_dir != new_dir) err = ext2_set_link(old_inode, dir_de, dir_page, - dir_page_addr, new_dir, false); + new_dir, false); inode_dec_link_count(old_dir); } From b8b9e8b35d3856499c0b2e8188885257828b7fa5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 13 Dec 2022 20:53:47 -0500 Subject: [PATCH 12/19] ext2_find_entry()/ext2_dotdot(): callers don't need page_addr anymore ... and that's how it should've been done in the first place Signed-off-by: Al Viro Reviewed-by: Fabio M. De Francesco Tested-by: Fabio M. 
De Francesco Signed-off-by: Jan Kara --- fs/ext2/dir.c | 35 +++++++++++------------------------ fs/ext2/ext2.h | 4 ++-- fs/ext2/namei.c | 21 ++++++++------------- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 8cf91a7bbbb9..42db804794bd 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -339,8 +339,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) * should be treated as a call to ext2_get_page() for nesting purposes. */ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, - const struct qstr *child, struct page **res_page, - void **res_page_addr) + const struct qstr *child, struct page **res_page) { const char *name = child->name; int namelen = child->len; @@ -350,27 +349,22 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, struct page *page = NULL; struct ext2_inode_info *ei = EXT2_I(dir); ext2_dirent * de; - void *page_addr; if (npages == 0) goto out; /* OFFSET_CACHE */ *res_page = NULL; - *res_page_addr = NULL; start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr; + char *kaddr = ext2_get_page(dir, n, 0, &page); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); - page_addr = ext2_get_page(dir, n, 0, &page); - if (IS_ERR(page_addr)) - return ERR_CAST(page_addr); - - kaddr = page_addr; de = (ext2_dirent *) kaddr; kaddr += ext2_last_byte(dir, n) - reclen; while ((char *) de <= kaddr) { @@ -402,7 +396,6 @@ out: found: *res_page = page; - *res_page_addr = page_addr; ei->i_dir_start_lookup = n; return de; } @@ -419,26 +412,21 @@ found: * ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page() and * should be treated as a call to ext2_get_page() for nesting purposes. 
*/ -struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, - void **pa) +struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p) { - void *page_addr = ext2_get_page(dir, 0, 0, p); - ext2_dirent *de = NULL; + ext2_dirent *de = ext2_get_page(dir, 0, 0, p); - if (!IS_ERR(page_addr)) { - de = ext2_next_entry((ext2_dirent *) page_addr); - *pa = page_addr; - } - return de; + if (!IS_ERR(de)) + return ext2_next_entry(de); + return NULL; } int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino) { struct ext2_dir_entry_2 *de; struct page *page; - void *page_addr; - de = ext2_find_entry(dir, child, &page, &page_addr); + de = ext2_find_entry(dir, child, &page); if (IS_ERR(de)) return PTR_ERR(de); @@ -510,10 +498,9 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) * to protect that region. */ for (n = 0; n <= npages; n++) { - char *kaddr; + char *kaddr = ext2_get_page(dir, n, 0, &page); char *dir_end; - kaddr = ext2_get_page(dir, n, 0, &page); if (IS_ERR(kaddr)) return PTR_ERR(kaddr); lock_page(page); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 391edf57b944..5e82ec0847cf 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -730,10 +730,10 @@ extern int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino); extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, - struct page **, void **res_page_addr); + struct page **); extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page); extern int ext2_empty_dir (struct inode *); -extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); +extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p); int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, struct page *page, struct inode *inode, bool update_times); static inline void ext2_put_page(struct page *page, 
void *page_addr) diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 3e7f895ac2da..937dd8f60f96 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -269,19 +269,18 @@ out_dir: goto out; } -static int ext2_unlink(struct inode * dir, struct dentry *dentry) +static int ext2_unlink(struct inode *dir, struct dentry *dentry) { - struct inode * inode = d_inode(dentry); - struct ext2_dir_entry_2 * de; - struct page * page; - void *page_addr; + struct inode *inode = d_inode(dentry); + struct ext2_dir_entry_2 *de; + struct page *page; int err; err = dquot_initialize(dir); if (err) goto out; - de = ext2_find_entry(dir, &dentry->d_name, &page, &page_addr); + de = ext2_find_entry(dir, &dentry->d_name, &page); if (IS_ERR(de)) { err = PTR_ERR(de); goto out; @@ -323,10 +322,8 @@ static int ext2_rename (struct mnt_idmap * idmap, struct inode * old_inode = d_inode(old_dentry); struct inode * new_inode = d_inode(new_dentry); struct page * dir_page = NULL; - void *dir_page_addr; struct ext2_dir_entry_2 * dir_de = NULL; struct page * old_page; - void *old_page_addr; struct ext2_dir_entry_2 * old_de; int err; @@ -341,20 +338,18 @@ static int ext2_rename (struct mnt_idmap * idmap, if (err) return err; - old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page, - &old_page_addr); + old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page); if (IS_ERR(old_de)) return PTR_ERR(old_de); if (S_ISDIR(old_inode->i_mode)) { err = -EIO; - dir_de = ext2_dotdot(old_inode, &dir_page, &dir_page_addr); + dir_de = ext2_dotdot(old_inode, &dir_page); if (!dir_de) goto out_old; } if (new_inode) { - void *page_addr; struct page *new_page; struct ext2_dir_entry_2 *new_de; @@ -363,7 +358,7 @@ static int ext2_rename (struct mnt_idmap * idmap, goto out_dir; new_de = ext2_find_entry(new_dir, &new_dentry->d_name, - &new_page, &page_addr); + &new_page); if (IS_ERR(new_de)) { err = PTR_ERR(new_de); goto out_dir; From 576215cffdefc1f0ceebffd87abb390926e6b037 Mon Sep 17 00:00:00 2001 From: Jan Kara 
Date: Thu, 25 May 2023 16:17:10 +0200 Subject: [PATCH 13/19] fs: Drop wait_unfrozen wait queue wait_unfrozen waitqueue is used only in quota code to wait for filesystem to become unfrozen. In that place we can just use sb_start_write() - sb_end_write() pair to achieve the same. So just remove the waitqueue. Reviewed-by: Christian Brauner Message-Id: <20230525141710.7595-1-jack@suse.cz> Signed-off-by: Jan Kara --- fs/quota/quota.c | 5 +++-- fs/super.c | 4 ---- include/linux/fs.h | 1 - 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 052f143e2e0e..0e41fb84060f 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -895,8 +895,9 @@ retry: up_write(&sb->s_umount); else up_read(&sb->s_umount); - wait_event(sb->s_writers.wait_unfrozen, - sb->s_writers.frozen == SB_UNFROZEN); + /* Wait for sb to unfreeze */ + sb_start_write(sb); + sb_end_write(sb); put_super(sb); goto retry; } diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..6283cea67280 100644 --- a/fs/super.c +++ b/fs/super.c @@ -236,7 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, &type->s_writers_key[i])) goto fail; } - init_waitqueue_head(&s->s_writers.wait_unfrozen); s->s_bdi = &noop_backing_dev_info; s->s_flags = flags; if (s->s_user_ns != &init_user_ns) @@ -1706,7 +1705,6 @@ int freeze_super(struct super_block *sb) if (ret) { sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); - wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } @@ -1722,7 +1720,6 @@ int freeze_super(struct super_block *sb) "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); - wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; } @@ -1768,7 +1765,6 @@ static int thaw_super_locked(struct super_block *sb) sb->s_writers.frozen = SB_UNFROZEN; sb_freeze_unlock(sb, SB_FREEZE_FS); out: - 
wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return 0; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 21a981680856..3b65a6194485 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1146,7 +1146,6 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; From 5ce345541ee43333bfbd99a2ea56b1a0a167c457 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 22 May 2023 07:54:34 +0700 Subject: [PATCH 14/19] fs: udf: Replace GPL 2.0 boilerplate license notice with SPDX identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The notice refers to full GPL 2.0 text on now defunct MIT FTP site [1]. Replace it with appropriate SPDX license identifier. Cc: Thomas Gleixner Cc: Pali Rohár Link: https://web.archive.org/web/20020809115410/ftp://prep.ai.mit.edu/pub/gnu/GPL [1] Signed-off-by: Bagas Sanjaya Signed-off-by: Jan Kara Message-Id: <20230522005434.22133-2-bagasdotme@gmail.com> --- fs/udf/balloc.c | 6 +----- fs/udf/dir.c | 6 +----- fs/udf/directory.c | 6 +----- fs/udf/file.c | 6 +----- fs/udf/ialloc.c | 6 +----- fs/udf/inode.c | 6 +----- fs/udf/lowlevel.c | 6 +----- fs/udf/misc.c | 6 +----- fs/udf/namei.c | 6 +----- fs/udf/partition.c | 6 +----- fs/udf/super.c | 6 +----- fs/udf/symlink.c | 6 +----- fs/udf/truncate.c | 6 +----- fs/udf/unicode.c | 6 +----- 14 files changed, 14 insertions(+), 70 deletions(-) diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 14b9db4c80f0..ab3ffc355949 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * balloc.c * @@ -5,11 +6,6 @@ * Block allocation handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). 
Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1999-2001 Ben Fennema * (C) 1999 Stelias Computing Inc * diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 212393b12c22..f6533f93851b 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * dir.c * @@ -5,11 +6,6 @@ * Directory handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998-2004 Ben Fennema * * HISTORY diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 654536d2b609..1c775e072b2f 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * directory.c * * PURPOSE * Directory related functions * - * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. */ #include "udfdecl.h" diff --git a/fs/udf/file.c b/fs/udf/file.c index 8238f742377b..b871b85457e5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * file.c * @@ -5,11 +6,6 @@ * File handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. 
- * * (C) 1998-1999 Dave Boynton * (C) 1998-2004 Ben Fennema * (C) 1999-2000 Stelias Computing Inc diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 8d50121778a5..5f7ac8c84798 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * ialloc.c * @@ -5,11 +6,6 @@ * Inode allocation handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998-2001 Ben Fennema * * HISTORY diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1e71e04ae8f6..28cdfc57d946 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * inode.c * @@ -5,11 +6,6 @@ * Inode handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998 Dave Boynton * (C) 1998-2004 Ben Fennema * (C) 1999-2000 Stelias Computing Inc diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index c87ed942d076..9d847a7a0905 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * lowlevel.c * @@ -5,11 +6,6 @@ * Low Level Device Routines for the UDF filesystem * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. 
- * * (C) 1999-2001 Ben Fennema * * HISTORY diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 3777468d06ce..0788593b6a1d 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * misc.c * @@ -5,11 +6,6 @@ * Miscellaneous routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998 Dave Boynton * (C) 1998-2004 Ben Fennema * (C) 1999-2000 Stelias Computing Inc diff --git a/fs/udf/namei.c b/fs/udf/namei.c index fd20423d3ed2..49e1e0fe3fee 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * namei.c * @@ -5,11 +6,6 @@ * Inode name handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998-2004 Ben Fennema * (C) 1999-2000 Stelias Computing Inc * diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 5bcfe78d5cab..af877991edc1 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * partition.c * @@ -5,11 +6,6 @@ * Partition handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. 
- * * (C) 1998-2001 Ben Fennema * * HISTORY diff --git a/fs/udf/super.c b/fs/udf/super.c index 6304e3c5c3d9..928a04d9d9e0 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * super.c * @@ -15,11 +16,6 @@ * https://www.iso.org/ * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998 Dave Boynton * (C) 1998-2004 Ben Fennema * (C) 2000 Stelias Computing Inc diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index a34c8c4e6d21..779b5c2c75f6 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * symlink.c * @@ -5,11 +6,6 @@ * Symlink handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. - * * (C) 1998-2001 Ben Fennema * (C) 1999 Stelias Computing Inc * diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 2e7ba234bab8..a686c10fd709 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * truncate.c * @@ -5,11 +6,6 @@ * Truncate handling routines for the OSTA-UDF(tm) filesystem. * * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. 
- * * (C) 1999-2004 Ben Fennema * (C) 1999 Stelias Computing Inc * diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 622569007b53..ae6e809fa3aa 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * unicode.c * @@ -11,11 +12,6 @@ * UTF-8 is explained in the IETF RFC XXXX. * ftp://ftp.internic.net/rfc/rfcxxxx.txt * - * COPYRIGHT - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from: - * ftp://prep.ai.mit.edu/pub/gnu/GPL - * Each contributing author retains all rights to their own work. */ #include "udfdecl.h" From aac2fa20132e390e87270e3a738e86abcf1aea8b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 22 May 2023 07:54:35 +0700 Subject: [PATCH 15/19] fs: udf: udftime: Replace LGPL boilerplate with SPDX identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace license boilerplate in udftime.c with SPDX identifier for LGPL-2.0. Cc: Paul Eggert Cc: Richard Fontana Cc: Pali Rohár Signed-off-by: Bagas Sanjaya Reviewed-by: Jilayne Lovejoy Signed-off-by: Jan Kara Message-Id: <20230522005434.22133-3-bagasdotme@gmail.com> --- fs/udf/udftime.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index fce4ad976c8c..758163af39c2 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -1,21 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.0+ /* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Paul Eggert (eggert@twinsun.com). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + Contributed by Paul Eggert (eggert@twinsun.com). */ /* * dgb 10/02/98: ripped this from glibc source to help convert timestamps From 6a4e3363792e30177cc3965697e34ddcea8b900b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 5 Jun 2023 22:07:30 +0800 Subject: [PATCH 16/19] quota: Properly disable quotas when add_dquot_ref() fails When add_dquot_ref() fails (usually due to IO error or ENOMEM), we want to disable quotas we are trying to enable. However dquot_disable() call was passed just the flags we are enabling so in case flags == DQUOT_USAGE_ENABLED dquot_disable() call will just fail with EINVAL instead of properly disabling quotas. Fix the problem by always passing DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED to dquot_disable() in this case. 
Reported-and-tested-by: Ye Bin Reported-by: syzbot+e633c79ceaecbf479854@syzkaller.appspotmail.com Signed-off-by: Jan Kara Message-Id: <20230605140731.2427629-2-yebin10@huawei.com> --- fs/quota/dquot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ffd40dc3e4e9..6beceed34e08 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2420,7 +2420,8 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, error = add_dquot_ref(sb, type); if (error) - dquot_disable(sb, type, flags); + dquot_disable(sb, type, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); return error; out_fmt: From d6a95db3c7ad160bc16b89e36449705309b52bcb Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 5 Jun 2023 22:07:31 +0800 Subject: [PATCH 17/19] quota: fix warning in dqgrab() There's issue as follows when do fault injection: WARNING: CPU: 1 PID: 14870 at include/linux/quotaops.h:51 dquot_disable+0x13b7/0x18c0 Modules linked in: CPU: 1 PID: 14870 Comm: fsconfig Not tainted 6.3.0-next-20230505-00006-g5107a9c821af-dirty #541 RIP: 0010:dquot_disable+0x13b7/0x18c0 RSP: 0018:ffffc9000acc79e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88825e41b980 RDX: 0000000000000000 RSI: ffff88825e41b980 RDI: 0000000000000002 RBP: ffff888179f68000 R08: ffffffff82087ca7 R09: 0000000000000000 R10: 0000000000000001 R11: ffffed102f3ed026 R12: ffff888179f68130 R13: ffff888179f68110 R14: dffffc0000000000 R15: ffff888179f68118 FS: 00007f450a073740(0000) GS:ffff88882fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe96f2efd8 CR3: 000000025c8ad000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dquot_load_quota_sb+0xd53/0x1060 dquot_resume+0x172/0x230 ext4_reconfigure+0x1dc6/0x27b0 reconfigure_super+0x515/0xa90 __x64_sys_fsconfig+0xb19/0xd20 
do_syscall_64+0x39/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd Above issue may happens as follows: ProcessA ProcessB ProcessC sys_fsconfig vfs_fsconfig_locked reconfigure_super ext4_remount dquot_suspend -> suspend all type quota sys_fsconfig vfs_fsconfig_locked reconfigure_super ext4_remount dquot_resume ret = dquot_load_quota_sb add_dquot_ref do_open -> open file O_RDWR vfs_open do_dentry_open get_write_access atomic_inc_unless_negative(&inode->i_writecount) ext4_file_open dquot_file_open dquot_initialize __dquot_initialize dqget atomic_inc(&dquot->dq_count); __dquot_initialize __dquot_initialize dqget if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) ext4_acquire_dquot -> Return error DQ_ACTIVE_B flag isn't set dquot_disable invalidate_dquots if (atomic_read(&dquot->dq_count)) dqgrab WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) -> Trigger warning In the above scenario, 'dquot->dq_flags' has no DQ_ACTIVE_B is normal when dqgrab(). To solve above issue just replace the dqgrab() use in invalidate_dquots() with atomic_inc(&dquot->dq_count). Signed-off-by: Ye Bin Signed-off-by: Jan Kara Message-Id: <20230605140731.2427629-3-yebin10@huawei.com> --- fs/quota/dquot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6beceed34e08..e3e4f4047657 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -555,7 +555,7 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - dqgrab(dquot); + atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* * Once dqput() wakes us up, we know it's time to free From 404615d7f1dcd4cca200e9a7a9df3a1dcae1dd62 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Jun 2023 12:25:52 +0200 Subject: [PATCH 18/19] ext2: Drop fragment support Ext2 has fields in superblock reserved for subblock allocation support. However that never landed. Drop the many years dead code. 
Reported-by: syzbot+af5e10f73dbff48f70af@syzkaller.appspotmail.com Signed-off-by: Jan Kara --- fs/ext2/ext2.h | 12 ------------ fs/ext2/super.c | 23 ++++------------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 5e82ec0847cf..35a041c47c38 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -70,10 +70,7 @@ struct mb_cache; * second extended-fs super-block data in memory */ struct ext2_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ unsigned long s_blocks_per_group;/* Number of blocks in a group */ unsigned long s_inodes_per_group;/* Number of inodes in a group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ @@ -188,15 +185,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) #define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size) #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) -/* - * Macro-instructions used to manage fragments - */ -#define EXT2_MIN_FRAG_SIZE 1024 -#define EXT2_MAX_FRAG_SIZE 4096 -#define EXT2_MIN_FRAG_LOG_SIZE 10 -#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) -#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) - /* * Structure of a blocks group descriptor */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f342f347a695..2959afc7541c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -668,10 +668,9 @@ static int ext2_setup_super (struct super_block * sb, es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, - sbi->s_frag_size, 
sbi->s_groups_count, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), @@ -1012,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } } - sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (sbi->s_frag_size == 0) - goto cantfind_ext2; - sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); @@ -1045,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sb->s_blocksize != sbi->s_frag_size) { + if (es->s_log_frag_size != es->s_log_block_size) { ext2_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %lu" - "(not supported yet)", - sbi->s_frag_size, sb->s_blocksize); + "error: fragsize log %u != blocksize log %u", + le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits); goto failed_mount; } @@ -1066,12 +1057,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3); goto failed_mount; } - if (sbi->s_frags_per_group > sb->s_blocksize * 8) { - ext2_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); - goto failed_mount; - } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, From 028f6055c912588e6f72722d89c30b401bbcf013 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Jun 2023 11:32:35 +0200 Subject: [PATCH 19/19] udf: Fix uninitialized array access for some pathnames For filenames that begin with . and are between 2 and 5 characters long, UDF charset conversion code would read uninitialized memory in the output buffer. 
The only practical impact is that a "unification hash" may be prepended to the name when it is not actually needed, but it is still good to fix this.