Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "There are a number of major cleanups in ext4 this cycle:

   - The data=journal write path has been significantly cleaned up and
     simplified by Jan Kara, removing a large number of data=journal
     special cases.

   - Ojaswin Mujoo has replaced the linked list used to track extents
     that have been used for inode preallocation with a red-black tree
     in the multi-block allocator. This improves performance for
     workloads which do a large number of random allocating writes (an
     illustrative sketch follows the diffstat below).

   - Thanks to Kemeng Shi for a lot of cleanup and bug fixes in the
     multi-block allocator.

   - Matthew Wilcox has converted the code paths for reading and writing
     ext4 pages to use folios.

   - Jason Yan has continued to factor out ext4_fill_super() into
     smaller functions to improve ease of maintenance and
     comprehension.

   - Josh Triplett has created a uapi header for ext4 userspace APIs"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (105 commits)
  ext4: Add a uapi header for ext4 userspace APIs
  ext4: remove useless conditional branch code
  ext4: remove unneeded check of nr_to_submit
  ext4: move dax and encrypt checking into ext4_check_feature_compatibility()
  ext4: factor out ext4_block_group_meta_init()
  ext4: move s_reserved_gdt_blocks and addressable checking into ext4_check_geometry()
  ext4: rename two functions with 'check'
  ext4: factor out ext4_flex_groups_free()
  ext4: use ext4_group_desc_free() in ext4_put_super() to save some duplicated code
  ext4: factor out ext4_percpu_param_init() and ext4_percpu_param_destroy()
  ext4: factor out ext4_hash_info_init()
  Revert "ext4: Fix warnings when freezing filesystem with journaled data"
  ext4: Update comment in mpage_prepare_extent_to_map()
  ext4: Simplify handling of journalled data in ext4_bmap()
  ext4: Drop special handling of journalled data from ext4_quota_on()
  ext4: Drop special handling of journalled data from ext4_evict_inode()
  ext4: Fix special handling of journalled data from extent zeroing
  ext4: Drop special handling of journalled data from extent shifting operations
  ext4: Drop special handling of journalled data from ext4_sync_file()
  ext4: Commit transaction before writing back pages in data=journal mode
  ...
Linus Torvalds
2023-04-26 08:57:41 -07:00
30 changed files with 1414 additions and 1442 deletions
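The red-black-tree change called out in the message above (and visible in the struct ext4_prealloc_space hunk further down) replaces a per-inode list of preallocated ranges with a structure that can be searched by block number. The sketch below is not ext4 code; it is a minimal userspace illustration, with made-up range values, of why a sorted/tree-style lookup beats a linear list walk once an inode has accumulated many preallocations:

```c
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for an inode preallocation: a range of blocks. */
struct toy_pa {
	unsigned long start;	/* first logical block covered */
	unsigned long len;	/* number of blocks */
};

/* Linear scan, analogous to walking a list of preallocations. */
static struct toy_pa *find_linear(struct toy_pa *pas, size_t n, unsigned long blk)
{
	for (size_t i = 0; i < n; i++)
		if (blk >= pas[i].start && blk < pas[i].start + pas[i].len)
			return &pas[i];
	return NULL;
}

/* Binary search over ranges sorted by start, analogous to an rb-tree lookup. */
static struct toy_pa *find_sorted(struct toy_pa *pas, size_t n, unsigned long blk)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (blk < pas[mid].start)
			hi = mid;
		else if (blk >= pas[mid].start + pas[mid].len)
			lo = mid + 1;
		else
			return &pas[mid];
	}
	return NULL;
}

int main(void)
{
	/* Made-up, non-overlapping ranges already sorted by start block. */
	struct toy_pa pas[] = {
		{ 0, 16 }, { 64, 32 }, { 256, 8 }, { 1024, 128 },
	};
	size_t n = sizeof(pas) / sizeof(pas[0]);
	unsigned long blk = 260;

	printf("linear: %s\n", find_linear(pas, n, blk) ? "hit" : "miss");
	printf("sorted: %s\n", find_sorted(pas, n, blk) ? "hit" : "miss");
	return 0;
}
```

With only a handful of preallocations the two approaches are equivalent; the difference shows up when a file accumulates many preallocated ranges, which is exactly the random-allocating-write workload the message mentions. The kernel uses an rb-tree rather than a sorted array so ranges can also be inserted and removed cheaply.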


@@ -489,9 +489,6 @@ Files in /sys/fs/ext4/<devname>:
         multiple of this tuning parameter if the stripe size is not set in the
         ext4 superblock
 
-mb_max_inode_prealloc
-        The maximum length of per-inode ext4_prealloc_space list.
-
 mb_max_to_scan
         The maximum number of extents the multiblock allocator will search to
         find the best extent.


@@ -7745,6 +7745,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
 F:	Documentation/filesystems/ext4/
 F:	fs/ext4/
 F:	include/trace/events/ext4.h
+F:	include/uapi/linux/ext4.h
 
 Extended Verification Module (EVM)
 M:	Mimi Zohar <zohar@linux.ibm.com>
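With the ioctl definitions now living in include/uapi/linux/ext4.h, userspace can get them from installed kernel headers instead of carrying private copies. A minimal sketch, assuming headers generated from a kernel that includes this series (the ioctl and flag names below are the ones moved into the new header; on older headers <linux/ext4.h> does not exist and the definitions would have to be provided locally):

```c
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/ext4.h>		/* new uapi header added by this series */

int main(int argc, char **argv)
{
	__u32 state = 0;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file on ext4>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* EXT4_IOC_GETSTATE reports a subset of the inode's state flags. */
	if (ioctl(fd, EXT4_IOC_GETSTATE, &state) < 0) {
		perror("EXT4_IOC_GETSTATE");
		close(fd);
		return 1;
	}

	printf("state flags: 0x%x%s\n", state,
	       (state & EXT4_STATE_FLAG_EXT_PRECACHED) ? " (extents precached)" : "");
	close(fd);
	return 0;
}
```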


@@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
 		return false;
 	return bio_add_page(bio, &folio->page, len, off) > 0;
 }
+EXPORT_SYMBOL(bio_add_folio);
 
 void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {


@@ -80,32 +80,56 @@ static inline int ext4_block_in_group(struct super_block *sb,
 	return (actual_group == block_group) ? 1 : 0;
 }
 
-/* Return the number of clusters used for file system metadata; this
+/*
+ * Return the number of clusters used for file system metadata; this
  * represents the overhead needed by the file system.
  */
 static unsigned ext4_num_overhead_clusters(struct super_block *sb,
 					   ext4_group_t block_group,
 					   struct ext4_group_desc *gdp)
 {
-	unsigned num_clusters;
-	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
+	unsigned base_clusters, num_clusters;
+	int block_cluster = -1, inode_cluster;
+	int itbl_cluster_start = -1, itbl_cluster_end = -1;
 	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
-	ext4_fsblk_t itbl_blk;
+	ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+	ext4_fsblk_t itbl_blk_start, itbl_blk_end;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	/* This is the number of clusters used by the superblock,
 	 * block group descriptors, and reserved block group
 	 * descriptor blocks */
-	num_clusters = ext4_num_base_meta_clusters(sb, block_group);
+	base_clusters = ext4_num_base_meta_clusters(sb, block_group);
+	num_clusters = base_clusters;
 
 	/*
-	 * For the allocation bitmaps and inode table, we first need
-	 * to check to see if the block is in the block group.  If it
-	 * is, then check to see if the cluster is already accounted
-	 * for in the clusters used for the base metadata cluster, or
-	 * if we can increment the base metadata cluster to include
-	 * that block.  Otherwise, we will have to track the cluster
-	 * used for the allocation bitmap or inode table explicitly.
+	 * Account and record inode table clusters if any cluster
+	 * is in the block group, or inode table cluster range is
+	 * [-1, -1] and won't overlap with block/inode bitmap cluster
+	 * accounted below.
+	 */
+	itbl_blk_start = ext4_inode_table(sb, gdp);
+	itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
+	if (itbl_blk_start <= end && itbl_blk_end >= start) {
+		itbl_blk_start = itbl_blk_start >= start ?
+			itbl_blk_start : start;
+		itbl_blk_end = itbl_blk_end <= end ?
+			itbl_blk_end : end;
+
+		itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
+		itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);
+
+		num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
+		/* check if border cluster is overlapped */
+		if (itbl_cluster_start == base_clusters - 1)
+			num_clusters--;
+	}
+
+	/*
+	 * For the allocation bitmaps, we first need to check to see
+	 * if the block is in the block group. If it is, then check
+	 * to see if the cluster is already accounted for in the clusters
+	 * used for the base metadata cluster and inode tables cluster.
 	 * Normally all of these blocks are contiguous, so the special
 	 * case handling shouldn't be necessary except for *very*
 	 * unusual file system layouts.
@@ -113,46 +137,26 @@ static unsigned ext4_num_overhead_clusters(struct super_block *sb,
 	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
 		block_cluster = EXT4_B2C(sbi,
 					 ext4_block_bitmap(sb, gdp) - start);
-		if (block_cluster < num_clusters)
-			block_cluster = -1;
-		else if (block_cluster == num_clusters) {
+		if (block_cluster >= base_clusters &&
+		    (block_cluster < itbl_cluster_start ||
+		     block_cluster > itbl_cluster_end))
 			num_clusters++;
-			block_cluster = -1;
-		}
 	}
 
 	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
 		inode_cluster = EXT4_B2C(sbi,
 					 ext4_inode_bitmap(sb, gdp) - start);
-		if (inode_cluster < num_clusters)
-			inode_cluster = -1;
-		else if (inode_cluster == num_clusters) {
+		/*
+		 * Additional check if inode bitmap is in just accounted
+		 * block_cluster
+		 */
+		if (inode_cluster != block_cluster &&
+		    inode_cluster >= base_clusters &&
+		    (inode_cluster < itbl_cluster_start ||
+		     inode_cluster > itbl_cluster_end))
 			num_clusters++;
-			inode_cluster = -1;
-		}
 	}
 
-	itbl_blk = ext4_inode_table(sb, gdp);
-	for (i = 0; i < sbi->s_itb_per_group; i++) {
-		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
-			c = EXT4_B2C(sbi, itbl_blk + i - start);
-			if ((c < num_clusters) || (c == inode_cluster) ||
-			    (c == block_cluster) || (c == itbl_cluster))
-				continue;
-			if (c == num_clusters) {
-				num_clusters++;
-				continue;
-			}
-			num_clusters++;
-			itbl_cluster = c;
-		}
-	}
-
-	if (block_cluster != -1)
-		num_clusters++;
-	if (inode_cluster != -1)
-		num_clusters++;
-
 	return num_clusters;
 }
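The rewritten ext4_num_overhead_clusters() above accounts the inode table as one contiguous cluster range and then avoids double-counting the cluster it may share with the base metadata. A toy calculation with invented numbers (16 blocks per cluster, base metadata in clusters 0-1, inode table in group-relative blocks 20-51; none of this is taken from a real filesystem) shows the arithmetic:

```c
#include <stdio.h>

/* Toy model of the cluster accounting above; not ext4 code.
 * Assumes 16 blocks per cluster and made-up block numbers. */
#define BLOCKS_PER_CLUSTER 16
#define B2C(blk) ((blk) / BLOCKS_PER_CLUSTER)

int main(void)
{
	unsigned base_clusters = 2;		/* superblock + group descriptors */
	unsigned num_clusters = base_clusters;
	unsigned long itbl_start = 20, itbl_end = 51;	/* group-relative blocks */

	unsigned itbl_cluster_start = B2C(itbl_start);	/* cluster 1 */
	unsigned itbl_cluster_end = B2C(itbl_end);	/* cluster 3 */

	num_clusters += itbl_cluster_end - itbl_cluster_start + 1;	/* +3 */
	if (itbl_cluster_start == base_clusters - 1)	/* border cluster shared */
		num_clusters--;				/* -1 */

	printf("overhead clusters so far: %u\n", num_clusters);	/* prints 4 */
	return 0;
}
```

The block and inode bitmap checks in the second hunk then only bump the count when their cluster falls outside both the base range and the inode-table range.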
@@ -187,8 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	ASSERT(buffer_locked(bh));
 
-	/* If checksum is bad mark all blocks used to prevent allocation
-	 * essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
 		ext4_mark_group_bitmap_corrupted(sb, block_group,
 					EXT4_GROUP_INFO_BBITMAP_CORRUPT |
@@ -350,13 +352,13 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	blk = ext4_inode_table(sb, desc);
 	offset = blk - group_first_block;
 	if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
-	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit)
+	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
 		return blk;
 	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
-			EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
+			EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
 			EXT4_B2C(sbi, offset));
 	if (next_zero_bit <
-	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group))
+	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
 		/* bad bitmap for inode tables */
 		return blk;
 	return 0;
@@ -383,8 +385,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
 	ext4_lock_group(sb, block_group);
 	if (buffer_verified(bh))
 		goto verified;
-	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
-						    desc, bh) ||
+	if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
 		     ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
@@ -474,17 +475,19 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
 			goto out;
 		}
 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
+		if (err) {
+			ext4_unlock_group(sb, block_group);
+			unlock_buffer(bh);
+			ext4_error(sb, "Failed to init block bitmap for group "
+				   "%u: %d", block_group, err);
+			goto out;
+		}
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
 		set_buffer_verified(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
-		if (err) {
-			ext4_error(sb, "Failed to init block bitmap for group "
-				   "%u: %d", block_group, err);
-			goto out;
-		}
-		goto verify;
+		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
 	if (buffer_uptodate(bh)) {
@@ -842,10 +845,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
 	if (!ext4_bg_has_super(sb, group))
 		return 0;
 
-	if (ext4_has_feature_meta_bg(sb))
-		return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
-	else
-		return EXT4_SB(sb)->s_gdb_count;
+	return EXT4_SB(sb)->s_gdb_count;
 }
 
 /**
@@ -887,11 +887,11 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
 	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
 			  sbi->s_desc_per_block) {
 		if (num) {
-			num += ext4_bg_num_gdb(sb, block_group);
+			num += ext4_bg_num_gdb_nometa(sb, block_group);
 			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
 		}
 	} else { /* For META_BG_BLOCK_GROUPS */
-		num += ext4_bg_num_gdb(sb, block_group);
+		num += ext4_bg_num_gdb_meta(sb, block_group);
 	}
 	return EXT4_NUM_B2C(sbi, num);
 }


@@ -16,7 +16,7 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
 	return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
 }
 
-int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_inode_bitmap_csum_verify(struct super_block *sb,
 				  struct ext4_group_desc *gdp,
 				  struct buffer_head *bh, int sz)
 {
@@ -38,7 +38,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 	return provided == calculated;
 }
 
-void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_inode_bitmap_csum_set(struct super_block *sb,
 				struct ext4_group_desc *gdp,
 				struct buffer_head *bh, int sz)
 {
@@ -54,7 +54,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
 	gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
 }
 
-int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+int ext4_block_bitmap_csum_verify(struct super_block *sb,
 				  struct ext4_group_desc *gdp,
 				  struct buffer_head *bh)
 {
@@ -74,13 +74,10 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 	} else
 		calculated &= 0xFFFF;
 
-	if (provided == calculated)
-		return 1;
-
-	return 0;
+	return provided == calculated;
 }
 
-void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+void ext4_block_bitmap_csum_set(struct super_block *sb,
 				struct ext4_group_desc *gdp,
 				struct buffer_head *bh)
 {


@@ -40,6 +40,7 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/compat.h> #include <linux/compat.h>
#endif #endif
#include <uapi/linux/ext4.h>
#include <linux/fscrypt.h> #include <linux/fscrypt.h>
#include <linux/fsverity.h> #include <linux/fsverity.h>
@@ -591,17 +592,6 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(RESERVED); CHECK_FLAG_VALUE(RESERVED);
} }
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
__u32 group; /* Group number for this data */
__u64 block_bitmap; /* Absolute block number of block bitmap */
__u64 inode_bitmap; /* Absolute block number of inode bitmap */
__u64 inode_table; /* Absolute block number of inode table start */
__u32 blocks_count; /* Total number of blocks in this group */
__u16 reserved_blocks; /* Number of reserved blocks in this group */
__u16 unused;
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
struct compat_ext4_new_group_input { struct compat_ext4_new_group_input {
u32 group; u32 group;
@@ -698,70 +688,6 @@ enum {
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
#define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040 #define EXT4_FREE_BLOCKS_RERESERVE_CLUSTER 0x0040
/*
* ioctl commands
*/
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE _IO('f', 9)
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
/* ioctl codes 19--39 are reserved for fscrypt */
#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32)
#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid)
#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid)
#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
* Flags for going down operation
*/
#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
/*
* Flags returned by EXT4_IOC_GETSTATE
*
* We only expose to userspace a subset of the state flags in
* i_state_flags
*/
#define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001
#define EXT4_STATE_FLAG_NEW 0x00000002
#define EXT4_STATE_FLAG_NEWENTRY 0x00000004
#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008
/* flags for ioctl EXT4_IOC_CHECKPOINT */
#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD 0x1
#define EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT 0x2
#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN 0x4
#define EXT4_IOC_CHECKPOINT_FLAG_VALID (EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \
EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \
EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
/*
* Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID
*/
struct fsuuid {
__u32 fsu_len;
__u32 fsu_flags;
__u8 fsu_uuid[];
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT) #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* /*
* ioctl commands in 32 bit emulation * ioctl commands in 32 bit emulation
@@ -776,12 +702,6 @@ struct fsuuid {
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif #endif
/*
* Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
* It indicates that the entry in extent status cache is for a hole.
*/
#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000
/* Max physical block we can address w/o extents */ /* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF #define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
@@ -852,15 +772,6 @@ struct ext4_inode {
__le32 i_projid; /* Project ID */ __le32 i_projid; /* Project ID */
}; };
struct move_extent {
__u32 reserved; /* should be zero */
__u32 donor_fd; /* donor file descriptor */
__u64 orig_start; /* logical start offset in block for orig */
__u64 donor_start; /* logical start offset in block for donor */
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
#define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
@@ -1120,8 +1031,8 @@ struct ext4_inode_info {
/* mballoc */ /* mballoc */
atomic_t i_prealloc_active; atomic_t i_prealloc_active;
struct list_head i_prealloc_list; struct rb_root i_prealloc_node;
spinlock_t i_prealloc_lock; rwlock_t i_prealloc_lock;
/* extents status tree */ /* extents status tree */
struct ext4_es_tree i_es_tree; struct ext4_es_tree i_es_tree;
@@ -1613,7 +1524,6 @@ struct ext4_sb_info {
unsigned int s_mb_stats; unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs; unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc; unsigned int s_mb_group_prealloc;
unsigned int s_mb_max_inode_prealloc;
unsigned int s_max_dir_size_kb; unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */ /* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group; unsigned long s_mb_last_group;
@@ -1887,7 +1797,6 @@ static inline void ext4_simulate_fail_bh(struct super_block *sb,
* Inode dynamic state flags * Inode dynamic state flags
*/ */
enum { enum {
EXT4_STATE_JDATA, /* journaled data exists */
EXT4_STATE_NEW, /* inode is newly created */ EXT4_STATE_NEW, /* inode is newly created */
EXT4_STATE_XATTR, /* has in-inode xattrs */ EXT4_STATE_XATTR, /* has in-inode xattrs */
EXT4_STATE_NO_EXPAND, /* No space for expansion */ EXT4_STATE_NO_EXPAND, /* No space for expansion */
@@ -2676,16 +2585,16 @@ struct mmpd_data {
/* bitmap.c */ /* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp, struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz); struct buffer_head *bh, int sz);
int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp, struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz); struct buffer_head *bh, int sz);
void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, void ext4_block_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp, struct ext4_group_desc *gdp,
struct buffer_head *bh); struct buffer_head *bh);
int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, int ext4_block_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp, struct ext4_group_desc *gdp,
struct buffer_head *bh); struct buffer_head *bh);
@@ -3550,7 +3459,7 @@ extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len); unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode); extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
extern int ext4_readpage_inline(struct inode *inode, struct page *page); int ext4_readpage_inline(struct inode *inode, struct folio *folio);
extern int ext4_try_to_write_inline_data(struct address_space *mapping, extern int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode, struct inode *inode,
loff_t pos, unsigned len, loff_t pos, unsigned len,
@@ -3647,7 +3556,7 @@ static inline void ext4_set_de_type(struct super_block *sb,
/* readpages.c */ /* readpages.c */
extern int ext4_mpage_readpages(struct inode *inode, extern int ext4_mpage_readpages(struct inode *inode,
struct readahead_control *rac, struct page *page); struct readahead_control *rac, struct folio *folio);
extern int __init ext4_init_post_read_processing(void); extern int __init ext4_init_post_read_processing(void);
extern void ext4_exit_post_read_processing(void); extern void ext4_exit_post_read_processing(void);
@@ -3757,9 +3666,8 @@ extern void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc); struct writeback_control *wbc);
extern void ext4_end_io_rsv_work(struct work_struct *work); extern void ext4_end_io_rsv_work(struct work_struct *work);
extern void ext4_io_submit(struct ext4_io_submit *io); extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io, int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *page,
struct page *page, size_t len);
int len);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);


@@ -4526,13 +4526,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	trace_ext4_zero_range(inode, offset, len, mode);
 
-	/* Call ext4_force_commit to flush all data in case of data=journal. */
-	if (ext4_should_journal_data(inode)) {
-		ret = ext4_force_commit(inode->i_sb);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * Round up offset. This is not fallocate, we need to zero out
 	 * blocks, so convert interior block aligned part of the range to
@@ -4616,6 +4609,20 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 			filemap_invalidate_unlock(mapping);
 			goto out_mutex;
 		}
+
+		/*
+		 * For journalled data we need to write (and checkpoint) pages
+		 * before discarding page cache to avoid inconsitent data on
+		 * disk in case of crash before zeroing trans is committed.
+		 */
+		if (ext4_should_journal_data(inode)) {
+			ret = filemap_write_and_wait_range(mapping, start, end);
+			if (ret) {
+				filemap_invalidate_unlock(mapping);
+				goto out_mutex;
+			}
+		}
+
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
 		inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -5290,13 +5297,6 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
 	punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
 	punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
 
-	/* Call ext4_force_commit to flush all data in case of data=journal. */
-	if (ext4_should_journal_data(inode)) {
-		ret = ext4_force_commit(inode->i_sb);
-		if (ret)
-			return ret;
-	}
-
 	inode_lock(inode);
 	/*
 	 * There is no need to overlap collapse range with EOF, in which case
@@ -5443,13 +5443,6 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
 	offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
 	len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
 
-	/* Call ext4_force_commit to flush all data in case of data=journal */
-	if (ext4_should_journal_data(inode)) {
-		ret = ext4_force_commit(inode->i_sb);
-		if (ret)
-			return ret;
-	}
-
 	inode_lock(inode);
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {


@@ -153,23 +153,12 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 		goto out;
 
 	/*
-	 * data=writeback,ordered:
 	 * The caller's filemap_fdatawrite()/wait will sync the data.
 	 * Metadata is in the journal, we wait for proper transaction to
 	 * commit here.
-	 *
-	 * data=journal:
-	 * filemap_fdatawrite won't do anything (the buffers are clean).
-	 * ext4_force_commit will write the file data into the journal and
-	 * will wait on that.
-	 * filemap_fdatawait() will encounter a ton of newly-dirtied pages
-	 * (they were dirtied by commit).  But that's OK - the blocks are
-	 * safe in-journal, which is all fsync() needs to ensure.
 	 */
 	if (!sbi->s_journal)
 		ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
-	else if (ext4_should_journal_data(inode))
-		ret = ext4_force_commit(inode->i_sb);
 	else
 		ret = ext4_fsync_journal(inode, datasync, &needs_barrier);


@@ -98,7 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
 	if (buffer_verified(bh))
 		goto verified;
 	blk = ext4_inode_bitmap(sb, desc);
-	if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh,
+	if (!ext4_inode_bitmap_csum_verify(sb, desc, bh,
 					   EXT4_INODES_PER_GROUP(sb) / 8) ||
 	    ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
 		ext4_unlock_group(sb, block_group);
@@ -327,7 +327,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 		if (percpu_counter_initialized(&sbi->s_dirs_counter))
 			percpu_counter_dec(&sbi->s_dirs_counter);
 	}
-	ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+	ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh,
 				   EXT4_INODES_PER_GROUP(sb) / 8);
 	ext4_group_desc_csum_set(sb, block_group, gdp);
 	ext4_unlock_group(sb, block_group);
@@ -813,8 +813,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_group_clusters_set(sb, gdp,
 			ext4_free_clusters_after_init(sb, group, gdp));
-		ext4_block_bitmap_csum_set(sb, group, gdp,
-					   block_bitmap_bh);
+		ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh);
 		ext4_group_desc_csum_set(sb, group, gdp);
 	}
 	ext4_unlock_group(sb, group);
@@ -852,7 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
 	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
 	if (ext4_has_group_desc_csum(sb)) {
-		ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+		ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
 					   EXT4_INODES_PER_GROUP(sb) / 8);
 		ext4_group_desc_csum_set(sb, group, gdp);
 	}
@@ -1165,8 +1164,7 @@ got:
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_group_clusters_set(sb, gdp,
 			ext4_free_clusters_after_init(sb, group, gdp));
-		ext4_block_bitmap_csum_set(sb, group, gdp,
-					   block_bitmap_bh);
+		ext4_block_bitmap_csum_set(sb, gdp, block_bitmap_bh);
 		ext4_group_desc_csum_set(sb, group, gdp);
 	}
 	ext4_unlock_group(sb, group);
@@ -1222,7 +1220,7 @@ got:
 		}
 	}
 	if (ext4_has_group_desc_csum(sb)) {
-		ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh,
+		ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
 					   EXT4_INODES_PER_GROUP(sb) / 8);
 		ext4_group_desc_csum_set(sb, group, gdp);
 	}


@@ -467,16 +467,16 @@ out:
return error; return error;
} }
static int ext4_read_inline_page(struct inode *inode, struct page *page) static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{ {
void *kaddr; void *kaddr;
int ret = 0; int ret = 0;
size_t len; size_t len;
struct ext4_iloc iloc; struct ext4_iloc iloc;
BUG_ON(!PageLocked(page)); BUG_ON(!folio_test_locked(folio));
BUG_ON(!ext4_has_inline_data(inode)); BUG_ON(!ext4_has_inline_data(inode));
BUG_ON(page->index); BUG_ON(folio->index);
if (!EXT4_I(inode)->i_inline_off) { if (!EXT4_I(inode)->i_inline_off) {
ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
@@ -489,19 +489,20 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page)
goto out; goto out;
len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
kaddr = kmap_atomic(page); BUG_ON(len > PAGE_SIZE);
kaddr = kmap_local_folio(folio, 0);
ret = ext4_read_inline_data(inode, kaddr, len, &iloc); ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
flush_dcache_page(page); flush_dcache_folio(folio);
kunmap_atomic(kaddr); kunmap_local(kaddr);
zero_user_segment(page, len, PAGE_SIZE); folio_zero_segment(folio, len, folio_size(folio));
SetPageUptodate(page); folio_mark_uptodate(folio);
brelse(iloc.bh); brelse(iloc.bh);
out: out:
return ret; return ret;
} }
int ext4_readpage_inline(struct inode *inode, struct page *page) int ext4_readpage_inline(struct inode *inode, struct folio *folio)
{ {
int ret = 0; int ret = 0;
@@ -515,16 +516,16 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
* Current inline data can only exist in the 1st page, * Current inline data can only exist in the 1st page,
* So for all the other pages, just set them uptodate. * So for all the other pages, just set them uptodate.
*/ */
if (!page->index) if (!folio->index)
ret = ext4_read_inline_page(inode, page); ret = ext4_read_inline_folio(inode, folio);
else if (!PageUptodate(page)) { else if (!folio_test_uptodate(folio)) {
zero_user_segment(page, 0, PAGE_SIZE); folio_zero_segment(folio, 0, folio_size(folio));
SetPageUptodate(page); folio_mark_uptodate(folio);
} }
up_read(&EXT4_I(inode)->xattr_sem); up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page); folio_unlock(folio);
return ret >= 0 ? 0 : ret; return ret >= 0 ? 0 : ret;
} }
@@ -534,8 +535,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
int ret, needed_blocks, no_expand; int ret, needed_blocks, no_expand;
handle_t *handle = NULL; handle_t *handle = NULL;
int retries = 0, sem_held = 0; int retries = 0, sem_held = 0;
struct page *page = NULL; struct folio *folio = NULL;
unsigned int flags;
unsigned from, to; unsigned from, to;
struct ext4_iloc iloc; struct ext4_iloc iloc;
@@ -564,10 +564,9 @@ retry:
/* We cannot recurse into the filesystem as the transaction is already /* We cannot recurse into the filesystem as the transaction is already
* started */ * started */
flags = memalloc_nofs_save(); folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
page = grab_cache_page_write_begin(mapping, 0); mapping_gfp_mask(mapping));
memalloc_nofs_restore(flags); if (!folio) {
if (!page) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
@@ -582,8 +581,8 @@ retry:
from = 0; from = 0;
to = ext4_get_inline_size(inode); to = ext4_get_inline_size(inode);
if (!PageUptodate(page)) { if (!folio_test_uptodate(folio)) {
ret = ext4_read_inline_page(inode, page); ret = ext4_read_inline_folio(inode, folio);
if (ret < 0) if (ret < 0)
goto out; goto out;
} }
@@ -593,21 +592,21 @@ retry:
goto out; goto out;
if (ext4_should_dioread_nolock(inode)) { if (ext4_should_dioread_nolock(inode)) {
ret = __block_write_begin(page, from, to, ret = __block_write_begin(&folio->page, from, to,
ext4_get_block_unwritten); ext4_get_block_unwritten);
} else } else
ret = __block_write_begin(page, from, to, ext4_get_block); ret = __block_write_begin(&folio->page, from, to, ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) { if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode, page_buffers(page), ret = ext4_walk_page_buffers(handle, inode,
from, to, NULL, folio_buffers(folio), from, to,
do_journal_get_write_access); NULL, do_journal_get_write_access);
} }
if (ret) { if (ret) {
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
page = NULL; folio = NULL;
ext4_orphan_add(handle, inode); ext4_orphan_add(handle, inode);
ext4_write_unlock_xattr(inode, &no_expand); ext4_write_unlock_xattr(inode, &no_expand);
sem_held = 0; sem_held = 0;
@@ -627,12 +626,12 @@ retry:
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry; goto retry;
if (page) if (folio)
block_commit_write(page, from, to); block_commit_write(&folio->page, from, to);
out: out:
if (page) { if (folio) {
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
} }
if (sem_held) if (sem_held)
ext4_write_unlock_xattr(inode, &no_expand); ext4_write_unlock_xattr(inode, &no_expand);
@@ -655,8 +654,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
{ {
int ret; int ret;
handle_t *handle; handle_t *handle;
unsigned int flags; struct folio *folio;
struct page *page;
struct ext4_iloc iloc; struct ext4_iloc iloc;
if (pos + len > ext4_get_max_inline_size(inode)) if (pos + len > ext4_get_max_inline_size(inode))
@@ -693,28 +691,27 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (ret) if (ret)
goto out; goto out;
flags = memalloc_nofs_save(); folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
page = grab_cache_page_write_begin(mapping, 0); mapping_gfp_mask(mapping));
memalloc_nofs_restore(flags); if (!folio) {
if (!page) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
*pagep = page; *pagep = &folio->page;
down_read(&EXT4_I(inode)->xattr_sem); down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) { if (!ext4_has_inline_data(inode)) {
ret = 0; ret = 0;
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
goto out_up_read; goto out_up_read;
} }
if (!PageUptodate(page)) { if (!folio_test_uptodate(folio)) {
ret = ext4_read_inline_page(inode, page); ret = ext4_read_inline_folio(inode, folio);
if (ret < 0) { if (ret < 0) {
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
goto out_up_read; goto out_up_read;
} }
} }
@@ -735,20 +732,21 @@ convert:
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page) unsigned copied, struct page *page)
{ {
struct folio *folio = page_folio(page);
handle_t *handle = ext4_journal_current_handle(); handle_t *handle = ext4_journal_current_handle();
int no_expand; int no_expand;
void *kaddr; void *kaddr;
struct ext4_iloc iloc; struct ext4_iloc iloc;
int ret = 0, ret2; int ret = 0, ret2;
if (unlikely(copied < len) && !PageUptodate(page)) if (unlikely(copied < len) && !folio_test_uptodate(folio))
copied = 0; copied = 0;
if (likely(copied)) { if (likely(copied)) {
ret = ext4_get_inode_loc(inode, &iloc); ret = ext4_get_inode_loc(inode, &iloc);
if (ret) { if (ret) {
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
ext4_std_error(inode->i_sb, ret); ext4_std_error(inode->i_sb, ret);
goto out; goto out;
} }
@@ -762,30 +760,30 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
*/ */
(void) ext4_find_inline_data_nolock(inode); (void) ext4_find_inline_data_nolock(inode);
kaddr = kmap_atomic(page); kaddr = kmap_local_folio(folio, 0);
ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
kunmap_atomic(kaddr); kunmap_local(kaddr);
SetPageUptodate(page); folio_mark_uptodate(folio);
/* clear page dirty so that writepages wouldn't work for us. */ /* clear dirty flag so that writepages wouldn't work for us. */
ClearPageDirty(page); folio_clear_dirty(folio);
ext4_write_unlock_xattr(inode, &no_expand); ext4_write_unlock_xattr(inode, &no_expand);
brelse(iloc.bh); brelse(iloc.bh);
/* /*
* It's important to update i_size while still holding page * It's important to update i_size while still holding folio
* lock: page writeout could otherwise come in and zero * lock: page writeout could otherwise come in and zero
* beyond i_size. * beyond i_size.
*/ */
ext4_update_inode_size(inode, pos + copied); ext4_update_inode_size(inode, pos + copied);
} }
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
/* /*
* Don't mark the inode dirty under page lock. First, it unnecessarily * Don't mark the inode dirty under folio lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock * makes the holding time of folio lock longer. Second, it forces lock
* ordering of page lock and transaction start for journaling * ordering of folio lock and transaction start for journaling
* filesystems. * filesystems.
*/ */
if (likely(copied)) if (likely(copied))
@@ -852,10 +850,11 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
void **fsdata) void **fsdata)
{ {
int ret = 0, inline_size; int ret = 0, inline_size;
struct page *page; struct folio *folio;
page = grab_cache_page_write_begin(mapping, 0); folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN,
if (!page) mapping_gfp_mask(mapping));
if (!folio)
return -ENOMEM; return -ENOMEM;
down_read(&EXT4_I(inode)->xattr_sem); down_read(&EXT4_I(inode)->xattr_sem);
@@ -866,32 +865,32 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
inline_size = ext4_get_inline_size(inode); inline_size = ext4_get_inline_size(inode);
if (!PageUptodate(page)) { if (!folio_test_uptodate(folio)) {
ret = ext4_read_inline_page(inode, page); ret = ext4_read_inline_folio(inode, folio);
if (ret < 0) if (ret < 0)
goto out; goto out;
} }
ret = __block_write_begin(page, 0, inline_size, ret = __block_write_begin(&folio->page, 0, inline_size,
ext4_da_get_block_prep); ext4_da_get_block_prep);
if (ret) { if (ret) {
up_read(&EXT4_I(inode)->xattr_sem); up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
ext4_truncate_failed_write(inode); ext4_truncate_failed_write(inode);
return ret; return ret;
} }
SetPageDirty(page); folio_mark_dirty(folio);
SetPageUptodate(page); folio_mark_uptodate(folio);
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
*fsdata = (void *)CONVERT_INLINE_DATA; *fsdata = (void *)CONVERT_INLINE_DATA;
out: out:
up_read(&EXT4_I(inode)->xattr_sem); up_read(&EXT4_I(inode)->xattr_sem);
if (page) { if (folio) {
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
} }
return ret; return ret;
} }
@@ -912,10 +911,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
{ {
int ret; int ret;
handle_t *handle; handle_t *handle;
struct page *page; struct folio *folio;
struct ext4_iloc iloc; struct ext4_iloc iloc;
int retries = 0; int retries = 0;
unsigned int flags;
ret = ext4_get_inode_loc(inode, &iloc); ret = ext4_get_inode_loc(inode, &iloc);
if (ret) if (ret)
@@ -947,10 +945,9 @@ retry_journal:
* We cannot recurse into the filesystem as the transaction * We cannot recurse into the filesystem as the transaction
* is already started. * is already started.
*/ */
flags = memalloc_nofs_save(); folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
page = grab_cache_page_write_begin(mapping, 0); mapping_gfp_mask(mapping));
memalloc_nofs_restore(flags); if (!folio) {
if (!page) {
ret = -ENOMEM; ret = -ENOMEM;
goto out_journal; goto out_journal;
} }
@@ -961,8 +958,8 @@ retry_journal:
goto out_release_page; goto out_release_page;
} }
if (!PageUptodate(page)) { if (!folio_test_uptodate(folio)) {
ret = ext4_read_inline_page(inode, page); ret = ext4_read_inline_folio(inode, folio);
if (ret < 0) if (ret < 0)
goto out_release_page; goto out_release_page;
} }
@@ -972,13 +969,13 @@ retry_journal:
goto out_release_page; goto out_release_page;
up_read(&EXT4_I(inode)->xattr_sem); up_read(&EXT4_I(inode)->xattr_sem);
*pagep = page; *pagep = &folio->page;
brelse(iloc.bh); brelse(iloc.bh);
return 1; return 1;
out_release_page: out_release_page:
up_read(&EXT4_I(inode)->xattr_sem); up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page); folio_unlock(folio);
put_page(page); folio_put(folio);
out_journal: out_journal:
ext4_journal_stop(handle); ext4_journal_stop(handle);
out: out:

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -73,11 +73,6 @@
  */
 #define MB_DEFAULT_GROUP_PREALLOC	512
 
-/*
- * maximum length of inode prealloc list
- */
-#define MB_DEFAULT_MAX_INODE_PREALLOC	512
-
 /*
  * Number of groups to search linearly before performing group scanning
  * optimization.
@@ -114,7 +109,10 @@ struct ext4_free_data {
 };
 
 struct ext4_prealloc_space {
-	struct list_head	pa_inode_list;
+	union {
+		struct rb_node		inode_node;	/* for inode PA rbtree */
+		struct list_head	lg_list;	/* for lg PAs */
+	} pa_node;
 	struct list_head	pa_group_list;
 	union {
 		struct list_head pa_tmp_list;
@@ -128,8 +126,11 @@ struct ext4_prealloc_space {
 	ext4_grpblk_t		pa_len;		/* len of preallocated chunk */
 	ext4_grpblk_t		pa_free;	/* how many blocks are free */
 	unsigned short		pa_type;	/* pa type. inode or group */
-	spinlock_t		*pa_obj_lock;
-	struct inode		*pa_inode;	/* hack, for history only */
+	union {
+		rwlock_t	*inode_lock;	/* locks the rbtree holding this PA */
+		spinlock_t	*lg_lock;	/* locks the lg list holding this PA */
+	} pa_node_lock;
+	struct inode		*pa_inode;	/* used to get the inode during group discard */
 };
 
 enum {


@@ -126,7 +126,6 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
{ {
struct address_space *mapping[2]; struct address_space *mapping[2];
unsigned int flags; unsigned int flags;
unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
BUG_ON(!inode1 || !inode2); BUG_ON(!inode1 || !inode2);
if (inode1 < inode2) { if (inode1 < inode2) {
@@ -139,14 +138,14 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
} }
flags = memalloc_nofs_save(); flags = memalloc_nofs_save();
folio[0] = __filemap_get_folio(mapping[0], index1, fgp_flags, folio[0] = __filemap_get_folio(mapping[0], index1, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping[0])); mapping_gfp_mask(mapping[0]));
if (!folio[0]) { if (!folio[0]) {
memalloc_nofs_restore(flags); memalloc_nofs_restore(flags);
return -ENOMEM; return -ENOMEM;
} }
folio[1] = __filemap_get_folio(mapping[1], index2, fgp_flags, folio[1] = __filemap_get_folio(mapping[1], index2, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping[1])); mapping_gfp_mask(mapping[1]));
memalloc_nofs_restore(flags); memalloc_nofs_restore(flags);
if (!folio[1]) { if (!folio[1]) {
@@ -169,25 +168,27 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
/* Force page buffers uptodate w/o dropping page's lock */ /* Force page buffers uptodate w/o dropping page's lock */
static int static int
mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
{ {
struct inode *inode = page->mapping->host; struct inode *inode = folio->mapping->host;
sector_t block; sector_t block;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, block_start, block_end; unsigned int blocksize, block_start, block_end;
int i, err, nr = 0, partial = 0; int i, err, nr = 0, partial = 0;
BUG_ON(!PageLocked(page)); BUG_ON(!folio_test_locked(folio));
BUG_ON(PageWriteback(page)); BUG_ON(folio_test_writeback(folio));
if (PageUptodate(page)) if (folio_test_uptodate(folio))
return 0; return 0;
blocksize = i_blocksize(inode); blocksize = i_blocksize(inode);
if (!page_has_buffers(page)) head = folio_buffers(folio);
create_empty_buffers(page, blocksize, 0); if (!head) {
create_empty_buffers(&folio->page, blocksize, 0);
head = folio_buffers(folio);
}
head = page_buffers(page); block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start; for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) { block++, block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize; block_end = block_start + blocksize;
@@ -201,11 +202,11 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
if (!buffer_mapped(bh)) { if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0); err = ext4_get_block(inode, block, bh, 0);
if (err) { if (err) {
SetPageError(page); folio_set_error(folio);
return err; return err;
} }
if (!buffer_mapped(bh)) { if (!buffer_mapped(bh)) {
zero_user(page, block_start, blocksize); folio_zero_range(folio, block_start, blocksize);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
continue; continue;
} }
@@ -227,7 +228,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
} }
out: out:
if (!partial) if (!partial)
SetPageUptodate(page); folio_mark_uptodate(folio);
return 0; return 0;
} }
@@ -355,7 +356,7 @@ again:
goto unlock_folios; goto unlock_folios;
} }
data_copy: data_copy:
*err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size); *err = mext_page_mkuptodate(folio[0], from, from + replaced_size);
if (*err) if (*err)
goto unlock_folios; goto unlock_folios;


@@ -99,30 +99,30 @@ static void buffer_io_error(struct buffer_head *bh)
static void ext4_finish_bio(struct bio *bio) static void ext4_finish_bio(struct bio *bio)
{ {
struct bio_vec *bvec; struct folio_iter fi;
struct bvec_iter_all iter_all;
bio_for_each_segment_all(bvec, bio, iter_all) { bio_for_each_folio_all(fi, bio) {
struct page *page = bvec->bv_page; struct folio *folio = fi.folio;
struct page *bounce_page = NULL; struct folio *io_folio = NULL;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset; size_t bio_start = fi.offset;
unsigned bio_end = bio_start + bvec->bv_len; size_t bio_end = bio_start + fi.length;
unsigned under_io = 0; unsigned under_io = 0;
unsigned long flags; unsigned long flags;
if (fscrypt_is_bounce_page(page)) { if (fscrypt_is_bounce_folio(folio)) {
bounce_page = page; io_folio = folio;
page = fscrypt_pagecache_page(bounce_page); folio = fscrypt_pagecache_folio(folio);
} }
if (bio->bi_status) { if (bio->bi_status) {
SetPageError(page); int err = blk_status_to_errno(bio->bi_status);
mapping_set_error(page->mapping, -EIO); folio_set_error(folio);
mapping_set_error(folio->mapping, err);
} }
bh = head = page_buffers(page); bh = head = folio_buffers(folio);
/* /*
* We check all buffers in the page under b_uptodate_lock * We check all buffers in the folio under b_uptodate_lock
* to avoid races with other end io clearing async_write flags * to avoid races with other end io clearing async_write flags
*/ */
spin_lock_irqsave(&head->b_uptodate_lock, flags); spin_lock_irqsave(&head->b_uptodate_lock, flags);
@@ -141,8 +141,8 @@ static void ext4_finish_bio(struct bio *bio)
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
spin_unlock_irqrestore(&head->b_uptodate_lock, flags); spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
if (!under_io) { if (!under_io) {
fscrypt_free_bounce_page(bounce_page); fscrypt_free_bounce_page(&io_folio->page);
end_page_writeback(page); folio_end_writeback(folio);
} }
} }
} }
@@ -409,12 +409,10 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
static void io_submit_add_bh(struct ext4_io_submit *io, static void io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode, struct inode *inode,
struct page *pagecache_page, struct folio *folio,
struct page *bounce_page, struct folio *io_folio,
struct buffer_head *bh) struct buffer_head *bh)
{ {
int ret;
if (io->io_bio && (bh->b_blocknr != io->io_next_block || if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
!fscrypt_mergeable_bio_bh(io->io_bio, bh))) { !fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
submit_and_retry: submit_and_retry:
@@ -422,20 +420,17 @@ submit_and_retry:
} }
if (io->io_bio == NULL) if (io->io_bio == NULL)
io_submit_init_bio(io, bh); io_submit_init_bio(io, bh);
ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page, if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh)))
bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry; goto submit_and_retry;
wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size); wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size);
io->io_next_block++; io->io_next_block++;
} }
int ext4_bio_write_page(struct ext4_io_submit *io, int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
struct page *page, size_t len)
int len)
{ {
struct page *bounce_page = NULL; struct folio *io_folio = folio;
struct inode *inode = page->mapping->host; struct inode *inode = folio->mapping->host;
unsigned block_start; unsigned block_start;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
int ret = 0; int ret = 0;
@@ -443,30 +438,30 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc = io->io_wbc; struct writeback_control *wbc = io->io_wbc;
bool keep_towrite = false; bool keep_towrite = false;
BUG_ON(!PageLocked(page)); BUG_ON(!folio_test_locked(folio));
BUG_ON(PageWriteback(page)); BUG_ON(folio_test_writeback(folio));
ClearPageError(page); folio_clear_error(folio);
/* /*
* Comments copied from block_write_full_page: * Comments copied from block_write_full_page:
* *
* The page straddles i_size. It must be zeroed out on each and every * The folio straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped * writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of * in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and * the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file." * writes to that region are not written out to the file."
*/ */
if (len < PAGE_SIZE) if (len < folio_size(folio))
zero_user_segment(page, len, PAGE_SIZE); folio_zero_segment(folio, len, folio_size(folio));
/* /*
* In the first loop we prepare and mark buffers to submit. We have to * In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that * mark all buffers in the folio before submitting so that
* end_page_writeback() cannot be called from ext4_end_bio() when IO * folio_end_writeback() cannot be called from ext4_end_bio() when IO
* on the first buffer finishes and we are still working on submitting * on the first buffer finishes and we are still working on submitting
* the second buffer. * the second buffer.
*/ */
bh = head = page_buffers(page); bh = head = folio_buffers(folio);
do { do {
block_start = bh_offset(bh); block_start = bh_offset(bh);
if (block_start >= len) { if (block_start >= len) {
@@ -481,14 +476,16 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
/* /*
* Keeping dirty some buffer we cannot write? Make sure * Keeping dirty some buffer we cannot write? Make sure
* to redirty the page and keep TOWRITE tag so that * to redirty the folio and keep TOWRITE tag so that
* racing WB_SYNC_ALL writeback does not skip the page. * racing WB_SYNC_ALL writeback does not skip the folio.
* This happens e.g. when doing writeout for * This happens e.g. when doing writeout for
* transaction commit. * transaction commit or when journalled data is not
* yet committed.
*/ */
-			if (buffer_dirty(bh)) {
-				if (!PageDirty(page))
-					redirty_page_for_writepage(wbc, page);
+			if (buffer_dirty(bh) ||
+			    (buffer_jbd(bh) && buffer_jbddirty(bh))) {
+				if (!folio_test_dirty(folio))
+					folio_redirty_for_writepage(wbc, folio);
keep_towrite = true; keep_towrite = true;
} }
continue; continue;
@@ -500,11 +497,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
nr_to_submit++; nr_to_submit++;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
/* Nothing to submit? Just unlock the page... */ /* Nothing to submit? Just unlock the folio... */
if (!nr_to_submit) if (!nr_to_submit)
goto unlock; return 0;
bh = head = page_buffers(page); bh = head = folio_buffers(folio);
/* /*
* If any blocks are being written to an encrypted file, encrypt them * If any blocks are being written to an encrypted file, encrypt them
@@ -513,9 +510,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* (e.g. holes) to be unnecessarily encrypted, but this is rare and * (e.g. holes) to be unnecessarily encrypted, but this is rare and
* can't happen in the common case of blocksize == PAGE_SIZE. * can't happen in the common case of blocksize == PAGE_SIZE.
*/ */
if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
gfp_t gfp_flags = GFP_NOFS; gfp_t gfp_flags = GFP_NOFS;
unsigned int enc_bytes = round_up(len, i_blocksize(inode)); unsigned int enc_bytes = round_up(len, i_blocksize(inode));
struct page *bounce_page;
/* /*
* Since bounce page allocation uses a mempool, we can only use * Since bounce page allocation uses a mempool, we can only use
@@ -525,8 +523,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
if (io->io_bio) if (io->io_bio)
gfp_flags = GFP_NOWAIT | __GFP_NOWARN; gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
retry_encrypt: retry_encrypt:
bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, bounce_page = fscrypt_encrypt_pagecache_blocks(&folio->page,
0, gfp_flags); enc_bytes, 0, gfp_flags);
if (IS_ERR(bounce_page)) { if (IS_ERR(bounce_page)) {
ret = PTR_ERR(bounce_page); ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM && if (ret == -ENOMEM &&
@@ -542,7 +540,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
} }
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page); folio_redirty_for_writepage(wbc, folio);
do { do {
if (buffer_async_write(bh)) { if (buffer_async_write(bh)) {
clear_buffer_async_write(bh); clear_buffer_async_write(bh);
@@ -550,22 +548,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
} }
bh = bh->b_this_page; bh = bh->b_this_page;
} while (bh != head); } while (bh != head);
-			goto unlock;
+			return ret;
 		}
+		io_folio = page_folio(bounce_page);
 	}
-	if (keep_towrite)
-		set_page_writeback_keepwrite(page);
-	else
-		set_page_writeback(page);
+	__folio_start_writeback(folio, keep_towrite);
/* Now submit buffers to write */ /* Now submit buffers to write */
do { do {
if (!buffer_async_write(bh)) if (!buffer_async_write(bh))
continue; continue;
io_submit_add_bh(io, inode, page, bounce_page, bh); io_submit_add_bh(io, inode, folio, io_folio, bh);
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
-unlock:
-	unlock_page(page);
-	return ret;
+	return 0;
} }
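(A minimal sketch, not ext4 code: __folio_start_writeback() takes a keep_write flag, so the old set_page_writeback()/set_page_writeback_keepwrite() pair collapses into one call. The helper name below is hypothetical.)

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/page-flags.h>

/* Hypothetical helper: zero the tail of a folio beyond EOF and start
 * writeback, optionally keeping the TOWRITE tag so a racing WB_SYNC_ALL
 * writeback still revisits the folio. */
static void demo_prep_folio_for_writeback(struct folio *folio, size_t len,
					  bool keep_towrite)
{
	if (len < folio_size(folio))
		folio_zero_segment(folio, len, folio_size(folio));
	__folio_start_writeback(folio, keep_towrite);
}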


@@ -68,18 +68,16 @@ struct bio_post_read_ctx {
static void __read_end_io(struct bio *bio) static void __read_end_io(struct bio *bio)
{ {
-	struct page *page;
-	struct bio_vec *bv;
-	struct bvec_iter_all iter_all;
+	struct folio_iter fi;

-	bio_for_each_segment_all(bv, bio, iter_all) {
-		page = bv->bv_page;
+	bio_for_each_folio_all(fi, bio) {
+		struct folio *folio = fi.folio;
if (bio->bi_status) if (bio->bi_status)
ClearPageUptodate(page); folio_clear_uptodate(folio);
else else
SetPageUptodate(page); folio_mark_uptodate(folio);
unlock_page(page); folio_unlock(folio);
} }
if (bio->bi_private) if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool); mempool_free(bio->bi_private, bio_post_read_ctx_pool);
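(For reference, a generic sketch of the completion-handler shape used above, assuming nothing ext4-specific: bio_for_each_folio_all() visits each folio attached to the bio once, however many bvecs point into it.)

#include <linux/bio.h>
#include <linux/pagemap.h>

static void demo_read_end_io(struct bio *bio)
{
	struct folio_iter fi;

	bio_for_each_folio_all(fi, bio) {
		/* Mark each folio up to date (or not) and unlock it. */
		if (bio->bi_status)
			folio_clear_uptodate(fi.folio);
		else
			folio_mark_uptodate(fi.folio);
		folio_unlock(fi.folio);
	}
	bio_put(bio);
}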
@@ -218,7 +216,7 @@ static inline loff_t ext4_readpage_limit(struct inode *inode)
} }
int ext4_mpage_readpages(struct inode *inode, int ext4_mpage_readpages(struct inode *inode,
struct readahead_control *rac, struct page *page) struct readahead_control *rac, struct folio *folio)
{ {
struct bio *bio = NULL; struct bio *bio = NULL;
sector_t last_block_in_bio = 0; sector_t last_block_in_bio = 0;
@@ -247,16 +245,15 @@ int ext4_mpage_readpages(struct inode *inode,
int fully_mapped = 1; int fully_mapped = 1;
unsigned first_hole = blocks_per_page; unsigned first_hole = blocks_per_page;
-		if (rac) {
-			page = readahead_page(rac);
-			prefetchw(&page->flags);
-		}
+		if (rac)
+			folio = readahead_folio(rac);
+		prefetchw(&folio->flags);
if (page_has_buffers(page)) if (folio_buffers(folio))
goto confused; goto confused;
block_in_file = next_block = block_in_file = next_block =
(sector_t)page->index << (PAGE_SHIFT - blkbits); (sector_t)folio->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page; last_block = block_in_file + nr_pages * blocks_per_page;
last_block_in_file = (ext4_readpage_limit(inode) + last_block_in_file = (ext4_readpage_limit(inode) +
blocksize - 1) >> blkbits; blocksize - 1) >> blkbits;
@@ -290,7 +287,7 @@ int ext4_mpage_readpages(struct inode *inode,
/* /*
* Then do more ext4_map_blocks() calls until we are * Then do more ext4_map_blocks() calls until we are
* done with this page. * done with this folio.
*/ */
while (page_block < blocks_per_page) { while (page_block < blocks_per_page) {
if (block_in_file < last_block) { if (block_in_file < last_block) {
@@ -299,10 +296,10 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_map_blocks(NULL, inode, &map, 0) < 0) { if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
set_error_page: set_error_page:
SetPageError(page); folio_set_error(folio);
-			zero_user_segment(page, 0, PAGE_SIZE);
+			folio_zero_segment(folio, 0, folio_size(folio));
unlock_page(page); folio_unlock(folio);
goto next_page; goto next_page;
} }
} }
@@ -333,22 +330,22 @@ int ext4_mpage_readpages(struct inode *inode,
} }
} }
if (first_hole != blocks_per_page) { if (first_hole != blocks_per_page) {
zero_user_segment(page, first_hole << blkbits, folio_zero_segment(folio, first_hole << blkbits,
PAGE_SIZE); folio_size(folio));
if (first_hole == 0) { if (first_hole == 0) {
if (ext4_need_verity(inode, page->index) && if (ext4_need_verity(inode, folio->index) &&
!fsverity_verify_page(page)) !fsverity_verify_page(&folio->page))
goto set_error_page; goto set_error_page;
SetPageUptodate(page); folio_mark_uptodate(folio);
unlock_page(page); folio_unlock(folio);
goto next_page; continue;
} }
} else if (fully_mapped) { } else if (fully_mapped) {
SetPageMappedToDisk(page); folio_set_mappedtodisk(folio);
} }
/* /*
* This page will go to BIO. Do we need to send this * This folio will go to BIO. Do we need to send this
* BIO off first? * BIO off first?
*/ */
if (bio && (last_block_in_bio != blocks[0] - 1 || if (bio && (last_block_in_bio != blocks[0] - 1 ||
@@ -366,7 +363,7 @@ int ext4_mpage_readpages(struct inode *inode,
REQ_OP_READ, GFP_KERNEL); REQ_OP_READ, GFP_KERNEL);
fscrypt_set_bio_crypt_ctx(bio, inode, next_block, fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL); GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, page->index); ext4_set_bio_post_read_ctx(bio, inode, folio->index);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io; bio->bi_end_io = mpage_end_io;
if (rac) if (rac)
@@ -374,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
} }
length = first_hole << blkbits; length = first_hole << blkbits;
if (bio_add_page(bio, page, length, 0) < length) if (!bio_add_folio(bio, folio, length, 0))
goto submit_and_realloc; goto submit_and_realloc;
if (((map.m_flags & EXT4_MAP_BOUNDARY) && if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
@@ -384,19 +381,18 @@ int ext4_mpage_readpages(struct inode *inode,
bio = NULL; bio = NULL;
} else } else
last_block_in_bio = blocks[blocks_per_page - 1]; last_block_in_bio = blocks[blocks_per_page - 1];
goto next_page; continue;
confused: confused:
if (bio) { if (bio) {
submit_bio(bio); submit_bio(bio);
bio = NULL; bio = NULL;
} }
if (!PageUptodate(page)) if (!folio_test_uptodate(folio))
block_read_full_folio(page_folio(page), ext4_get_block); block_read_full_folio(folio, ext4_get_block);
else else
unlock_page(page); folio_unlock(folio);
next_page: next_page:
-		if (rac)
-			put_page(page);
+		; /* A label shall be followed by a statement until C23 */
} }
if (bio) if (bio)
submit_bio(bio); submit_bio(bio);


@@ -1306,7 +1306,6 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
} }
static int ext4_set_bitmap_checksums(struct super_block *sb, static int ext4_set_bitmap_checksums(struct super_block *sb,
ext4_group_t group,
struct ext4_group_desc *gdp, struct ext4_group_desc *gdp,
struct ext4_new_group_data *group_data) struct ext4_new_group_data *group_data)
{ {
@@ -1318,14 +1317,14 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
bh = ext4_get_bitmap(sb, group_data->inode_bitmap); bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
if (!bh) if (!bh)
return -EIO; return -EIO;
ext4_inode_bitmap_csum_set(sb, group, gdp, bh, ext4_inode_bitmap_csum_set(sb, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8); EXT4_INODES_PER_GROUP(sb) / 8);
brelse(bh); brelse(bh);
bh = ext4_get_bitmap(sb, group_data->block_bitmap); bh = ext4_get_bitmap(sb, group_data->block_bitmap);
if (!bh) if (!bh)
return -EIO; return -EIO;
ext4_block_bitmap_csum_set(sb, group, gdp, bh); ext4_block_bitmap_csum_set(sb, gdp, bh);
brelse(bh); brelse(bh);
return 0; return 0;
@@ -1363,7 +1362,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
memset(gdp, 0, EXT4_DESC_SIZE(sb)); memset(gdp, 0, EXT4_DESC_SIZE(sb));
ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); err = ext4_set_bitmap_checksums(sb, gdp, group_data);
if (err) { if (err) {
ext4_std_error(sb, err); ext4_std_error(sb, err);
break; break;


@@ -1183,12 +1183,81 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
} }
#endif #endif
static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
{
ext4_fsblk_t block;
int err;
block = ext4_count_free_clusters(sbi->s_sb);
ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
if (!err)
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
if (!err)
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
GFP_KERNEL);
if (!err)
err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
GFP_KERNEL);
if (!err)
err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
if (err)
ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
return err;
}
static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
{
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
percpu_free_rwsem(&sbi->s_writepages_rwsem);
}
static void ext4_group_desc_free(struct ext4_sb_info *sbi)
{
struct buffer_head **group_desc;
int i;
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
}
static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
{
struct flex_groups **flex_groups;
int i;
rcu_read_lock();
flex_groups = rcu_dereference(sbi->s_flex_groups);
if (flex_groups) {
for (i = 0; i < sbi->s_flex_groups_allocated; i++)
kvfree(flex_groups[i]);
kvfree(flex_groups);
}
rcu_read_unlock();
}
static void ext4_put_super(struct super_block *sb) static void ext4_put_super(struct super_block *sb)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
struct buffer_head **group_desc;
struct flex_groups **flex_groups;
int aborted = 0; int aborted = 0;
int i, err; int i, err;
@@ -1238,24 +1307,9 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb)) if (!sb_rdonly(sb))
ext4_commit_super(sb); ext4_commit_super(sb);
-	rcu_read_lock();
-	group_desc = rcu_dereference(sbi->s_group_desc);
-	for (i = 0; i < sbi->s_gdb_count; i++)
-		brelse(group_desc[i]);
-	kvfree(group_desc);
-	flex_groups = rcu_dereference(sbi->s_flex_groups);
-	if (flex_groups) {
-		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
-			kvfree(flex_groups[i]);
-		kvfree(flex_groups);
-	}
-	rcu_read_unlock();
-	percpu_counter_destroy(&sbi->s_freeclusters_counter);
-	percpu_counter_destroy(&sbi->s_freeinodes_counter);
-	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
-	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
-	percpu_free_rwsem(&sbi->s_writepages_rwsem);
+	ext4_group_desc_free(sbi);
+	ext4_flex_groups_free(sbi);
+	ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
for (i = 0; i < EXT4_MAXQUOTAS; i++) for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i)); kfree(get_qf_name(sb, sbi, i));
@@ -1325,9 +1379,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
inode_set_iversion(&ei->vfs_inode, 1); inode_set_iversion(&ei->vfs_inode, 1);
ei->i_flags = 0; ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock); spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list); ei->i_prealloc_node = RB_ROOT;
atomic_set(&ei->i_prealloc_active, 0); atomic_set(&ei->i_prealloc_active, 0);
spin_lock_init(&ei->i_prealloc_lock); rwlock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree); ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock); rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_list); INIT_LIST_HEAD(&ei->i_es_list);
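(The two fields above replace a list head and spinlock with an rb-tree root and an rwlock. Below is a generic sketch of that shape; it assumes nothing about ext4's real ext4_prealloc_space layout and all names are illustrative.)

#include <linux/rbtree.h>
#include <linux/spinlock.h>

struct demo_pa {
	struct rb_node node;
	unsigned long lstart;	/* logical start block of the extent */
	unsigned long len;
};

/* Insert a preallocation keyed by logical start under the write lock. */
static void demo_pa_insert(struct rb_root *root, rwlock_t *lock,
			   struct demo_pa *new)
{
	struct rb_node **p, *parent = NULL;

	write_lock(lock);
	p = &root->rb_node;
	while (*p) {
		struct demo_pa *cur = rb_entry(*p, struct demo_pa, node);

		parent = *p;
		p = new->lstart < cur->lstart ? &(*p)->rb_left : &(*p)->rb_right;
	}
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	write_unlock(lock);
}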
@@ -4587,6 +4641,8 @@ static int ext4_check_feature_compatibility(struct super_block *sb,
struct ext4_super_block *es, struct ext4_super_block *es,
int silent) int silent)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
(ext4_has_compat_features(sb) || (ext4_has_compat_features(sb) ||
ext4_has_ro_compat_features(sb) || ext4_has_ro_compat_features(sb) ||
@@ -4656,14 +4712,59 @@ static int ext4_check_feature_compatibility(struct super_block *sb,
if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
return -EINVAL; return -EINVAL;
if (sbi->s_daxdev) {
if (sb->s_blocksize == PAGE_SIZE)
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
else
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
return -EINVAL;
}
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
return -EINVAL;
}
}
if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
es->s_encryption_level);
return -EINVAL;
}
return 0; return 0;
} }
static int ext4_geometry_check(struct super_block *sb, static int ext4_check_geometry(struct super_block *sb,
struct ext4_super_block *es) struct ext4_super_block *es)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
__u64 blocks_count; __u64 blocks_count;
int err;
if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
"Number of reserved GDT blocks insanely large: %d",
le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
return -EINVAL;
}
/*
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
err = generic_check_addressable(sb->s_blocksize_bits,
ext4_blocks_count(es));
if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
return err;
}
/* check blocks count against device size */ /* check blocks count against device size */
blocks_count = sb_bdev_nr_blocks(sb); blocks_count = sb_bdev_nr_blocks(sb);
@@ -4719,19 +4820,6 @@ static int ext4_geometry_check(struct super_block *sb,
return 0; return 0;
} }
static void ext4_group_desc_free(struct ext4_sb_info *sbi)
{
struct buffer_head **group_desc;
int i;
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
}
static int ext4_group_desc_init(struct super_block *sb, static int ext4_group_desc_init(struct super_block *sb,
struct ext4_super_block *es, struct ext4_super_block *es,
ext4_fsblk_t logical_sb_block, ext4_fsblk_t logical_sb_block,
@@ -4881,7 +4969,7 @@ out:
return -EINVAL; return -EINVAL;
} }
static int ext4_journal_data_mode_check(struct super_block *sb) static int ext4_check_journal_data_mode(struct super_block *sb)
{ {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with " printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
@@ -5024,17 +5112,93 @@ out:
return ret; return ret;
} }
static void ext4_hash_info_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
unsigned int i;
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags);
if (i & EXT2_FLAGS_UNSIGNED_HASH)
sbi->s_hash_unsigned = 3;
else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
sbi->s_hash_unsigned = 3;
#else
if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
}
}
}
static int ext4_block_group_meta_init(struct super_block *sb, int silent)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int has_huge_files;
has_huge_files = ext4_has_feature_huge_file(sb);
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (ext4_has_feature_64bit(sb)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
!is_power_of_2(sbi->s_desc_size)) {
ext4_msg(sb, KERN_ERR,
"unsupported descriptor size %lu",
sbi->s_desc_size);
return -EINVAL;
}
} else
sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
if (!silent)
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
return -EINVAL;
}
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
sbi->s_inodes_per_group);
return -EINVAL;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
return 0;
}
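As a worked example of the values this helper derives (typical mkfs defaults, not taken from any particular filesystem): with a 4096-byte block size and 256-byte on-disk inodes, s_inodes_per_block = 4096 / 256 = 16; with 8192 inodes per group, s_itb_per_group = 8192 / 16 = 512 inode-table blocks per group; and with 64-byte group descriptors, s_desc_per_block = 4096 / 64 = 64.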
static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{ {
struct ext4_super_block *es = NULL; struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct flex_groups **flex_groups;
ext4_fsblk_t block;
ext4_fsblk_t logical_sb_block; ext4_fsblk_t logical_sb_block;
struct inode *root; struct inode *root;
int ret = -ENOMEM; int ret = -ENOMEM;
unsigned int i; unsigned int i;
int needs_recovery, has_huge_files; int needs_recovery;
int err = 0; int err = 0;
ext4_group_t first_not_zeroed; ext4_group_t first_not_zeroed;
struct ext4_fs_context *ctx = fc->fs_private; struct ext4_fs_context *ctx = fc->fs_private;
@@ -5094,7 +5258,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_encoding_init(sb, es)) if (ext4_encoding_init(sb, es))
goto failed_mount; goto failed_mount;
if (ext4_journal_data_mode_check(sb)) if (ext4_check_journal_data_mode(sb))
goto failed_mount; goto failed_mount;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@@ -5106,116 +5270,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_check_feature_compatibility(sb, es, silent)) if (ext4_check_feature_compatibility(sb, es, silent))
goto failed_mount; goto failed_mount;
-	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
-		ext4_msg(sb, KERN_ERR,
-			 "Number of reserved GDT blocks insanely large: %d",
-			 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
+	if (ext4_block_group_meta_init(sb, silent))
 		goto failed_mount;
-	}
+	ext4_hash_info_init(sb);
-	if (sbi->s_daxdev) {
if (sb->s_blocksize == PAGE_SIZE)
set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
else
ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
goto failed_mount;
}
}
if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
es->s_encryption_level);
goto failed_mount;
}
has_huge_files = ext4_has_feature_huge_file(sb);
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (ext4_has_feature_64bit(sb)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
!is_power_of_2(sbi->s_desc_size)) {
ext4_msg(sb, KERN_ERR,
"unsupported descriptor size %lu",
sbi->s_desc_size);
goto failed_mount;
}
} else
sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
if (!silent)
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
}
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
sbi->s_inodes_per_group);
goto failed_mount;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags);
if (i & EXT2_FLAGS_UNSIGNED_HASH)
sbi->s_hash_unsigned = 3;
else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
sbi->s_hash_unsigned = 3;
#else
if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
}
}
if (ext4_handle_clustersize(sb)) if (ext4_handle_clustersize(sb))
goto failed_mount; goto failed_mount;
-	/*
-	 * Test whether we have more sectors than will fit in sector_t,
-	 * and whether the max offset is addressable by the page cache.
-	 */
-	err = generic_check_addressable(sb->s_blocksize_bits,
-					ext4_blocks_count(es));
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "filesystem"
-			 " too large to mount safely on this system");
-		goto failed_mount;
-	}
-
-	if (ext4_geometry_check(sb, es))
+	if (ext4_check_geometry(sb, es))
 		goto failed_mount;
timer_setup(&sbi->s_err_report, print_daily_error_info, 0); timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
@@ -5440,33 +5503,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sbi->s_journal->j_commit_callback = sbi->s_journal->j_commit_callback =
ext4_journal_commit_callback; ext4_journal_commit_callback;
-	block = ext4_count_free_clusters(sb);
-	ext4_free_blocks_count_set(sbi->s_es,
-				   EXT4_C2B(sbi, block));
-	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
-				  GFP_KERNEL);
-	if (!err) {
-		unsigned long freei = ext4_count_free_inodes(sb);
-		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
-		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
-					  GFP_KERNEL);
-	}
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirs_counter,
-					  ext4_count_dirs(sb), GFP_KERNEL);
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
-					  GFP_KERNEL);
-	if (!err)
-		err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
-					  GFP_KERNEL);
-	if (!err)
-		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "insufficient memory");
+	if (ext4_percpu_param_init(sbi))
 		goto failed_mount6;
-	}
if (ext4_has_feature_flex_bg(sb)) if (ext4_has_feature_flex_bg(sb))
if (!ext4_fill_flex_info(sb)) { if (!ext4_fill_flex_info(sb)) {
@@ -5548,20 +5586,8 @@ failed_mount7:
ext4_unregister_li_request(sb); ext4_unregister_li_request(sb);
failed_mount6: failed_mount6:
ext4_mb_release(sb); ext4_mb_release(sb);
-	rcu_read_lock();
-	flex_groups = rcu_dereference(sbi->s_flex_groups);
-	if (flex_groups) {
-		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
-			kvfree(flex_groups[i]);
-		kvfree(flex_groups);
-	}
-	rcu_read_unlock();
-	percpu_counter_destroy(&sbi->s_freeclusters_counter);
-	percpu_counter_destroy(&sbi->s_freeinodes_counter);
-	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
-	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
-	percpu_free_rwsem(&sbi->s_writepages_rwsem);
+	ext4_flex_groups_free(sbi);
+	ext4_percpu_param_destroy(sbi);
failed_mount5: failed_mount5:
ext4_ext_release(sb); ext4_ext_release(sb);
ext4_release_system_zone(sb); ext4_release_system_zone(sb);
@@ -6870,23 +6896,6 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY; sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
} }
/*
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
if (EXT4_SB(sb)->s_journal &&
ext4_should_journal_data(d_inode(path->dentry))) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
*/
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
if (err)
return err;
}
lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
err = dquot_quota_on(sb, type, format_id, path); err = dquot_quota_on(sb, type, format_id, path);
if (!err) { if (!err) {


@@ -214,7 +214,6 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
@@ -264,7 +263,6 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_order2_req), ATTR_LIST(mb_order2_req),
ATTR_LIST(mb_stream_req), ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc), ATTR_LIST(mb_group_prealloc),
ATTR_LIST(mb_max_inode_prealloc),
ATTR_LIST(mb_max_linear_groups), ATTR_LIST(mb_max_linear_groups),
ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(extent_max_zeroout_kb),


@@ -42,18 +42,16 @@ static int pagecache_read(struct inode *inode, void *buf, size_t count,
loff_t pos) loff_t pos)
{ {
while (count) { while (count) {
-		size_t n = min_t(size_t, count,
-				 PAGE_SIZE - offset_in_page(pos));
-		struct page *page;
+		struct folio *folio;
+		size_t n;

-		page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
+		folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT,
 					 NULL);
-		if (IS_ERR(page))
-			return PTR_ERR(page);
+		if (IS_ERR(folio))
+			return PTR_ERR(folio);

-		memcpy_from_page(buf, page, offset_in_page(pos), n);
-
-		put_page(page);
+		n = memcpy_from_file_folio(buf, folio, pos, count);
+		folio_put(folio);
buf += n; buf += n;
pos += n; pos += n;
@@ -363,21 +361,21 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
pgoff_t index, pgoff_t index,
unsigned long num_ra_pages) unsigned long num_ra_pages)
{ {
struct page *page; struct folio *folio;
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
if (!page || !PageUptodate(page)) { if (!folio || !folio_test_uptodate(folio)) {
DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
if (page) if (folio)
put_page(page); folio_put(folio);
else if (num_ra_pages > 1) else if (num_ra_pages > 1)
page_cache_ra_unbounded(&ractl, num_ra_pages, 0); page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
page = read_mapping_page(inode->i_mapping, index, NULL); folio = read_mapping_folio(inode->i_mapping, index, NULL);
} }
return page; return folio_file_page(folio, index);
} }
static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,


@@ -467,7 +467,7 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
*/ */
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos) struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
{ {
unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS; unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
struct folio *folio; struct folio *folio;
if (iter->flags & IOMAP_NOWAIT) if (iter->flags & IOMAP_NOWAIT)


@@ -2387,6 +2387,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
spin_unlock(&jh->b_state_lock); spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
/* Already zapped buffer? Nothing to do... */
if (!bh->b_bdev)
return 0;
return -EBUSY; return -EBUSY;
} }
/* /*


@@ -341,14 +341,13 @@ int netfs_write_begin(struct netfs_inode *ctx,
{ {
struct netfs_io_request *rreq; struct netfs_io_request *rreq;
struct folio *folio; struct folio *folio;
unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
pgoff_t index = pos >> PAGE_SHIFT; pgoff_t index = pos >> PAGE_SHIFT;
int ret; int ret;
DEFINE_READAHEAD(ractl, file, NULL, mapping, index); DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
retry: retry:
folio = __filemap_get_folio(mapping, index, fgp_flags, folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping)); mapping_gfp_mask(mapping));
if (!folio) if (!folio)
return -ENOMEM; return -ENOMEM;


@@ -306,15 +306,6 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
return false; return false;
} }
static struct folio *
nfs_folio_grab_cache_write_begin(struct address_space *mapping, pgoff_t index)
{
unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
return __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
}
/* /*
* This does the "real" work of the write. We must allocate and lock the * This does the "real" work of the write. We must allocate and lock the
* page to be sent back to the generic routine, which then copies the * page to be sent back to the generic routine, which then copies the
@@ -335,7 +326,8 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file, mapping->host->i_ino, len, (long long) pos); file, mapping->host->i_ino, len, (long long) pos);
start: start:
folio = nfs_folio_grab_cache_write_begin(mapping, pos >> PAGE_SHIFT); folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (!folio) if (!folio)
return -ENOMEM; return -ENOMEM;
*pagep = &folio->page; *pagep = &folio->page;


@@ -273,6 +273,16 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return (struct page *)page_private(bounce_page); return (struct page *)page_private(bounce_page);
} }
static inline bool fscrypt_is_bounce_folio(struct folio *folio)
{
return folio->mapping == NULL;
}
static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
{
return bounce_folio->private;
}
void fscrypt_free_bounce_page(struct page *bounce_page); void fscrypt_free_bounce_page(struct page *bounce_page);
/* policy.c */ /* policy.c */
@@ -446,6 +456,17 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
static inline bool fscrypt_is_bounce_folio(struct folio *folio)
{
return false;
}
static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio)
{
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
}
static inline void fscrypt_free_bounce_page(struct page *bounce_page) static inline void fscrypt_free_bounce_page(struct page *bounce_page)
{ {
} }
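(A minimal sketch of how a write-bio completion path might use the two new helpers; the caller below is hypothetical. It translates a possible fscrypt bounce folio back to the pagecache folio it was encrypted from before touching writeback state.)

#include <linux/fscrypt.h>

static struct folio *demo_writeback_folio(struct folio *io_folio)
{
	/* An fscrypt bounce folio has no mapping; map it back to the
	 * pagecache folio it shadows. */
	if (fscrypt_is_bounce_folio(io_folio))
		return fscrypt_pagecache_folio(io_folio);
	return io_folio;
}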


@@ -762,11 +762,6 @@ bool set_page_writeback(struct page *page);
#define folio_start_writeback_keepwrite(folio) \ #define folio_start_writeback_keepwrite(folio) \
__folio_start_writeback(folio, true) __folio_start_writeback(folio, true)
static inline void set_page_writeback_keepwrite(struct page *page)
{
folio_start_writeback_keepwrite(page_folio(page));
}
static inline bool test_set_page_writeback(struct page *page) static inline bool test_set_page_writeback(struct page *page)
{ {
return set_page_writeback(page); return set_page_writeback(page);


@@ -507,6 +507,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_ENTRY 0x00000080 #define FGP_ENTRY 0x00000080
#define FGP_STABLE 0x00000100 #define FGP_STABLE 0x00000100
#define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp); int fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
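(A minimal sketch of a ->write_begin() folio lookup using the new shorthand; the function name is hypothetical. FGP_WRITEBEGIN expands to FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE, and at this point __filemap_get_folio() still returns NULL on failure, so callers check for NULL just as the netfs and nfs conversions above do.)

#include <linux/pagemap.h>

static struct folio *demo_write_begin_folio(struct address_space *mapping,
					    loff_t pos)
{
	return __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
				   FGP_WRITEBEGIN, mapping_gfp_mask(mapping));
}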


@@ -584,13 +584,6 @@ DECLARE_EVENT_CLASS(ext4__page_op,
(unsigned long) __entry->index) (unsigned long) __entry->index)
); );
DEFINE_EVENT(ext4__page_op, ext4_writepage,
TP_PROTO(struct page *page),
TP_ARGS(page)
);
DEFINE_EVENT(ext4__page_op, ext4_readpage, DEFINE_EVENT(ext4__page_op, ext4_readpage,
TP_PROTO(struct page *page), TP_PROTO(struct page *page),

include/uapi/linux/ext4.h (new file)

@@ -0,0 +1,117 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_EXT4_H
#define _UAPI_LINUX_EXT4_H
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/types.h>
/*
* ext4-specific ioctl commands
*/
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE _IO('f', 9)
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
/* ioctl codes 19--39 are reserved for fscrypt */
#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
#define EXT4_IOC_CHECKPOINT _IOW('f', 43, __u32)
#define EXT4_IOC_GETFSUUID _IOR('f', 44, struct fsuuid)
#define EXT4_IOC_SETFSUUID _IOW('f', 44, struct fsuuid)
#define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
/*
* ioctl commands in 32 bit emulation
*/
#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
/*
* Flags returned by EXT4_IOC_GETSTATE
*
* We only expose to userspace a subset of the state flags in
* i_state_flags
*/
#define EXT4_STATE_FLAG_EXT_PRECACHED 0x00000001
#define EXT4_STATE_FLAG_NEW 0x00000002
#define EXT4_STATE_FLAG_NEWENTRY 0x00000004
#define EXT4_STATE_FLAG_DA_ALLOC_CLOSE 0x00000008
/*
* Flags for ioctl EXT4_IOC_CHECKPOINT
*/
#define EXT4_IOC_CHECKPOINT_FLAG_DISCARD 0x1
#define EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT 0x2
#define EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN 0x4
#define EXT4_IOC_CHECKPOINT_FLAG_VALID (EXT4_IOC_CHECKPOINT_FLAG_DISCARD | \
EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT | \
EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
/*
* Structure for EXT4_IOC_GETFSUUID/EXT4_IOC_SETFSUUID
*/
struct fsuuid {
__u32 fsu_len;
__u32 fsu_flags;
__u8 fsu_uuid[];
};
/*
* Structure for EXT4_IOC_MOVE_EXT
*/
struct move_extent {
__u32 reserved; /* should be zero */
__u32 donor_fd; /* donor file descriptor */
__u64 orig_start; /* logical start offset in block for orig */
__u64 donor_start; /* logical start offset in block for donor */
__u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */
};
/*
* Flags used by EXT4_IOC_SHUTDOWN
*/
#define EXT4_GOING_FLAGS_DEFAULT 0x0 /* going down */
#define EXT4_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
__u32 group; /* Group number for this data */
__u64 block_bitmap; /* Absolute block number of block bitmap */
__u64 inode_bitmap; /* Absolute block number of inode bitmap */
__u64 inode_table; /* Absolute block number of inode table start */
__u32 blocks_count; /* Total number of blocks in this group */
__u16 reserved_blocks; /* Number of reserved blocks in this group */
__u16 unused;
};
/*
* Returned by EXT4_IOC_GET_ES_CACHE as an additional possible flag.
* It indicates that the entry in extent status cache is for a hole.
*/
#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000
#endif /* _UAPI_LINUX_EXT4_H */
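(A small userspace sketch using the new header, assuming it is installed as <linux/ext4.h>; error handling is minimal and the program is purely illustrative.)

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ext4.h>

int main(int argc, char **argv)
{
	__u32 state = 0;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	/* EXT4_IOC_GETSTATE fills in the per-inode state flags. */
	if (ioctl(fd, EXT4_IOC_GETSTATE, &state) == 0)
		printf("state: 0x%x%s\n", state,
		       (state & EXT4_STATE_FLAG_EXT_PRECACHED) ?
		       " (extents precached)" : "");
	close(fd);
	return 0;
}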


@@ -106,9 +106,7 @@ EXPORT_SYMBOL(pagecache_get_page);
struct page *grab_cache_page_write_begin(struct address_space *mapping, struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index) pgoff_t index)
{ {
-	unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
-
-	return pagecache_get_page(mapping, index, fgp_flags,
+	return pagecache_get_page(mapping, index, FGP_WRITEBEGIN,
 			mapping_gfp_mask(mapping));
} }
EXPORT_SYMBOL(grab_cache_page_write_begin); EXPORT_SYMBOL(grab_cache_page_write_begin);