Merge tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache

Pull folio updates from Matthew Wilcox:

 - Fix an accounting bug that made NR_FILE_DIRTY grow without limit
   when running xfstests

 - Convert more of mpage to use folios

 - Remove add_to_page_cache() and add_to_page_cache_locked()

 - Convert find_get_pages_range() to filemap_get_folios()

 - Improvements to the read_cache_page() family of functions

 - Remove a few unnecessary checks of PageError

 - Some straightforward filesystem conversions to use folios

 - Split PageMovable users out from address_space_operations into
   their own movable_operations

 - Convert aops->migratepage to aops->migrate_folio

 - Remove nobh support (Christoph Hellwig)

* tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache: (78 commits)
  fs: remove the NULL get_block case in mpage_writepages
  fs: don't call ->writepage from __mpage_writepage
  fs: remove the nobh helpers
  jfs: stop using the nobh helper
  ext2: remove nobh support
  ntfs3: refactor ntfs_writepages
  mm/folio-compat: Remove migration compatibility functions
  fs: Remove aops->migratepage()
  secretmem: Convert to migrate_folio
  hugetlb: Convert to migrate_folio
  aio: Convert to migrate_folio
  f2fs: Convert to filemap_migrate_folio()
  ubifs: Convert to filemap_migrate_folio()
  btrfs: Convert btrfs_migratepage to migrate_folio
  mm/migrate: Add filemap_migrate_folio()
  mm/migrate: Convert migrate_page() to migrate_folio()
  nfs: Convert to migrate_folio
  btrfs: Convert btree_migratepage to migrate_folio
  mm/migrate: Convert expected_page_refs() to folio_expected_refs()
  mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio()
  ...
This commit is contained in:
Linus Torvalds
2022-08-03 10:35:43 -07:00
97 changed files with 832 additions and 1889 deletions

View File

@@ -282,10 +282,10 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
/*
* If none of the buffers had errors and they are all
* uptodate then we can set the page uptodate.
* If all of the buffers are uptodate then we can set the page
* uptodate.
*/
if (page_uptodate && !PageError(page))
if (page_uptodate)
SetPageUptodate(page);
unlock_page(page);
return;
@@ -1604,7 +1604,7 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
struct pagevec pvec;
struct folio_batch fbatch;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
pgoff_t end;
int i, count;
@@ -1612,24 +1612,24 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
struct buffer_head *head;
end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
pagevec_init(&pvec);
while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
count = pagevec_count(&pvec);
folio_batch_init(&fbatch);
while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
count = folio_batch_count(&fbatch);
for (i = 0; i < count; i++) {
struct page *page = pvec.pages[i];
struct folio *folio = fbatch.folios[i];
if (!page_has_buffers(page))
if (!folio_buffers(folio))
continue;
/*
* We use page lock instead of bd_mapping->private_lock
* We use folio lock instead of bd_mapping->private_lock
* to pin buffers here since we can afford to sleep and
* it scales better than a global spinlock lock.
*/
lock_page(page);
/* Recheck when the page is locked which pins bhs */
if (!page_has_buffers(page))
folio_lock(folio);
/* Recheck when the folio is locked which pins bhs */
head = folio_buffers(folio);
if (!head)
goto unlock_page;
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh) || (bh->b_blocknr < block))
@@ -1643,9 +1643,9 @@ next:
bh = bh->b_this_page;
} while (bh != head);
unlock_page:
unlock_page(page);
folio_unlock(folio);
}
pagevec_release(&pvec);
folio_batch_release(&fbatch);
cond_resched();
/* End of range already reached? */
if (index > end || !index)
@@ -2259,6 +2259,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
unsigned int blocksize, bbits;
int nr, i;
int fully_mapped = 1;
bool page_error = false;
VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
@@ -2283,8 +2284,10 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
if (iblock < lblock) {
WARN_ON(bh->b_size != blocksize);
err = get_block(inode, iblock, bh, 0);
if (err)
if (err) {
folio_set_error(folio);
page_error = true;
}
}
if (!buffer_mapped(bh)) {
folio_zero_range(folio, i * blocksize,
@@ -2311,7 +2314,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
* All buffers are uptodate - we can set the folio uptodate
* as well. But not if get_block() returned an error.
*/
if (!folio_test_error(folio))
if (!page_error)
folio_mark_uptodate(folio);
folio_unlock(folio);
return 0;
@@ -2534,330 +2537,6 @@ out_unlock:
}
EXPORT_SYMBOL(block_page_mkwrite);
/*
* nobh_write_begin()'s prereads are special: the buffer_heads are freed
* immediately, while under the page lock. So it needs a special end_io
* handler which does not touch the bh after unlocking it.
*/
static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
{
__end_buffer_read_notouch(bh, uptodate);
}
/*
* Attach the singly-linked list of buffers created by nobh_write_begin, to
* the page (converting it to circular linked list and taking care of page
* dirty races).
*/
static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
{
struct buffer_head *bh;
BUG_ON(!PageLocked(page));
spin_lock(&page->mapping->private_lock);
bh = head;
do {
if (PageDirty(page))
set_buffer_dirty(bh);
if (!bh->b_this_page)
bh->b_this_page = head;
bh = bh->b_this_page;
} while (bh != head);
attach_page_private(page, head);
spin_unlock(&page->mapping->private_lock);
}
/*
* On entry, the page is fully not uptodate.
* On exit the page is fully uptodate in the areas outside (from,to)
* The filesystem needs to handle block truncation upon failure.
*/
int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
struct page **pagep, void **fsdata,
get_block_t *get_block)
{
struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocksize = 1 << blkbits;
struct buffer_head *head, *bh;
struct page *page;
pgoff_t index;
unsigned from, to;
unsigned block_in_page;
unsigned block_start, block_end;
sector_t block_in_file;
int nr_reads = 0;
int ret = 0;
int is_mapped_to_disk = 1;
index = pos >> PAGE_SHIFT;
from = pos & (PAGE_SIZE - 1);
to = from + len;
page = grab_cache_page_write_begin(mapping, index);
if (!page)
return -ENOMEM;
*pagep = page;
*fsdata = NULL;
if (page_has_buffers(page)) {
ret = __block_write_begin(page, pos, len, get_block);
if (unlikely(ret))
goto out_release;
return ret;
}
if (PageMappedToDisk(page))
return 0;
/*
* Allocate buffers so that we can keep track of state, and potentially
* attach them to the page if an error occurs. In the common case of
* no error, they will just be freed again without ever being attached
* to the page (which is all OK, because we're under the page lock).
*
* Be careful: the buffer linked list is a NULL terminated one, rather
* than the circular one we're used to.
*/
head = alloc_page_buffers(page, blocksize, false);
if (!head) {
ret = -ENOMEM;
goto out_release;
}
block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
/*
* We loop across all blocks in the page, whether or not they are
* part of the affected region. This is so we can discover if the
* page is fully mapped-to-disk.
*/
for (block_start = 0, block_in_page = 0, bh = head;
block_start < PAGE_SIZE;
block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
int create;
block_end = block_start + blocksize;
bh->b_state = 0;
create = 1;
if (block_start >= to)
create = 0;
ret = get_block(inode, block_in_file + block_in_page,
bh, create);
if (ret)
goto failed;
if (!buffer_mapped(bh))
is_mapped_to_disk = 0;
if (buffer_new(bh))
clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
set_buffer_uptodate(bh);
continue;
}
if (buffer_new(bh) || !buffer_mapped(bh)) {
zero_user_segments(page, block_start, from,
to, block_end);
continue;
}
if (buffer_uptodate(bh))
continue; /* reiserfs does this */
if (block_start < from || block_end > to) {
lock_buffer(bh);
bh->b_end_io = end_buffer_read_nobh;
submit_bh(REQ_OP_READ, bh);
nr_reads++;
}
}
if (nr_reads) {
/*
* The page is locked, so these buffers are protected from
* any VM or truncate activity. Hence we don't need to care
* for the buffer_head refcounts.
*/
for (bh = head; bh; bh = bh->b_this_page) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
ret = -EIO;
}
if (ret)
goto failed;
}
if (is_mapped_to_disk)
SetPageMappedToDisk(page);
*fsdata = head; /* to be released by nobh_write_end */
return 0;
failed:
BUG_ON(!ret);
/*
* Error recovery is a bit difficult. We need to zero out blocks that
* were newly allocated, and dirty them to ensure they get written out.
* Buffers need to be attached to the page at this point, otherwise
* the handling of potential IO errors during writeout would be hard
* (could try doing synchronous writeout, but what if that fails too?)
*/
attach_nobh_buffers(page, head);
page_zero_new_buffers(page, from, to);
out_release:
unlock_page(page);
put_page(page);
*pagep = NULL;
return ret;
}
EXPORT_SYMBOL(nobh_write_begin);
int nobh_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = page->mapping->host;
struct buffer_head *head = fsdata;
struct buffer_head *bh;
BUG_ON(fsdata != NULL && page_has_buffers(page));
if (unlikely(copied < len) && head)
attach_nobh_buffers(page, head);
if (page_has_buffers(page))
return generic_write_end(file, mapping, pos, len,
copied, page, fsdata);
SetPageUptodate(page);
set_page_dirty(page);
if (pos+copied > inode->i_size) {
i_size_write(inode, pos+copied);
mark_inode_dirty(inode);
}
unlock_page(page);
put_page(page);
while (head) {
bh = head;
head = head->b_this_page;
free_buffer_head(bh);
}
return copied;
}
EXPORT_SYMBOL(nobh_write_end);
/*
* nobh_writepage() - based on block_full_write_page() except
* that it tries to operate without attaching bufferheads to
* the page.
*/
int nobh_writepage(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
int ret;
/* Is the page fully inside i_size? */
if (page->index < end_index)
goto out;
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
unlock_page(page);
return 0; /* don't care */
}
/*
* The page straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_SIZE);
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
ret = __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
return ret;
}
EXPORT_SYMBOL(nobh_writepage);
int nobh_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
pgoff_t index = from >> PAGE_SHIFT;
struct inode *inode = mapping->host;
unsigned blocksize = i_blocksize(inode);
struct folio *folio;
struct buffer_head map_bh;
size_t offset;
sector_t iblock;
int err;
/* Block boundary? Nothing to do */
if (!(from & (blocksize - 1)))
return 0;
folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_CREAT,
mapping_gfp_mask(mapping));
err = -ENOMEM;
if (!folio)
goto out;
if (folio_buffers(folio))
goto has_buffers;
iblock = from >> inode->i_blkbits;
map_bh.b_size = blocksize;
map_bh.b_state = 0;
err = get_block(inode, iblock, &map_bh, 0);
if (err)
goto unlock;
/* unmapped? It's a hole - nothing to do */
if (!buffer_mapped(&map_bh))
goto unlock;
/* Ok, it's mapped. Make sure it's up-to-date */
if (!folio_test_uptodate(folio)) {
err = mapping->a_ops->read_folio(NULL, folio);
if (err) {
folio_put(folio);
goto out;
}
folio_lock(folio);
if (!folio_test_uptodate(folio)) {
err = -EIO;
goto unlock;
}
if (folio_buffers(folio))
goto has_buffers;
}
offset = offset_in_folio(folio, from);
folio_zero_segment(folio, offset, round_up(offset, blocksize));
folio_mark_dirty(folio);
err = 0;
unlock:
folio_unlock(folio);
folio_put(folio);
out:
return err;
has_buffers:
folio_unlock(folio);
folio_put(folio);
return block_truncate_page(mapping, from, get_block);
}
EXPORT_SYMBOL(nobh_truncate_page);
int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{