Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull mm updates from Andrew Morton:

 - Yosry Ahmed brought back some cgroup v1 stats in OOM logs

 - Yosry has also eliminated cgroup's atomic rstat flushing

 - Nhat Pham adds the new cachestat() syscall. It provides userspace
   with the ability to query pagecache status - a similar concept to
   mincore() but more powerful and with improved usability

 - Mel Gorman provides more optimizations for compaction, reducing the
   prevalence of page rescanning

 - Lorenzo Stoakes has done some maintanance work on the
   get_user_pages() interface

 - Liam Howlett continues with cleanups and maintenance work to the
   maple tree code. Peng Zhang also does some work on maple tree

 - Johannes Weiner has done some cleanup work on the compaction code

 - David Hildenbrand has contributed additional selftests for
   get_user_pages()

 - Thomas Gleixner has contributed some maintenance and optimization
   work for the vmalloc code

 - Baolin Wang has provided some compaction cleanups,

 - SeongJae Park continues maintenance work on the DAMON code

 - Huang Ying has done some maintenance on the swap code's usage of
   device refcounting

 - Christoph Hellwig has some cleanups for the filemap/directio code

 - Ryan Roberts provides two patch series which yield some
   rationalization of the kernel's access to pte entries - use the
   provided APIs rather than open-coding accesses

 - Lorenzo Stoakes has some fixes to the interaction between pagecache
   and directio access to file mappings

 - John Hubbard has a series of fixes to the MM selftesting code

 - ZhangPeng continues the folio conversion campaign

 - Hugh Dickins has been working on the pagetable handling code, mainly
   with a view to reducing the load on the mmap_lock

 - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment
   from 128 to 8

 - Domenico Cerasuolo has improved the zswap reclaim mechanism by
   reorganizing the LRU management

 - Matthew Wilcox provides some fixups to make gfs2 work better with the
   buffer_head code

 - Vishal Moola also has done some folio conversion work

 - Matthew Wilcox has removed the remnants of the pagevec code - their
   functionality is migrated over to struct folio_batch

* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
  mm/hugetlb: remove hugetlb_set_page_subpool()
  mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
  hugetlb: revert use of page_cache_next_miss()
  Revert "page cache: fix page_cache_next/prev_miss off by one"
  mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
  mm: memcg: rename and document global_reclaim()
  mm: kill [add|del]_page_to_lru_list()
  mm: compaction: convert to use a folio in isolate_migratepages_block()
  mm: zswap: fix double invalidate with exclusive loads
  mm: remove unnecessary pagevec includes
  mm: remove references to pagevec
  mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
  mm: remove struct pagevec
  net: convert sunrpc from pagevec to folio_batch
  i915: convert i915_gpu_error to use a folio_batch
  pagevec: rename fbatch_count()
  mm: remove check_move_unevictable_pages()
  drm: convert drm_gem_put_pages() to use a folio_batch
  i915: convert shmem_sg_free_table() to use a folio_batch
  scatterlist: add sg_set_folio()
  ...
This commit is contained in:
Linus Torvalds
2023-06-28 10:28:11 -07:00
334 changed files with 8347 additions and 6911 deletions

View File

@@ -194,19 +194,19 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
pgoff_t index;
struct buffer_head *bh;
struct buffer_head *head;
struct page *page;
struct folio *folio;
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
if (!page)
folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
if (IS_ERR(folio))
goto out;
spin_lock(&bd_mapping->private_lock);
if (!page_has_buffers(page))
head = folio_buffers(folio);
if (!head)
goto out_unlock;
head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh))
@@ -236,7 +236,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
put_page(page);
folio_put(folio);
out:
return ret;
}
@@ -906,8 +906,8 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
static inline void
link_dev_buffers(struct page *page, struct buffer_head *head)
static inline void link_dev_buffers(struct folio *folio,
struct buffer_head *head)
{
struct buffer_head *bh, *tail;
@@ -917,7 +917,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
attach_page_private(page, head);
folio_attach_private(folio, head);
}
static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
@@ -933,15 +933,14 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
}
/*
* Initialise the state of a blockdev page's buffers.
* Initialise the state of a blockdev folio's buffers.
*/
static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size)
static sector_t folio_init_buffers(struct folio *folio,
struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *head = page_buffers(page);
struct buffer_head *head = folio_buffers(folio);
struct buffer_head *bh = head;
int uptodate = PageUptodate(page);
bool uptodate = folio_test_uptodate(folio);
sector_t end_block = blkdev_max_block(bdev, size);
do {
@@ -975,7 +974,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size, int sizebits, gfp_t gfp)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct folio *folio;
struct buffer_head *bh;
sector_t end_block;
int ret = 0;
@@ -991,42 +990,37 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
gfp_mask |= __GFP_NOFAIL;
page = find_or_create_page(inode->i_mapping, index, gfp_mask);
folio = __filemap_get_folio(inode->i_mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
bh = folio_buffers(folio);
if (bh) {
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
(sector_t)index << sizebits,
size);
end_block = folio_init_buffers(folio, bdev,
(sector_t)index << sizebits, size);
goto done;
}
if (!try_to_free_buffers(page_folio(page)))
if (!try_to_free_buffers(folio))
goto failed;
}
/*
* Allocate some buffers for this page
*/
bh = alloc_page_buffers(page, size, true);
bh = folio_alloc_buffers(folio, size, true);
/*
* Link the page to the buffers and initialise them. Take the
* Link the folio to the buffers and initialise them. Take the
* lock to be atomic wrt __find_get_block(), which does not
* run under the page lock.
* run under the folio lock.
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
size);
link_dev_buffers(folio, bh);
end_block = folio_init_buffers(folio, bdev,
(sector_t)index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return ret;
}
@@ -1763,7 +1757,7 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
* WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
* causes the writes to be flagged as synchronous writes.
*/
int __block_write_full_page(struct inode *inode, struct page *page,
int __block_write_full_folio(struct inode *inode, struct folio *folio,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
@@ -1775,14 +1769,14 @@ int __block_write_full_page(struct inode *inode, struct page *page,
int nr_underway = 0;
blk_opf_t write_flags = wbc_to_write_flags(wbc);
head = folio_create_buffers(page_folio(page), inode,
head = folio_create_buffers(folio, inode,
(1 << BH_Dirty) | (1 << BH_Uptodate));
/*
* Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
* then we just miss that fact, and the page stays dirty.
* then we just miss that fact, and the folio stays dirty.
*
* Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
@@ -1792,7 +1786,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
blocksize = bh->b_size;
bbits = block_size_bits(blocksize);
block = (sector_t)page->index << (PAGE_SHIFT - bbits);
block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
last_block = (i_size_read(inode) - 1) >> bbits;
/*
@@ -1803,7 +1797,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
if (block > last_block) {
/*
* mapped buffers outside i_size will occur, because
* this page can be outside i_size when there is a
* this folio can be outside i_size when there is a
* truncate in progress.
*/
/*
@@ -1833,7 +1827,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
continue;
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* lock the buffer then redirty the folio. Note that this can
* potentially cause a busy-wait loop from writeback threads
* and kswapd activity, but those code paths have their own
* higher-level throttling.
@@ -1841,7 +1835,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
if (wbc->sync_mode != WB_SYNC_NONE) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
redirty_page_for_writepage(wbc, page);
folio_redirty_for_writepage(wbc, folio);
continue;
}
if (test_clear_buffer_dirty(bh)) {
@@ -1852,11 +1846,11 @@ int __block_write_full_page(struct inode *inode, struct page *page,
} while ((bh = bh->b_this_page) != head);
/*
* The page and its buffers are protected by PageWriteback(), so we can
* drop the bh refcounts early.
* The folio and its buffers are protected by the writeback flag,
* so we can drop the bh refcounts early.
*/
BUG_ON(PageWriteback(page));
set_page_writeback(page);
BUG_ON(folio_test_writeback(folio));
folio_start_writeback(folio);
do {
struct buffer_head *next = bh->b_this_page;
@@ -1866,20 +1860,20 @@ int __block_write_full_page(struct inode *inode, struct page *page,
}
bh = next;
} while (bh != head);
unlock_page(page);
folio_unlock(folio);
err = 0;
done:
if (nr_underway == 0) {
/*
* The page was marked dirty, but the buffers were
* The folio was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
* write_dirty_buffer/submit_bh. A rare case.
*/
end_page_writeback(page);
folio_end_writeback(folio);
/*
* The page and buffer_heads can be released at any time from
* The folio and buffer_heads can be released at any time from
* here on.
*/
}
@@ -1890,7 +1884,7 @@ recover:
* ENOSPC, or some other error. We may already have added some
* blocks to the file, so we need to write these out to avoid
* exposing stale data.
* The page is currently locked and not marked for writeback
* The folio is currently locked and not marked for writeback
*/
bh = head;
/* Recovery: lock and submit the mapped buffers */
@@ -1902,15 +1896,15 @@ recover:
} else {
/*
* The buffer may have been set dirty during
* attachment to a dirty page.
* attachment to a dirty folio.
*/
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
SetPageError(page);
BUG_ON(PageWriteback(page));
mapping_set_error(page->mapping, err);
set_page_writeback(page);
folio_set_error(folio);
BUG_ON(folio_test_writeback(folio));
mapping_set_error(folio->mapping, err);
folio_start_writeback(folio);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
@@ -1920,39 +1914,40 @@ recover:
}
bh = next;
} while (bh != head);
unlock_page(page);
folio_unlock(folio);
goto done;
}
EXPORT_SYMBOL(__block_write_full_page);
EXPORT_SYMBOL(__block_write_full_folio);
/*
* If a page has any new buffers, zero them out here, and mark them uptodate
* If a folio has any new buffers, zero them out here, and mark them uptodate
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
{
unsigned int block_start, block_end;
size_t block_start, block_end;
struct buffer_head *head, *bh;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
BUG_ON(!folio_test_locked(folio));
head = folio_buffers(folio);
if (!head)
return;
bh = head = page_buffers(page);
bh = head;
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
if (!PageUptodate(page)) {
unsigned start, size;
if (!folio_test_uptodate(folio)) {
size_t start, xend;
start = max(from, block_start);
size = min(to, block_end) - start;
xend = min(to, block_end);
zero_user(page, start, size);
folio_zero_segment(folio, start, xend);
set_buffer_uptodate(bh);
}
@@ -1965,7 +1960,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
bh = bh->b_this_page;
} while (bh != head);
}
EXPORT_SYMBOL(page_zero_new_buffers);
EXPORT_SYMBOL(folio_zero_new_buffers);
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
@@ -2103,7 +2098,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err))
page_zero_new_buffers(&folio->page, from, to);
folio_zero_new_buffers(folio, from, to);
return err;
}
@@ -2115,15 +2110,15 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(__block_write_begin);
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
static int __block_commit_write(struct inode *inode, struct folio *folio,
size_t from, size_t to)
{
unsigned block_start, block_end;
int partial = 0;
size_t block_start, block_end;
bool partial = false;
unsigned blocksize;
struct buffer_head *bh, *head;
bh = head = page_buffers(page);
bh = head = folio_buffers(folio);
blocksize = bh->b_size;
block_start = 0;
@@ -2131,7 +2126,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
partial = true;
} else {
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
@@ -2146,11 +2141,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
/*
* If this is a partial write which happened to make all buffers
* uptodate then we can optimize away a bogus read_folio() for
* the next read(). Here we 'discover' whether the page went
* the next read(). Here we 'discover' whether the folio went
* uptodate as a result of this (potentially partial) write.
*/
if (!partial)
SetPageUptodate(page);
folio_mark_uptodate(folio);
return 0;
}
@@ -2187,10 +2182,9 @@ int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
unsigned start;
start = pos & (PAGE_SIZE - 1);
size_t start = pos - folio_pos(folio);
if (unlikely(copied < len)) {
/*
@@ -2202,18 +2196,18 @@ int block_write_end(struct file *file, struct address_space *mapping,
* read_folio might come in and destroy our partial write.
*
* Do the simplest thing, and just treat any short write to a
* non uptodate page as a zero-length write, and force the
* non uptodate folio as a zero-length write, and force the
* caller to redo the whole thing.
*/
if (!PageUptodate(page))
if (!folio_test_uptodate(folio))
copied = 0;
page_zero_new_buffers(page, start+copied, start+len);
folio_zero_new_buffers(folio, start+copied, start+len);
}
flush_dcache_page(page);
flush_dcache_folio(folio);
/* This could be a short (even 0-length) commit */
__block_commit_write(inode, page, start, start+copied);
__block_commit_write(inode, folio, start, start + copied);
return copied;
}
@@ -2536,8 +2530,9 @@ EXPORT_SYMBOL(cont_write_begin);
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
__block_commit_write(inode,page,from,to);
struct folio *folio = page_folio(page);
struct inode *inode = folio->mapping->host;
__block_commit_write(inode, folio, from, to);
return 0;
}
EXPORT_SYMBOL(block_commit_write);
@@ -2563,38 +2558,37 @@ EXPORT_SYMBOL(block_commit_write);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
struct page *page = vmf->page;
struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vma->vm_file);
unsigned long end;
loff_t size;
int ret;
lock_page(page);
folio_lock(folio);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
(page_offset(page) > size)) {
if ((folio->mapping != inode->i_mapping) ||
(folio_pos(folio) >= size)) {
/* We overload EFAULT to mean page got truncated */
ret = -EFAULT;
goto out_unlock;
}
/* page is wholly or partially inside EOF */
if (((page->index + 1) << PAGE_SHIFT) > size)
end = size & ~PAGE_MASK;
else
end = PAGE_SIZE;
end = folio_size(folio);
/* folio is wholly or partially inside EOF */
if (folio_pos(folio) + end > size)
end = size - folio_pos(folio);
ret = __block_write_begin(page, 0, end, get_block);
ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
if (!ret)
ret = block_commit_write(page, 0, end);
ret = __block_commit_write(inode, folio, 0, end);
if (unlikely(ret < 0))
goto out_unlock;
set_page_dirty(page);
wait_for_stable_page(page);
folio_mark_dirty(folio);
folio_wait_stable(folio);
return 0;
out_unlock:
unlock_page(page);
folio_unlock(folio);
return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);
@@ -2603,17 +2597,16 @@ int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
unsigned length, pos;
size_t offset, length, pos;
struct inode *inode = mapping->host;
struct page *page;
struct folio *folio;
struct buffer_head *bh;
int err = 0;
blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
length = from & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!length)
@@ -2622,15 +2615,18 @@ int block_truncate_page(struct address_space *mapping,
length = blocksize - length;
iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
page = grab_cache_page(mapping, index);
if (!page)
return -ENOMEM;
folio = filemap_grab_folio(mapping, index);
if (IS_ERR(folio))
return PTR_ERR(folio);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
bh = folio_buffers(folio);
if (!bh) {
folio_create_empty_buffers(folio, blocksize, 0);
bh = folio_buffers(folio);
}
/* Find the buffer that contains "offset" */
bh = page_buffers(page);
offset = offset_in_folio(folio, from);
pos = blocksize;
while (offset >= pos) {
bh = bh->b_this_page;
@@ -2649,7 +2645,7 @@ int block_truncate_page(struct address_space *mapping,
}
/* Ok, it's mapped. Make sure it's up-to-date */
if (PageUptodate(page))
if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
@@ -2659,12 +2655,12 @@ int block_truncate_page(struct address_space *mapping,
goto unlock;
}
zero_user(page, offset, length);
folio_zero_range(folio, offset, length);
mark_buffer_dirty(bh);
unlock:
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
return err;
}
@@ -2676,33 +2672,32 @@ EXPORT_SYMBOL(block_truncate_page);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
struct folio *folio = page_folio(page);
struct inode * const inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
/* Is the page fully inside i_size? */
if (page->index < end_index)
return __block_write_full_page(inode, page, get_block, wbc,
/* Is the folio fully inside i_size? */
if (folio_pos(folio) + folio_size(folio) <= i_size)
return __block_write_full_folio(inode, folio, get_block, wbc,
end_buffer_async_write);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
unlock_page(page);
/* Is the folio fully outside i_size? (truncate in progress) */
if (folio_pos(folio) >= i_size) {
folio_unlock(folio);
return 0; /* don't care */
}
/*
* The page straddles i_size. It must be zeroed out on each and every
* The folio straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_SIZE);
return __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
folio_zero_segment(folio, offset_in_folio(folio, i_size),
folio_size(folio));
return __block_write_full_folio(inode, folio, get_block, wbc,
end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);