Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - Various cleanups all around (Irvin, Chaitanya, Christophe)
      - Better struct packing (Christophe JAILLET)
      - Reduce controller error logs for optional commands (Keith)
      - Support for >=64KiB block sizes (Daniel Gomez)
      - Fabrics fixes and code organization (Max, Chaitanya, Daniel
        Wagner)

 - bcache updates via Coly:
      - Fix a race at init time (Mingzhe Zou)
      - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye)

 - use page pinning in the block layer for dio (David)

 - convert old block dio code to page pinning (David, Christoph)

 - cleanups for pktcdvd (Andy)

 - cleanups for rnbd (Guoqing)

 - use the unchecked __bio_add_page() for the initial single page
   additions (Johannes)

 - fix overflows in the Amiga partition handling code (Michael)

 - improve mq-deadline zoned device support (Bart)

 - keep passthrough requests out of the IO schedulers (Christoph, Ming)

 - improve support for flush requests, making them less special to deal
   with (Christoph)

 - add bdev holder ops and shutdown methods (Christoph)

 - fix the name_to_dev_t() situation and use cases (Christoph)

 - decouple the block open flags from fmode_t (Christoph)

 - ublk updates and cleanups, including adding user copy support (Ming)

 - BFQ sanity checking (Bart)

 - convert brd from radix to xarray (Pankaj)

 - constify various structures (Thomas, Ivan)

 - more fine grained persistent reservation ioctl capability checks
   (Jingbo)

 - misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan,
   Jordy, Li, Min, Yu, Zhong, Waiman)

* tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits)
  scsi/sg: don't grab scsi host module reference
  ext4: Fix warning in blkdev_put()
  block: don't return -EINVAL for not found names in devt_from_devname
  cdrom: Fix spectre-v1 gadget
  block: Improve kernel-doc headers
  blk-mq: don't insert passthrough request into sw queue
  bsg: make bsg_class a static const structure
  ublk: make ublk_chr_class a static const structure
  aoe: make aoe_class a static const structure
  block/rnbd: make all 'class' structures const
  block: fix the exclusive open mask in disk_scan_partitions
  block: add overflow checks for Amiga partition support
  block: change all __u32 annotations to __be32 in affs_hardblocks.h
  block: fix signed int overflow in Amiga partition support
  block: add capacity validation in bdev_add_partition()
  block: fine-granular CAP_SYS_ADMIN for Persistent Reservation
  block: disallow Persistent Reservation on partitions
  reiserfs: fix blkdev_put() warning from release_journal_dev()
  block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions()
  block: document the holder argument to blkdev_get_by_path
  ...
This commit is contained in:
Linus Torvalds
2023-06-26 12:47:20 -07:00
218 changed files with 4750 additions and 3887 deletions

View File

@@ -51,7 +51,8 @@ static inline void sanity_check_pinned_pages(struct page **pages,
struct page *page = *pages;
struct folio *folio = page_folio(page);
if (!folio_test_anon(folio))
if (is_zero_page(page) ||
!folio_test_anon(folio))
continue;
if (!folio_test_large(folio) || folio_test_hugetlb(folio))
VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
@@ -131,6 +132,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
else if (flags & FOLL_PIN) {
struct folio *folio;
/*
* Don't take a pin on the zero page - it's not going anywhere
* and it is used in a *lot* of places.
*/
if (is_zero_page(page))
return page_folio(page);
/*
* Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
* right zone, so fail and let the caller fall back to the slow
@@ -180,6 +188,8 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
if (is_zero_folio(folio))
return;
node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
if (folio_test_large(folio))
atomic_sub(refs, &folio->_pincount);
@@ -224,6 +234,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
if (flags & FOLL_GET)
folio_ref_inc(folio);
else if (flags & FOLL_PIN) {
/*
* Don't take a pin on the zero page - it's not going anywhere
* and it is used in a *lot* of places.
*/
if (is_zero_page(page))
return 0;
/*
* Similar to try_grab_folio(): be sure to *also*
* increment the normal page refcount field at least once,
@@ -258,6 +275,33 @@ void unpin_user_page(struct page *page)
}
EXPORT_SYMBOL(unpin_user_page);
/**
* folio_add_pin - Try to get an additional pin on a pinned folio
* @folio: The folio to be pinned
*
* Get an additional pin on a folio we already have a pin on. Makes no change
* if the folio is a zero_page.
*/
void folio_add_pin(struct folio *folio)
{
if (is_zero_folio(folio))
return;
/*
* Similar to try_grab_folio(): be sure to *also* increment the normal
* page refcount field at least once, so that the page really is
* pinned.
*/
if (folio_test_large(folio)) {
WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
folio_ref_inc(folio);
atomic_inc(&folio->_pincount);
} else {
WARN_ON_ONCE(folio_ref_count(folio) < GUP_PIN_COUNTING_BIAS);
folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
}
}
static inline struct folio *gup_folio_range_next(struct page *start,
unsigned long npages, unsigned long i, unsigned int *ntails)
{
@@ -3079,6 +3123,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for further details.
*
* Note that if a zero_page is amongst the returned pages, it will not have
* pins in it and unpin_user_page() will not remove pins from it.
*/
int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
@@ -3110,6 +3157,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
*
* Note that if a zero_page is amongst the returned pages, it will not have
* pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -3143,6 +3193,9 @@ EXPORT_SYMBOL(pin_user_pages_remote);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
*
* Note that if a zero_page is amongst the returned pages, it will not have
* pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -3161,6 +3214,9 @@ EXPORT_SYMBOL(pin_user_pages);
* pin_user_pages_unlocked() is the FOLL_PIN variant of
* get_user_pages_unlocked(). Behavior is the same, except that this one sets
* FOLL_PIN and rejects FOLL_GET.
*
* Note that if a zero_page is amongst the returned pages, it will not have
* pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)

View File

@@ -338,7 +338,7 @@ static void swap_writepage_bdev_sync(struct page *page,
bio_init(&bio, sis->bdev, &bv, 1,
REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
bio.bi_iter.bi_sector = swap_page_sector(page);
bio_add_page(&bio, page, thp_size(page), 0);
__bio_add_page(&bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(&bio, page);
count_swpout_vm_event(page);
@@ -360,7 +360,7 @@ static void swap_writepage_bdev_async(struct page *page,
GFP_NOIO);
bio->bi_iter.bi_sector = swap_page_sector(page);
bio->bi_end_io = end_swap_bio_write;
bio_add_page(bio, page, thp_size(page), 0);
__bio_add_page(bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(bio, page);
count_swpout_vm_event(page);
@@ -468,7 +468,7 @@ static void swap_readpage_bdev_sync(struct page *page,
bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = swap_page_sector(page);
bio_add_page(&bio, page, thp_size(page), 0);
__bio_add_page(&bio, page, thp_size(page), 0);
/*
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
@@ -488,7 +488,7 @@ static void swap_readpage_bdev_async(struct page *page,
bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
bio->bi_iter.bi_sector = swap_page_sector(page);
bio->bi_end_io = end_swap_bio_read;
bio_add_page(bio, page, thp_size(page), 0);
__bio_add_page(bio, page, thp_size(page), 0);
count_vm_event(PSWPIN);
submit_bio(bio);
}

View File

@@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
struct block_device *bdev = I_BDEV(inode);
set_blocksize(bdev, old_block_size);
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
blkdev_put(bdev, p);
}
inode_lock(inode);
@@ -2770,7 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
if (S_ISBLK(inode->i_mode)) {
p->bdev = blkdev_get_by_dev(inode->i_rdev,
FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
if (IS_ERR(p->bdev)) {
error = PTR_ERR(p->bdev);
p->bdev = NULL;
@@ -3221,7 +3221,7 @@ bad_swap:
p->cluster_next_cpu = NULL;
if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
set_blocksize(p->bdev, p->old_block_size);
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
blkdev_put(p->bdev, p);
}
inode = NULL;
destroy_swap_extents(p);