Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe: - NVMe pull request via Keith: - Various cleanups all around (Irvin, Chaitanya, Christophe) - Better struct packing (Christophe JAILLET) - Reduce controller error logs for optional commands (Keith) - Support for >=64KiB block sizes (Daniel Gomez) - Fabrics fixes and code organization (Max, Chaitanya, Daniel Wagner) - bcache updates via Coly: - Fix a race at init time (Mingzhe Zou) - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye) - use page pinning in the block layer for dio (David) - convert old block dio code to page pinning (David, Christoph) - cleanups for pktcdvd (Andy) - cleanups for rnbd (Guoqing) - use the unchecked __bio_add_page() for the initial single page additions (Johannes) - fix overflows in the Amiga partition handling code (Michael) - improve mq-deadline zoned device support (Bart) - keep passthrough requests out of the IO schedulers (Christoph, Ming) - improve support for flush requests, making them less special to deal with (Christoph) - add bdev holder ops and shutdown methods (Christoph) - fix the name_to_dev_t() situation and use cases (Christoph) - decouple the block open flags from fmode_t (Christoph) - ublk updates and cleanups, including adding user copy support (Ming) - BFQ sanity checking (Bart) - convert brd from radix to xarray (Pankaj) - constify various structures (Thomas, Ivan) - more fine grained persistent reservation ioctl capability checks (Jingbo) - misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan, Jordy, Li, Min, Yu, Zhong, Waiman) * tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits) scsi/sg: don't grab scsi host module reference ext4: Fix warning in blkdev_put() block: don't return -EINVAL for not found names in devt_from_devname cdrom: Fix spectre-v1 gadget block: Improve kernel-doc headers blk-mq: don't insert passthrough request into sw queue bsg: make bsg_class a static const structure ublk: make ublk_chr_class a static const structure aoe: make aoe_class a static const structure block/rnbd: make all 'class' structures const block: fix the exclusive open mask in disk_scan_partitions block: add overflow checks for Amiga partition support block: change all __u32 annotations to __be32 in affs_hardblocks.h block: fix signed int overflow in Amiga partition support block: add capacity validation in bdev_add_partition() block: fine-granular CAP_SYS_ADMIN for Persistent Reservation block: disallow Persistent Reservation on partitions reiserfs: fix blkdev_put() warning from release_journal_dev() block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions() block: document the holder argument to blkdev_get_by_path ...
2025-07-23 04:33:26 +02:00 · 2023-06-26 12:47:20 -07:00
parent 0aa69d53ac fcaa174a9c
commit a0433f8cae
218 changed files with 4750 additions and 3887 deletions
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -51,7 +51,8 @@ static inline void sanity_check_pinned_pages(struct page **pages,
 		struct page *page = *pages;
 		struct folio *folio = page_folio(page);

-		if (!folio_test_anon(folio))
+		if (is_zero_page(page) ||
+		    !folio_test_anon(folio))
 			continue;
 		if (!folio_test_large(folio) || folio_test_hugetlb(folio))
 			VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
@@ -131,6 +132,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 	else if (flags & FOLL_PIN) {
 		struct folio *folio;

+		/*
+		 * Don't take a pin on the zero page - it's not going anywhere
+		 * and it is used in a *lot* of places.
+		 */
+		if (is_zero_page(page))
+			return page_folio(page);
+
 		/*
 		 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
 		 * right zone, so fail and let the caller fall back to the slow
@@ -180,6 +188,8 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
 {
 	if (flags & FOLL_PIN) {
+		if (is_zero_folio(folio))
+			return;
 		node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
 		if (folio_test_large(folio))
 			atomic_sub(refs, &folio->_pincount);
@@ -224,6 +234,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
 	if (flags & FOLL_GET)
 		folio_ref_inc(folio);
 	else if (flags & FOLL_PIN) {
+		/*
+		 * Don't take a pin on the zero page - it's not going anywhere
+		 * and it is used in a *lot* of places.
+		 */
+		if (is_zero_page(page))
+			return 0;
+
 		/*
 		 * Similar to try_grab_folio(): be sure to *also*
 		 * increment the normal page refcount field at least once,
@@ -258,6 +275,33 @@ void unpin_user_page(struct page *page)
 }
 EXPORT_SYMBOL(unpin_user_page);

+/**
+ * folio_add_pin - Try to get an additional pin on a pinned folio
+ * @folio: The folio to be pinned
+ *
+ * Get an additional pin on a folio we already have a pin on.  Makes no change
+ * if the folio is a zero_page.
+ */
+void folio_add_pin(struct folio *folio)
+{
+	if (is_zero_folio(folio))
+		return;
+
+	/*
+	 * Similar to try_grab_folio(): be sure to *also* increment the normal
+	 * page refcount field at least once, so that the page really is
+	 * pinned.
+	 */
+	if (folio_test_large(folio)) {
+		WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
+		folio_ref_inc(folio);
+		atomic_inc(&folio->_pincount);
+	} else {
+		WARN_ON_ONCE(folio_ref_count(folio) < GUP_PIN_COUNTING_BIAS);
+		folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+	}
+}
+
 static inline struct folio *gup_folio_range_next(struct page *start,
 		unsigned long npages, unsigned long i, unsigned int *ntails)
 {
@@ -3079,6 +3123,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
 * see Documentation/core-api/pin_user_pages.rst for further details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page() will not remove pins from it.
 */
 int pin_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages)
@@ -3110,6 +3157,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast);
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
 * see Documentation/core-api/pin_user_pages.rst for details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
 */
 long pin_user_pages_remote(struct mm_struct *mm,
 			   unsigned long start, unsigned long nr_pages,
@@ -3143,6 +3193,9 @@ EXPORT_SYMBOL(pin_user_pages_remote);
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
 * see Documentation/core-api/pin_user_pages.rst for details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
 */
 long pin_user_pages(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages,
@@ -3161,6 +3214,9 @@ EXPORT_SYMBOL(pin_user_pages);
 * pin_user_pages_unlocked() is the FOLL_PIN variant of
 * get_user_pages_unlocked(). Behavior is the same, except that this one sets
 * FOLL_PIN and rejects FOLL_GET.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
 */
 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 			     struct page **pages, unsigned int gup_flags)
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,7 +338,7 @@ static void swap_writepage_bdev_sync(struct page *page,
 	bio_init(&bio, sis->bdev, &bv, 1,
 		 REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
 	bio.bi_iter.bi_sector = swap_page_sector(page);
-	bio_add_page(&bio, page, thp_size(page), 0);
+	__bio_add_page(&bio, page, thp_size(page), 0);

 	bio_associate_blkg_from_page(&bio, page);
 	count_swpout_vm_event(page);
@@ -360,7 +360,7 @@ static void swap_writepage_bdev_async(struct page *page,
 			GFP_NOIO);
 	bio->bi_iter.bi_sector = swap_page_sector(page);
 	bio->bi_end_io = end_swap_bio_write;
-	bio_add_page(bio, page, thp_size(page), 0);
+	__bio_add_page(bio, page, thp_size(page), 0);

 	bio_associate_blkg_from_page(bio, page);
 	count_swpout_vm_event(page);
@@ -468,7 +468,7 @@ static void swap_readpage_bdev_sync(struct page *page,

 	bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
 	bio.bi_iter.bi_sector = swap_page_sector(page);
-	bio_add_page(&bio, page, thp_size(page), 0);
+	__bio_add_page(&bio, page, thp_size(page), 0);
 	/*
 	 * Keep this task valid during swap readpage because the oom killer may
 	 * attempt to access it in the page fault retry time check.
@@ -488,7 +488,7 @@ static void swap_readpage_bdev_async(struct page *page,
 	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
 	bio->bi_iter.bi_sector = swap_page_sector(page);
 	bio->bi_end_io = end_swap_bio_read;
-	bio_add_page(bio, page, thp_size(page), 0);
+	__bio_add_page(bio, page, thp_size(page), 0);
 	count_vm_event(PSWPIN);
 	submit_bio(bio);
 }
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		struct block_device *bdev = I_BDEV(inode);

 		set_blocksize(bdev, old_block_size);
-		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+		blkdev_put(bdev, p);
 	}

 	inode_lock(inode);
@@ -2770,7 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)

 	if (S_ISBLK(inode->i_mode)) {
 		p->bdev = blkdev_get_by_dev(inode->i_rdev,
-				   FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
+				BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
 		if (IS_ERR(p->bdev)) {
 			error = PTR_ERR(p->bdev);
 			p->bdev = NULL;
@@ -3221,7 +3221,7 @@ bad_swap:
 	p->cluster_next_cpu = NULL;
 	if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
 		set_blocksize(p->bdev, p->old_block_size);
-		blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+		blkdev_put(p->bdev, p);
 	}
 	inode = NULL;
 	destroy_swap_extents(p);