Merge tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe pull requests via Christoph:
      - Support some passthrough commands without CAP_SYS_ADMIN (Kanchan
        Joshi)
      - Refactor PCIe probing and reset (Christoph Hellwig)
      - Various fabrics authentication fixes and improvements (Sagi
        Grimberg)
      - Avoid fallback to sequential scan due to transient issues (Uday
        Shankar)
      - Implement support for the DEAC bit in Write Zeroes (Christoph
        Hellwig)
      - Allow overriding the IEEE OUI and firmware revision in configfs
        for nvmet (Aleksandr Miloserdov)
      - Force reconnect when the number of queues changes in nvmet (Daniel
        Wagner)
      - Minor fixes and improvements (Uros Bizjak, Joel Granados, Sagi
        Grimberg, Christoph Hellwig, Christophe JAILLET)
      - Fix and cleanup nvme-fc req allocation (Chaitanya Kulkarni)
      - Use the common tagset helpers in nvme-pci driver (Christoph
        Hellwig)
      - Cleanup the nvme-pci removal path (Christoph Hellwig)
      - Use kstrtobool() instead of strtobool (Christophe JAILLET)
      - Allow unprivileged passthrough of Identify Controller (Joel
        Granados)
      - Support io stats on the mpath device (Sagi Grimberg)
      - Minor nvmet cleanup (Sagi Grimberg)

 - MD pull requests via Song:
      - Code cleanups (Christoph)
      - Various fixes

 - Floppy pull request from Denis:
      - Fix a memory leak in the init error path (Yuan)

 - Series fixing some batch wakeup issues with sbitmap (Gabriel)

 - Removal of the pktcdvd driver that was deprecated more than 5 years
   ago, and subsequent removal of the devnode callback in struct
   block_device_operations as no users are now left (Greg)

 - Fix for partition read on an exclusively opened bdev (Jan)

 - Series of elevator API cleanups (Jinlong, Christoph)

 - Series of fixes and cleanups for blk-iocost (Kemeng)

 - Series of fixes and cleanups for blk-throttle (Kemeng)

 - Series adding concurrent support for sync queues in BFQ (Yu)

 - Series bringing drbd a bit closer to the out-of-tree maintained
   version (Christian, Joel, Lars, Philipp)

 - Misc drbd fixes (Wang)

 - blk-wbt fixes and tweaks for enable/disable (Yu)

 - Fixes for mq-deadline for zoned devices (Damien)

 - Add support for read-only and offline zones for null_blk
   (Shin'ichiro)

 - Series fixing the delayed holder tracking, as used by DM (Yu,
   Christoph)

 - Series enabling bio alloc caching for IRQ based IO (Pavel)

 - Series enabling userspace peer-to-peer DMA (Logan)

 - BFQ waker fixes (Khazhismel)

 - Series fixing elevator refcount issues (Christoph, Jinlong)

 - Series cleaning up references around queue destruction (Christoph)

 - Series doing quiesce by tagset, enabling cleanups in drivers
   (Christoph, Chao)

 - Series untangling the queue kobject and queue references (Christoph)

 - Misc fixes and cleanups (Bart, David, Dawei, Jinlong, Kemeng, Ye,
   Yang, Waiman, Shin'ichiro, Randy, Pankaj, Christoph)

* tag 'for-6.2/block-2022-12-08' of git://git.kernel.dk/linux: (247 commits)
  blktrace: Fix output non-blktrace event when blk_classic option enabled
  block: sed-opal: Don't include <linux/kernel.h>
  sed-opal: allow using IOC_OPAL_SAVE for locking too
  blk-cgroup: Fix typo in comment
  block: remove bio_set_op_attrs
  nvmet: don't open-code NVME_NS_ATTR_RO enumeration
  nvme-pci: use the tagset alloc/free helpers
  nvme: add the Apple shared tag workaround to nvme_alloc_io_tag_set
  nvme: only set reserved_tags in nvme_alloc_io_tag_set for fabrics controllers
  nvme: consolidate setting the tagset flags
  nvme: pass nr_maps explicitly to nvme_alloc_io_tag_set
  block: remove bio_copy_data_iter
  nvme-pci: split out a nvme_pci_ctrl_is_dead helper
  nvme-pci: return early on ctrl state mismatch in nvme_reset_work
  nvme-pci: rename nvme_disable_io_queues
  nvme-pci: cleanup nvme_suspend_queue
  nvme-pci: remove nvme_pci_disable
  nvme-pci: remove nvme_disable_admin_queue
  nvme: merge nvme_shutdown_ctrl into nvme_disable_ctrl
  nvme: use nvme_wait_ready in nvme_shutdown_ctrl
  ...
Merged by Linus Torvalds on 2022-12-13 10:43:59 -08:00
144 changed files with 3235 additions and 5913 deletions

--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -434,6 +434,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 	atomic_set(&sbq->ws_active, 0);
+	atomic_set(&sbq->completion_cnt, 0);
+	atomic_set(&sbq->wakeup_cnt, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
 	if (!sbq->ws) {
@@ -441,40 +443,21 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+	for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 		init_waitqueue_head(&sbq->ws[i].wait);
-		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
-	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
-					    unsigned int wake_batch)
-{
-	int i;
-
-	if (sbq->wake_batch != wake_batch) {
-		WRITE_ONCE(sbq->wake_batch, wake_batch);
-		/*
-		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
-		 * to ensure that the batch size is updated before the wait
-		 * counts.
-		 */
-		smp_mb();
-		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
-			atomic_set(&sbq->ws[i].wait_cnt, 1);
-	}
-}
-
 static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
 					    unsigned int depth)
 {
 	unsigned int wake_batch;
 
 	wake_batch = sbq_calc_wake_batch(sbq, depth);
-	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
+	if (sbq->wake_batch != wake_batch)
+		WRITE_ONCE(sbq->wake_batch, wake_batch);
 }
 
 void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
@@ -488,7 +471,8 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
 
 	wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES,
 			min_batch, SBQ_WAKE_BATCH);
-	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
+
+	WRITE_ONCE(sbq->wake_batch, wake_batch);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
 
@@ -576,106 +560,56 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
-static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
+static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
 	int i, wake_index;
 
 	if (!atomic_read(&sbq->ws_active))
-		return NULL;
+		return;
 
 	wake_index = atomic_read(&sbq->wake_index);
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
 
-		if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) {
-			if (wake_index != atomic_read(&sbq->wake_index))
-				atomic_set(&sbq->wake_index, wake_index);
-			return ws;
-		}
-
+		/*
+		 * Advance the index before checking the current queue.
+		 * It improves fairness, by ensuring the queue doesn't
+		 * need to be fully emptied before trying to wake up
+		 * from the next one.
+		 */
 		wake_index = sbq_index_inc(wake_index);
+
+		/*
+		 * It is sufficient to wake up at least one waiter to
+		 * guarantee forward progress.
+		 */
+		if (waitqueue_active(&ws->wait) &&
+		    wake_up_nr(&ws->wait, nr))
+			break;
 	}
 
-	return NULL;
-}
-
-static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr)
-{
-	struct sbq_wait_state *ws;
-	unsigned int wake_batch;
-	int wait_cnt, cur, sub;
-	bool ret;
-
-	if (*nr <= 0)
-		return false;
-
-	ws = sbq_wake_ptr(sbq);
-	if (!ws)
-		return false;
-
-	cur = atomic_read(&ws->wait_cnt);
-	do {
-		/*
-		 * For concurrent callers of this, callers should call this
-		 * function again to wakeup a new batch on a different 'ws'.
-		 */
-		if (cur == 0)
-			return true;
-		sub = min(*nr, cur);
-		wait_cnt = cur - sub;
-	} while (!atomic_try_cmpxchg(&ws->wait_cnt, &cur, wait_cnt));
-
-	/*
-	 * If we decremented queue without waiters, retry to avoid lost
-	 * wakeups.
-	 */
-	if (wait_cnt > 0)
-		return !waitqueue_active(&ws->wait);
-
-	*nr -= sub;
-
-	/*
-	 * When wait_cnt == 0, we have to be particularly careful as we are
-	 * responsible to reset wait_cnt regardless whether we've actually
-	 * woken up anybody. But in case we didn't wakeup anybody, we still
-	 * need to retry.
-	 */
-	ret = !waitqueue_active(&ws->wait);
-	wake_batch = READ_ONCE(sbq->wake_batch);
-
-	/*
-	 * Wake up first in case that concurrent callers decrease wait_cnt
-	 * while waitqueue is empty.
-	 */
-	wake_up_nr(&ws->wait, wake_batch);
-
-	/*
-	 * Pairs with the memory barrier in sbitmap_queue_resize() to
-	 * ensure that we see the batch size update before the wait
-	 * count is reset.
-	 *
-	 * Also pairs with the implicit barrier between decrementing wait_cnt
-	 * and checking for waitqueue_active() to make sure waitqueue_active()
-	 * sees result of the wakeup if atomic_dec_return() has seen the result
-	 * of atomic_set().
-	 */
-	smp_mb__before_atomic();
-
-	/*
-	 * Increase wake_index before updating wait_cnt, otherwise concurrent
-	 * callers can see valid wait_cnt in old waitqueue, which can cause
-	 * invalid wakeup on the old waitqueue.
-	 */
-	sbq_index_atomic_inc(&sbq->wake_index);
-	atomic_set(&ws->wait_cnt, wake_batch);
-
-	return ret || *nr;
+	if (wake_index != atomic_read(&sbq->wake_index))
+		atomic_set(&sbq->wake_index, wake_index);
 }
 
 void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
-	while (__sbq_wake_up(sbq, &nr))
-		;
+	unsigned int wake_batch = READ_ONCE(sbq->wake_batch);
+	unsigned int wakeups;
+
+	if (!atomic_read(&sbq->ws_active))
+		return;
+
+	atomic_add(nr, &sbq->completion_cnt);
+	wakeups = atomic_read(&sbq->wakeup_cnt);
+
+	do {
+		if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch)
+			return;
+	} while (!atomic_try_cmpxchg(&sbq->wakeup_cnt,
+				&wakeups, wakeups + wake_batch));
+
+	__sbitmap_queue_wake_up(sbq, wake_batch);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
@@ -792,9 +726,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	seq_puts(m, "ws={\n");
 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
 		struct sbq_wait_state *ws = &sbq->ws[i];
-
-		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
-			   atomic_read(&ws->wait_cnt),
+		seq_printf(m, "\t{.wait=%s},\n",
 			   waitqueue_active(&ws->wait) ? "active" : "inactive");
 	}
 	seq_puts(m, "}\n");
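
For reference, the core of the sbitmap change in the hunks above replaces the per-waitqueue wait_cnt with two per-bitmap counters: completions accumulate in completion_cnt, and a batch of waiters is woken only once completions run wake_batch ahead of wakeup_cnt, which is advanced with a cmpxchg so that only one caller issues each batch. The following is a minimal userspace C sketch of that accounting, not the kernel code itself: demo_queue and demo_wake_up are illustrative names, and C11 atomics stand in for the kernel's atomic_t helpers.

/*
 * Userspace sketch of the batched wake-up accounting: count completions,
 * and hand out one wake-up of wake_batch waiters per wake_batch completions.
 * Unsigned subtraction keeps the comparison correct across wrap-around,
 * as in the kernel version.
 */
#include <stdatomic.h>
#include <stdio.h>

struct demo_queue {
	unsigned int wake_batch;	/* completions per wake-up batch */
	atomic_uint completion_cnt;	/* total completions observed */
	atomic_uint wakeup_cnt;		/* completions already covered by wake-ups */
};

/* Returns how many waiters this call should wake (0 = batch not reached). */
static unsigned int demo_wake_up(struct demo_queue *q, unsigned int nr)
{
	unsigned int wake_batch = q->wake_batch;
	unsigned int wakeups;

	atomic_fetch_add(&q->completion_cnt, nr);
	wakeups = atomic_load(&q->wakeup_cnt);

	do {
		/* Not enough un-credited completions yet: no wake-up. */
		if (atomic_load(&q->completion_cnt) - wakeups < wake_batch)
			return 0;
		/* Claim this batch; only one concurrent caller succeeds. */
	} while (!atomic_compare_exchange_weak(&q->wakeup_cnt,
					       &wakeups, wakeups + wake_batch));

	return wake_batch;	/* caller would wake_up_nr() this many waiters */
}

int main(void)
{
	struct demo_queue q = { .wake_batch = 4 };

	for (int i = 1; i <= 10; i++)
		printf("completion %2d -> wake %u waiters\n",
		       i, demo_wake_up(&q, 1));
	return 0;
}

With wake_batch = 4, the sketch reports a wake-up of four waiters on every fourth completion and zero otherwise, which mirrors the batching that the new sbitmap_queue_wake_up() implements with atomic_try_cmpxchg() in the diff above.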