Merge tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:

 - Clean-ups in the READ path in anticipation of MSG_SPLICE_PAGES

 - Better NUMA awareness when allocating pages and other objects

 - A number of minor clean-ups to XDR encoding

 - Elimination of a race when accepting a TCP socket

 - Numerous observability enhancements

* tag 'nfsd-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (46 commits)
  nfsd: remove redundant assignments to variable len
  svcrdma: Fix stale comment
  NFSD: Distinguish per-net namespace initialization
  nfsd: move init of percpu reply_cache_stats counters back to nfsd_init_net
  SUNRPC: Address RCU warning in net/sunrpc/svc.c
  SUNRPC: Use sysfs_emit in place of strlcpy/sprintf
  SUNRPC: Remove transport class dprintk call sites
  SUNRPC: Fix comments for transport class registration
  svcrdma: Remove an unused argument from __svc_rdma_put_rw_ctxt()
  svcrdma: trace cc_release calls
  svcrdma: Convert "might sleep" comment into a code annotation
  NFSD: Add an nfsd4_encode_nfstime4() helper
  SUNRPC: Move initialization of rq_stime
  SUNRPC: Optimize page release in svc_rdma_sendto()
  svcrdma: Prevent page release when nothing was received
  svcrdma: Revert 2a1e4f21d8 ("svcrdma: Normalize Send page handling")
  SUNRPC: Revert 579900670a ("svcrdma: Remove unused sc_pages field")
  SUNRPC: Revert cc93ce9529 ("svcrdma: Retain the page backing rq_res.head[0].iov_base")
  NFSD: add encoding of op_recall flag for write delegation
  NFSD: Add "official" reviewers for this subsystem
  ...
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
 	switch (*ip)
 	{
 	case SVC_POOL_AUTO:
-		return strlcpy(buf, "auto\n", 20);
+		return sysfs_emit(buf, "auto\n");
 	case SVC_POOL_GLOBAL:
-		return strlcpy(buf, "global\n", 20);
+		return sysfs_emit(buf, "global\n");
 	case SVC_POOL_PERCPU:
-		return strlcpy(buf, "percpu\n", 20);
+		return sysfs_emit(buf, "percpu\n");
 	case SVC_POOL_PERNODE:
-		return strlcpy(buf, "pernode\n", 20);
+		return sysfs_emit(buf, "pernode\n");
 	default:
-		return sprintf(buf, "%d\n", *ip);
+		return sysfs_emit(buf, "%d\n", *ip);
 	}
 }
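The sysfs_emit() conversion above, restated as a standalone sketch (the handler name is hypothetical): sysfs_emit() bounds its output to PAGE_SIZE and returns the number of bytes written, so show-style handlers no longer pass a hand-computed length the way strlcpy() required.

	static int param_get_example(char *buf, const struct kernel_param *kp)
	{
		int *ip = (int *)kp->arg;

		/* sysfs_emit() never writes past PAGE_SIZE */
		return sysfs_emit(buf, "%d\n", *ip);
	}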
@@ -597,34 +597,25 @@ svc_destroy(struct kref *ref)
 }
 EXPORT_SYMBOL_GPL(svc_destroy);
 
 /*
  * Allocate an RPC server's buffer space.
  * We allocate pages and place them in rq_pages.
  */
-static int
+static bool
 svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
 {
-	unsigned int pages, arghi;
+	unsigned long pages, ret;
 
 	/* bc_xprt uses fore channel allocated buffers */
 	if (svc_is_backchannel(rqstp))
-		return 1;
+		return true;
 
 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 				       * We assume one is at most one page
 				       */
-	arghi = 0;
 	WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
 	if (pages > RPCSVC_MAXPAGES)
 		pages = RPCSVC_MAXPAGES;
-	while (pages) {
-		struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
-		if (!p)
-			break;
-		rqstp->rq_pages[arghi++] = p;
-		pages--;
-	}
-	return pages == 0;
+
+	ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
+					  rqstp->rq_pages);
+	return ret == pages;
 }
 
 /*
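A minimal sketch of the bulk-allocator contract the new code relies on (the helper name is hypothetical): alloc_pages_bulk_array_node() fills only the NULL slots of the array and returns how many entries are populated afterwards, so a single call both allocates and verifies.

	static bool fill_four_pages(int node, struct page **pages)
	{
		unsigned long filled;

		/* fills only NULL slots; returns the count of populated entries */
		filled = alloc_pages_bulk_array_node(GFP_KERNEL, node, 4, pages);
		return filled == 4;	/* partial success leaves NULL slots */
	}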
@@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 version)
  */
 static void svc_unregister(const struct svc_serv *serv, struct net *net)
 {
+	struct sighand_struct *sighand;
 	struct svc_program *progp;
 	unsigned long flags;
 	unsigned int i;
@@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
 		}
 	}
 
-	spin_lock_irqsave(&current->sighand->siglock, flags);
+	rcu_read_lock();
+	sighand = rcu_dereference(current->sighand);
+	spin_lock_irqsave(&sighand->siglock, flags);
 	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	spin_unlock_irqrestore(&sighand->siglock, flags);
+	rcu_read_unlock();
 }
 
 /*
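The locking pattern introduced above, sketched in isolation (the helper name is hypothetical): current->sighand is RCU-protected, so the pointer must be fetched under rcu_read_lock() and used only while the read-side section, and then the siglock, are held.

	static void clear_pending_signals(void)
	{
		struct sighand_struct *sighand;
		unsigned long flags;

		rcu_read_lock();
		sighand = rcu_dereference(current->sighand);
		spin_lock_irqsave(&sighand->siglock, flags);
		recalc_sigpending();	/* re-evaluate TIF_SIGPENDING */
		spin_unlock_irqrestore(&sighand->siglock, flags);
		rcu_read_unlock();
	}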
@@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list);
|
||||
* that no other thread will be using the transport or will
|
||||
* try to set XPT_DEAD.
|
||||
*/
|
||||
|
||||
/**
|
||||
* svc_reg_xprt_class - Register a server-side RPC transport class
|
||||
* @xcl: New transport class to be registered
|
||||
*
|
||||
* Returns zero on success; otherwise a negative errno is returned.
|
||||
*/
|
||||
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
|
||||
{
|
||||
struct svc_xprt_class *cl;
|
||||
int res = -EEXIST;
|
||||
|
||||
dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
|
||||
|
||||
INIT_LIST_HEAD(&xcl->xcl_list);
|
||||
spin_lock(&svc_xprt_class_lock);
|
||||
/* Make sure there isn't already a class with the same name */
|
||||
@@ -96,9 +101,13 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
|
||||
|
||||
/**
|
||||
* svc_unreg_xprt_class - Unregister a server-side RPC transport class
|
||||
* @xcl: Transport class to be unregistered
|
||||
*
|
||||
*/
|
||||
void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
|
||||
{
|
||||
dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
|
||||
spin_lock(&svc_xprt_class_lock);
|
||||
list_del_init(&xcl->xcl_list);
|
||||
spin_unlock(&svc_xprt_class_lock);
|
||||
@@ -685,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
|
||||
}
|
||||
|
||||
for (filled = 0; filled < pages; filled = ret) {
|
||||
ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
|
||||
rqstp->rq_pages);
|
||||
ret = alloc_pages_bulk_array_node(GFP_KERNEL,
|
||||
rqstp->rq_pool->sp_id,
|
||||
pages, rqstp->rq_pages);
|
||||
if (ret > filled)
|
||||
/* Made progress, don't sleep yet */
|
||||
continue;
|
||||
@@ -843,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
|
||||
svc_xprt_received(xprt);
|
||||
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
|
||||
/* XPT_DATA|XPT_DEFERRED case: */
|
||||
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
|
||||
rqstp, rqstp->rq_pool->sp_id, xprt,
|
||||
kref_read(&xprt->xpt_ref));
|
||||
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
|
||||
if (rqstp->rq_deferred)
|
||||
len = svc_deferred_recv(rqstp);
|
||||
else
|
||||
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
|
||||
rqstp->rq_stime = ktime_get();
|
||||
rqstp->rq_reserved = serv->sv_max_mesg;
|
||||
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
|
||||
} else
|
||||
@@ -894,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
|
||||
err = -EAGAIN;
|
||||
if (len <= 0)
|
||||
goto out_release;
|
||||
|
||||
trace_svc_xdr_recvfrom(&rqstp->rq_arg);
|
||||
|
||||
clear_bit(XPT_OLD, &xprt->xpt_flags);
|
||||
@@ -902,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
|
||||
|
||||
if (serv->sv_stats)
|
||||
serv->sv_stats->netcnt++;
|
||||
rqstp->rq_stime = ktime_get();
|
||||
return len;
|
||||
out_release:
|
||||
rqstp->rq_res.len = 0;
|
||||
|
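For context around the new kernel-doc, a hedged sketch of what a registrant passes to svc_reg_xprt_class(); the names and values below are illustrative, not from this series.

	static const struct svc_xprt_ops example_ops = {
		/* .xpo_create, .xpo_recvfrom, .xpo_sendto, ... */
	};

	static struct svc_xprt_class example_class = {
		.xcl_name	 = "example",
		.xcl_owner	 = THIS_MODULE,
		.xcl_ops	 = &example_ops,
		.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
	};

	/* returns 0, or -EEXIST if a class with this name already exists */
	err = svc_reg_xprt_class(&example_class);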
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -826,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 
 	trace_sk_data_ready(sk);
 
-	if (svsk) {
-		/* Refer to svc_setup_socket() for details. */
-		rmb();
-		svsk->sk_odata(sk);
-	}
-
 	/*
 	 * This callback may called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -840,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 	 * when one of child sockets become ESTABLISHED.
 	 * 2) data_ready method of the child socket may be called
 	 * when it receives data before the socket is accepted.
-	 * In case of 2, we should ignore it silently.
+	 * In case of 2, we should ignore it silently and DO NOT
+	 * dereference svsk.
 	 */
-	if (sk->sk_state == TCP_LISTEN) {
-		if (svsk) {
-			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
-			svc_xprt_enqueue(&svsk->sk_xprt);
-		}
+	if (sk->sk_state != TCP_LISTEN)
+		return;
+
+	if (svsk) {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
+		svsk->sk_odata(sk);
+		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 }
@@ -887,13 +886,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
 	err = kernel_accept(sock, &newsock, O_NONBLOCK);
 	if (err < 0) {
-		if (err == -ENOMEM)
-			printk(KERN_WARNING "%s: no more sockets!\n",
-			       serv->sv_name);
-		else if (err != -EAGAIN)
-			net_warn_ratelimited("%s: accept failed (err %d)!\n",
-					     serv->sv_name, -err);
-		trace_svcsock_accept_err(xprt, serv->sv_name, err);
+		if (err != -EAGAIN)
+			trace_svcsock_accept_err(xprt, serv->sv_name, err);
 		return NULL;
 	}
 	if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
@@ -1464,7 +1458,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	svsk->sk_owspace = inet->sk_write_space;
 	/*
 	 * This barrier is necessary in order to prevent race condition
-	 * with svc_data_ready(), svc_listen_data_ready() and others
+	 * with svc_data_ready(), svc_tcp_listen_data_ready(), and others
 	 * when calling callbacks above.
 	 */
 	wmb();
@@ -1476,7 +1470,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	else
 		svc_tcp_init(svsk, serv);
 
-	trace_svcsock_new_socket(sock);
+	trace_svcsock_new(svsk, sock);
 	return svsk;
 }
@@ -1657,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct socket *sock = svsk->sk_sock;
 
+	trace_svcsock_free(svsk, sock);
+
 	tls_handshake_cancel(sock->sk);
 	if (sock->file)
 		sockfd_put(sock);
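The shape of the listener fix above, restated as a sketch: test sk_state before touching svsk at all, so the child-socket case (2) never dereferences a pointer that only the listening socket publishes.

	/* child socket delivering early data: not ours, bail before
	 * touching svsk
	 */
	if (sk->sk_state != TCP_LISTEN)
		return;

	if (svsk) {
		rmb();		/* pairs with the wmb() in svc_setup_socket() */
		svsk->sk_odata(sk);
		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
		svc_xprt_enqueue(&svsk->sk_xprt);
	}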
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);
 
 
 /**
  * xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
  * @xdr: pointer to xdr_stream
- * @vec: pointer to a kvec array
  * @nbytes: number of bytes to reserve
  *
- * Reserves enough buffer space to encode 'nbytes' of data and stores the
- * pointers in 'vec'. The size argument passed to xdr_reserve_space() is
- * determined based on the number of bytes remaining in the current page to
- * avoid invalidating iov_base pointers when xdr_commit_encode() is called.
+ * The size argument passed to xdr_reserve_space() is determined based
+ * on the number of bytes remaining in the current page to avoid
+ * invalidating iov_base pointers when xdr_commit_encode() is called.
+ *
+ * Return values:
+ *   %0: success
+ *   %-EMSGSIZE: not enough space is available in @xdr
  */
-int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
+int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
 {
-	int thislen;
-	int v = 0;
+	size_t thislen;
 	__be32 *p;
 
 	/*
@@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
 		xdr->end = xdr->p;
 	}
 
+	/* XXX: Let's find a way to make this more efficient */
 	while (nbytes) {
 		thislen = xdr->buf->page_len % PAGE_SIZE;
 		thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);
 
 		p = xdr_reserve_space(xdr, thislen);
 		if (!p)
-			return -EIO;
+			return -EMSGSIZE;
 
-		vec[v].iov_base = p;
-		vec[v].iov_len = thislen;
-		v++;
 		nbytes -= thislen;
 	}
 
-	return v;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);
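A hedged caller-side sketch of the new xdr_reserve_space_vec() contract (the surrounding function is hypothetical): the kvec array parameter is gone, and the result is 0 or a negative errno rather than a vector count.

	static int encode_read_payload(struct xdr_stream *xdr, size_t count)
	{
		int ret;

		ret = xdr_reserve_space_vec(xdr, count);
		if (ret < 0)
			return ret;	/* -EMSGSIZE: reply buffer exhausted */
		return 0;
	}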
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
 	 */
 	get_page(virt_to_page(rqst->rq_buffer));
 	sctxt->sc_send_wr.opcode = IB_WR_SEND;
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }
 
 /* Server-side transport endpoint wants a whole page for its send
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_recv_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;
 
-	ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
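The NUMA idiom used throughout these svcrdma hunks, as a standalone sketch (the helper name is hypothetical): ibdev_to_node() reports the NUMA node of the RDMA device's parent, and kmalloc_node() places control structures there so receive processing stays node-local.

	static void *alloc_near_device(struct svcxprt_rdma *rdma, size_t len)
	{
		int node = ibdev_to_node(rdma->sc_cm_id->device);

		/* behaves like plain kmalloc() when the device reports
		 * no NUMA affinity
		 */
		return kmalloc_node(len, GFP_KERNEL, node);
	}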
@@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
 	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
 	ctxt->rc_recv_buf = buffer;
-	ctxt->rc_temp = false;
 	return ctxt;
 
 fail2:
@@ -232,10 +232,7 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 	pcl_free(&ctxt->rc_write_pcl);
 	pcl_free(&ctxt->rc_reply_pcl);
 
-	if (!ctxt->rc_temp)
-		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
-	else
-		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+	llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
 }
 
 /**
@@ -258,7 +255,7 @@ void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
 }
 
 static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
-				   unsigned int wanted, bool temp)
+				   unsigned int wanted)
 {
 	const struct ib_recv_wr *bad_wr = NULL;
 	struct svc_rdma_recv_ctxt *ctxt;
@@ -275,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
 			break;
 
 		trace_svcrdma_post_recv(ctxt);
-		ctxt->rc_temp = temp;
 		ctxt->rc_recv_wr.next = recv_chain;
 		recv_chain = &ctxt->rc_recv_wr;
 		rdma->sc_pending_recvs++;
@@ -309,7 +305,7 @@ err_free:
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
-	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
+	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
 }
 
 /**
@@ -343,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	 * client reconnects.
 	 */
 	if (rdma->sc_pending_recvs < rdma->sc_max_requests)
-		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
+		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
 			goto dropped;
 
 	/* All wc fields are now known to be valid */
@@ -775,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
  *
  * The next ctxt is removed from the "receive" lists.
  *
- * - If the ctxt completes a Read, then finish assembling the Call
- *   message and return the number of bytes in the message.
- *
  * - If the ctxt completes a Receive, then construct the Call
  *   message from the contents of the Receive buffer.
  *
@@ -786,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
  *   in the message.
  *
  * - If there are Read chunks in this message, post Read WRs to
- *   pull that payload and return 0.
+ *   pull that payload. When the Read WRs complete, build the
+ *   full message and return the number of bytes in it.
  */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
@@ -796,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	struct svc_rdma_recv_ctxt *ctxt;
 	int ret;
 
+	/* Prevent svc_xprt_release() from releasing pages in rq_pages
+	 * when returning 0 or an error.
+	 */
+	rqstp->rq_respages = rqstp->rq_pages;
+	rqstp->rq_next_page = rqstp->rq_respages;
+
 	rqstp->rq_xprt_ctxt = NULL;
 
 	ctxt = NULL;
@@ -819,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 				   DMA_FROM_DEVICE);
 	svc_rdma_build_arg_xdr(rqstp, ctxt);
 
-	/* Prevent svc_xprt_release from releasing pages in rq_pages
-	 * if we return 0 or an error.
-	 */
-	rqstp->rq_respages = rqstp->rq_pages;
-	rqstp->rq_next_page = rqstp->rq_respages;
-
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
 	if (ret < 0)
 		goto out_err;
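With rc_temp gone, every receive ctxt is recycled through the transport's llist. A sketch of that lock-free pattern, under the usual llist assumptions:

	struct llist_node *node;
	struct svc_rdma_recv_ctxt *ctxt;

	/* consumer: llist_del_first() requires a single consumer or
	 * external serialization
	 */
	node = llist_del_first(&rdma->sc_recv_ctxts);
	if (node) {
		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);

		/* ... use the ctxt, then recycle it; llist_add() is safe
		 * from any context ...
		 */
		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
	}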
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
 	if (node) {
 		ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
 	} else {
-		ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
-			       GFP_KERNEL);
+		ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
+				    GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
 		if (!ctxt)
 			goto out_noctx;
 
@@ -84,8 +84,7 @@ out_noctx:
 	return NULL;
 }
 
-static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
-				   struct svc_rdma_rw_ctxt *ctxt,
+static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
 				   struct llist_head *list)
 {
 	sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
@@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
 static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_rw_ctxt *ctxt)
 {
-	__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
+	__svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
 }
 
 /**
@@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 	struct svc_rdma_rw_ctxt *ctxt;
 	LLIST_HEAD(free);
 
+	trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
+
 	first = last = NULL;
 	while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
 		list_del(&ctxt->rw_list);
@@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
 				    ctxt->rw_nents, dir);
-		__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+		__svc_rdma_put_rw_ctxt(ctxt, &free);
 
 		ctxt->rw_node.next = first;
 		first = &ctxt->rw_node;
@@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
 {
 	struct svc_rdma_write_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
 
@@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_read_info *info;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+			    ibdev_to_node(rdma->sc_cm_id->device));
 	if (!info)
 		return info;
 
@@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
 		return;
 	}
 
-/* This function sleeps when the transport's Send Queue is congested.
- *
+/*
  * Assumptions:
  * - If ib_post_send() succeeds, only one completion is expected,
  *   even if one or more WRs are flushed. This is true when posting
@@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 	struct ib_cqe *cqe;
 	int ret;
 
+	might_sleep();
+
 	if (cc->cc_sqecount > rdma->sc_sq_depth)
 		return -EINVAL;
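The annotation added above, sketched in isolation (the function name is hypothetical): might_sleep() documents the same thing the deleted comment did, but also triggers a warning if the function is entered from atomic context when CONFIG_DEBUG_ATOMIC_SLEEP is enabled.

	static int post_chunk(void)
	{
		might_sleep();	/* may block waiting for Send Queue space */

		/* ... potentially blocking work ... */
		return 0;
	}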
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
 static struct svc_rdma_send_ctxt *
 svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 {
+	int node = ibdev_to_node(rdma->sc_cm_id->device);
 	struct svc_rdma_send_ctxt *ctxt;
 	dma_addr_t addr;
 	void *buffer;
-	size_t size;
 	int i;
 
-	size = sizeof(*ctxt);
-	size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
-	ctxt = kmalloc(size, GFP_KERNEL);
+	ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+			    GFP_KERNEL, node);
 	if (!ctxt)
 		goto fail0;
-	buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
 	if (!buffer)
 		goto fail1;
 	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
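The struct_size() conversion above, sketched with a toy struct: it replaces the open-coded "sizeof(header) + n * sizeof(elem)" arithmetic and saturates on overflow instead of wrapping. Names here are illustrative.

	struct sge_array {
		unsigned int count;
		struct ib_sge sges[];	/* flexible array member */
	};

	static struct sge_array *sge_array_alloc(unsigned int n, int node)
	{
		struct sge_array *p;

		/* header plus n trailing entries, with overflow checking */
		p = kmalloc_node(struct_size(p, sges, n), GFP_KERNEL, node);
		if (p)
			p->count = n;
		return p;
	}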
@@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
 	ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
 	ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
 	ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
-	init_completion(&ctxt->sc_done);
 	ctxt->sc_cqe.done = svc_rdma_wc_send;
 	ctxt->sc_xprt_buf = buffer;
 	xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -214,6 +212,7 @@ out:
 
 	ctxt->sc_send_wr.num_sge = 0;
 	ctxt->sc_cur_sge_no = 0;
+	ctxt->sc_page_count = 0;
 	return ctxt;
 
 out_empty:
@@ -228,6 +227,8 @@ out_empty:
 * svc_rdma_send_ctxt_put - Return send_ctxt to free list
 * @rdma: controlling svcxprt_rdma
 * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
 */
 void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
			    struct svc_rdma_send_ctxt *ctxt)
@@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 	struct ib_device *device = rdma->sc_cm_id->device;
 	unsigned int i;
 
+	if (ctxt->sc_page_count)
+		release_pages(ctxt->sc_pages, ctxt->sc_page_count);
+
 	/* The first SGE contains the transport header, which
 	 * remains mapped until @ctxt is destroyed.
 	 */
@@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
 
 	svc_rdma_wake_send_waiters(rdma, 1);
-	complete(&ctxt->sc_done);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS))
 		goto flushed;
 
 	trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	return;
 
 flushed:
@@ -294,6 +298,7 @@ flushed:
 		trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
 	else
 		trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+	svc_rdma_send_ctxt_put(rdma, ctxt);
 	svc_xprt_deferred_close(&rdma->sc_xprt);
 }
@@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
 	struct ib_send_wr *wr = &ctxt->sc_send_wr;
 	int ret;
 
-	reinit_completion(&ctxt->sc_done);
+	might_sleep();
 
 	/* Sync the transport header buffer */
 	ib_dma_sync_single_for_device(rdma->sc_pd->device,
@@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 				     svc_rdma_xb_dma_map, &args);
 }
 
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+				   struct svc_rdma_send_ctxt *ctxt)
+{
+	int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+	ctxt->sc_page_count += pages;
+	for (i = 0; i < pages; i++) {
+		ctxt->sc_pages[i] = rqstp->rq_respages[i];
+		rqstp->rq_respages[i] = NULL;
+	}
+
+	/* Prevent svc_xprt_release from releasing pages in rq_pages */
+	rqstp->rq_next_page = rqstp->rq_respages;
+}
+
 /* Prepare the portion of the RPC Reply that will be transmitted
  * via RDMA Send. The RPC-over-RDMA transport header is prepared
  * in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
@@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 	if (ret < 0)
 		return ret;
 
+	svc_rdma_save_io_pages(rqstp, sctxt);
+
 	if (rctxt->rc_inv_rkey) {
 		sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
 		sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
@@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 		sctxt->sc_send_wr.opcode = IB_WR_SEND;
 	}
 
-	ret = svc_rdma_send(rdma, sctxt);
-	if (ret < 0)
-		return ret;
-
-	ret = wait_for_completion_killable(&sctxt->sc_done);
-	svc_rdma_send_ctxt_put(rdma, sctxt);
-	return ret;
+	return svc_rdma_send(rdma, sctxt);
 }
 
 /**
@@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
 	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
 	if (svc_rdma_send(rdma, sctxt))
 		goto put_ctxt;
-
-	wait_for_completion_killable(&sctxt->sc_done);
 	return;
 
 put_ctxt:
 	svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
 	if (ret < 0)
 		goto put_ctxt;
-
-	/* Prevent svc_xprt_release() from releasing the page backing
-	 * rq_res.head[0].iov_base. It's no longer being accessed by
-	 * the I/O device. */
-	rqstp->rq_respages++;
 	return 0;
 
 reply_chunk:
 	if (ret != -E2BIG && ret != -EINVAL)
 		goto put_ctxt;
 
+	/* Send completion releases payload pages that were part
+	 * of previously posted RDMA Writes.
+	 */
+	svc_rdma_save_io_pages(rqstp, sctxt);
 	svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
 	return 0;
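The page-handling idea these hunks restore, condensed into a sketch: ownership of the response pages moves from the svc_rqst to the send ctxt at sendto time, and release_pages() frees the whole batch later, in the Send completion handler.

	/* at sendto time: rqstp drops its references, ctxt owns the pages */
	ctxt->sc_pages[i] = rqstp->rq_respages[i];
	rqstp->rq_respages[i] = NULL;

	/* ... later, after the Send WR completes ... */
	if (ctxt->sc_page_count)
		release_pages(ctxt->sc_pages, ctxt->sc_page_count);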
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -64,7 +64,7 @@
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net);
+						 struct net *net, int node);
 static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
					struct net *net,
					struct sockaddr *sa, int salen,
@@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context)
 }
 
 static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
-						 struct net *net)
+						 struct net *net, int node)
 {
-	struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
+	struct svcxprt_rdma *cma_xprt;
 
-	if (!cma_xprt) {
-		dprintk("svcrdma: failed to create new transport\n");
+	cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
+	if (!cma_xprt)
 		return NULL;
-	}
 
 	svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
@@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
 	struct svcxprt_rdma *newxprt;
 	struct sockaddr *sa;
 
 	/* Create a new transport */
 	newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
-				       listen_xprt->sc_xprt.xpt_net);
+				       listen_xprt->sc_xprt.xpt_net,
+				       ibdev_to_node(new_cma_id->device));
 	if (!newxprt)
 		return;
 	newxprt->sc_cm_id = new_cma_id;
@@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 
 	if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
 		return ERR_PTR(-EAFNOSUPPORT);
-	cma_xprt = svc_rdma_create_xprt(serv, net);
+	cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
 	if (!cma_xprt)
 		return ERR_PTR(-ENOMEM);
 	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
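One wrinkle worth noting, as a sketch of the two call sites above: a listener has no RDMA device yet, so it passes NUMA_NO_NODE, which makes kzalloc_node() behave like plain kzalloc(); per-connection transports get a real node derived from the new cm_id's device.

	/* listener: no device to derive a node from yet */
	cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);

	/* accepted connection: allocate near the device */
	newxprt = svc_rdma_create_xprt(serv, net,
				       ibdev_to_node(new_cma_id->device));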