add idl4k kernel firmware version 1.13.0.105

Author: Jaroslav Kysela
Date: 2015-03-26 17:22:37 +01:00
Parent: 5194d2792e
Commit: e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

@@ -0,0 +1,32 @@
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o iw_cm.o $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
iw_cm-y := iwcm.o
rdma_cm-y := cma.o
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o

@@ -0,0 +1,534 @@
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mutex.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/if_arp.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");
struct addr_req {
struct list_head list;
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
struct rdma_addr_client *client;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
int status;
};
static void process_req(struct work_struct *work);
static DEFINE_MUTEX(lock);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);
static inline void put_client(struct rdma_addr_client *client)
{
if (atomic_dec_and_test(&client->refcount))
complete(&client->comp);
}
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
put_client(client);
wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
switch (dev->type) {
case ARPHRD_INFINIBAND:
dev_addr->dev_type = RDMA_NODE_IB_CA;
break;
case ARPHRD_ETHER:
dev_addr->dev_type = RDMA_NODE_RNIC;
break;
default:
return -EADDRNOTAVAIL;
}
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->src_dev = dev;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
dev_put(dev);
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
break;
}
}
break;
#endif
}
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
static void set_timeout(unsigned long time)
{
unsigned long delay;
cancel_delayed_work(&work);
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
queue_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
{
struct addr_req *temp_req;
mutex_lock(&lock);
list_for_each_entry_reverse(temp_req, &req_list, list) {
if (time_after_eq(req->timeout, temp_req->timeout))
break;
}
list_add(&req->list, &temp_req->list);
if (req_list.next == &req->list)
set_timeout(req->timeout);
mutex_unlock(&lock);
}
static void addr_send_arp(struct sockaddr *dst_in)
{
struct rtable *rt;
struct flowi fl;
memset(&fl, 0, sizeof fl);
switch (dst_in->sa_family) {
case AF_INET:
fl.nl_u.ip4_u.daddr =
((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
if (ip_route_output_key(&init_net, &rt, &fl))
return;
neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct dst_entry *dst;
fl.nl_u.ip6_u.daddr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return;
neigh_event_send(dst->neighbour, NULL);
dst_release(dst);
break;
}
#endif
}
}
static int addr4_resolve_remote(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct flowi fl;
struct rtable *rt;
struct neighbour *neigh;
int ret;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
goto put;
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
if (!neigh) {
ret = -ENODATA;
goto put;
}
if (!(neigh->nud_state & NUD_VALID)) {
ret = -ENODATA;
goto release;
}
if (!src_ip) {
src_in->sin_family = dst_in->sin_family;
src_in->sin_addr.s_addr = rt->rt_src;
}
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
put:
ip_rt_put(rt);
out:
return ret;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
int ret = -ENODATA;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
dst = ip6_route_output(&init_net, NULL, &fl);
if (!dst)
return ret;
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
} else {
neigh = dst->neighbour;
if (neigh && (neigh->nud_state & NUD_VALID))
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
}
dst_release(dst);
return ret;
}
#else
static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
static int addr_resolve_remote(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
return addr4_resolve_remote((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
struct sockaddr *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_remote(src_in, dst_in,
req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
continue;
}
list_move_tail(&req->list, &done_list);
}
if (!list_empty(&req_list)) {
req = list_entry(req_list.next, struct addr_req, list);
set_timeout(req->timeout);
}
mutex_unlock(&lock);
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
req->callback(req->status, (struct sockaddr *) &req->src_addr,
req->addr, req->context);
put_client(req->client);
kfree(req);
}
}
static int addr_resolve_local(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
struct net_device *dev;
int ret;
switch (dst_in->sa_family) {
case AF_INET:
{
__be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
__be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
dev = ip_dev_find(&init_net, dst_ip);
if (!dev)
return -EADDRNOTAVAIL;
if (ipv4_is_zeronet(src_ip)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv4_is_loopback(src_ip)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
dev_put(dev);
break;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
{
struct in6_addr *a;
for_each_netdev(&init_net, dev)
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) dst_in)->sin6_addr,
dev, 1))
break;
if (!dev)
return -EADDRNOTAVAIL;
a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
if (ipv6_addr_any(a)) {
src_in->sa_family = dst_in->sa_family;
((struct sockaddr_in6 *) src_in)->sin6_addr =
((struct sockaddr_in6 *) dst_in)->sin6_addr;
ret = rdma_copy_addr(addr, dev, dev->dev_addr);
} else if (ipv6_addr_loopback(a)) {
ret = rdma_translate_ip(dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
} else {
ret = rdma_translate_ip(src_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
}
break;
}
#endif
default:
ret = -EADDRNOTAVAIL;
break;
}
return ret;
}
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
void *context)
{
struct sockaddr *src_in, *dst_in;
struct addr_req *req;
int ret = 0;
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
return -ENOMEM;
if (src_addr)
memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve_local(src_in, dst_in, addr);
if (req->status == -EADDRNOTAVAIL)
req->status = addr_resolve_remote(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
queue_req(req);
break;
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
kfree(req);
break;
}
return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
req->status = -ECANCELED;
req->timeout = jiffies;
list_move(&req->list, &req_list);
set_timeout(req->timeout);
break;
}
}
mutex_unlock(&lock);
}
EXPORT_SYMBOL(rdma_addr_cancel);
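/*
 * Editor's illustrative sketch (not part of this file): a minimal caller of
 * the address-resolution API above, assuming an IPv4 destination.  The
 * "demo_" names are hypothetical and exist only for illustration.
 */
#include <linux/in.h>

static struct rdma_addr_client demo_client;

static void demo_resolved(int status, struct sockaddr *src_addr,
			  struct rdma_dev_addr *addr, void *context)
{
	/* Runs from the ib_addr workqueue once process_req() has either
	 * resolved the destination hardware address or timed out. */
	if (status)
		printk(KERN_INFO "demo: resolution failed: %d\n", status);
}

static int demo_resolve(__be32 dst_ip, struct rdma_dev_addr *dev_addr)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = dst_ip,
	};

	/* Register once before issuing requests; rdma_addr_unregister_client()
	 * later blocks until all outstanding callbacks have completed. */
	rdma_addr_register_client(&demo_client);

	/* A NULL source lets addr_resolve_local() choose the source address. */
	return rdma_resolve_ip(&demo_client, NULL, (struct sockaddr *) &dst,
			       dev_addr, 2000, demo_resolved, NULL);
}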
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
if (neigh->nud_state & NUD_VALID) {
set_timeout(jiffies);
}
}
return 0;
}
static struct notifier_block nb = {
.notifier_call = netevent_callback
};
static int __init addr_init(void)
{
addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq)
return -ENOMEM;
register_netevent_notifier(&nb);
return 0;
}
static void __exit addr_cleanup(void)
{
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
module_init(addr_init);
module_exit(addr_cleanup);

@@ -0,0 +1,211 @@
/*
* Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/slab.h>
#include <linux/string.h>
#include "agent.h"
#include "smi.h"
#include "mad_priv.h"
#define SPFX "ib_agent: "
struct ib_agent_port_private {
struct list_head port_list;
struct ib_mad_agent *agent[2];
};
static DEFINE_SPINLOCK(ib_agent_port_list_lock);
static LIST_HEAD(ib_agent_port_list);
static struct ib_agent_port_private *
__ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
list_for_each_entry(entry, &ib_agent_port_list, port_list) {
if (entry->agent[0]->device == device &&
entry->agent[0]->port_num == port_num)
return entry;
}
return NULL;
}
static struct ib_agent_port_private *
ib_get_agent_port(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *entry;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
entry = __ib_get_agent_port(device, port_num);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
return entry;
}
void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
struct ib_wc *wc, struct ib_device *device,
int port_num, int qpn)
{
struct ib_agent_port_private *port_priv;
struct ib_mad_agent *agent;
struct ib_mad_send_buf *send_buf;
struct ib_ah *ah;
struct ib_mad_send_wr_private *mad_send_wr;
if (device->node_type == RDMA_NODE_IB_SWITCH)
port_priv = ib_get_agent_port(device, 0);
else
port_priv = ib_get_agent_port(device, port_num);
if (!port_priv) {
printk(KERN_ERR SPFX "Unable to find port agent\n");
return;
}
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n");
return;
}
send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_KERNEL);
if (IS_ERR(send_buf)) {
printk(KERN_ERR SPFX "ib_create_send_mad error\n");
goto err1;
}
memcpy(send_buf->mad, mad, sizeof *mad);
send_buf->ah = ah;
if (device->node_type == RDMA_NODE_IB_SWITCH) {
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
mad_send_wr->send_wr.wr.ud.port_num = port_num;
}
if (ib_post_send_mad(send_buf, NULL)) {
printk(KERN_ERR SPFX "ib_post_send_mad error\n");
goto err2;
}
return;
err2:
ib_free_send_mad(send_buf);
err1:
ib_destroy_ah(ah);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
ib_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
int ib_agent_port_open(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *port_priv;
unsigned long flags;
int ret;
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
/* Obtain send only MAD agent for SMI QP */
port_priv->agent[0] = ib_register_mad_agent(device, port_num,
IB_QPT_SMI, NULL, 0,
&agent_send_handler,
NULL, NULL);
if (IS_ERR(port_priv->agent[0])) {
ret = PTR_ERR(port_priv->agent[0]);
goto error2;
}
/* Obtain send only MAD agent for GSI QP */
port_priv->agent[1] = ib_register_mad_agent(device, port_num,
IB_QPT_GSI, NULL, 0,
&agent_send_handler,
NULL, NULL);
if (IS_ERR(port_priv->agent[1])) {
ret = PTR_ERR(port_priv->agent[1]);
goto error3;
}
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_agent_port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
return 0;
error3:
ib_unregister_mad_agent(port_priv->agent[0]);
error2:
kfree(port_priv);
error1:
return ret;
}
int ib_agent_port_close(struct ib_device *device, int port_num)
{
struct ib_agent_port_private *port_priv;
unsigned long flags;
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
port_priv = __ib_get_agent_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
printk(KERN_ERR SPFX "Port %d not found\n", port_num);
return -ENODEV;
}
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
ib_unregister_mad_agent(port_priv->agent[1]);
ib_unregister_mad_agent(port_priv->agent[0]);
kfree(port_priv);
return 0;
}

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __AGENT_H_
#define __AGENT_H_
#include <linux/err.h>
#include <rdma/ib_mad.h>
extern int ib_agent_port_open(struct ib_device *device, int port_num);
extern int ib_agent_port_close(struct ib_device *device, int port_num);
extern void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
struct ib_wc *wc, struct ib_device *device,
int port_num, int qpn);
#endif /* __AGENT_H_ */

@@ -0,0 +1,397 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <rdma/ib_cache.h>
#include "core_priv.h"
struct ib_pkey_cache {
int table_len;
u16 table[0];
};
struct ib_gid_cache {
int table_len;
union ib_gid table[0];
};
struct ib_update_work {
struct work_struct work;
struct ib_device *device;
u8 port_num;
};
static inline int start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
static inline int end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
int ib_get_cached_gid(struct ib_device *device,
u8 port_num,
int index,
union ib_gid *gid)
{
struct ib_gid_cache *cache;
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.gid_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*gid = cache->table[index];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
union ib_gid *gid,
u8 *port_num,
u16 *index)
{
struct ib_gid_cache *cache;
unsigned long flags;
int p, i;
int ret = -ENOENT;
*port_num = -1;
if (index)
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
cache = device->cache.gid_cache[p];
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + start_port(device);
if (index)
*index = i;
ret = 0;
goto found;
}
}
}
found:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_gid);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
int index,
u16 *pkey)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
int ib_find_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int i;
int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
*index = -1;
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
*index = i;
ret = 0;
break;
}
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
u8 *lmc)
{
unsigned long flags;
int ret = 0;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.lmc_cache[port_num - start_port(device)];
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
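/*
 * Editor's illustrative sketch (not part of this file): looking a partition
 * up through the cache instead of issuing MADs.  demo_pkey_lookup() and its
 * arguments are hypothetical.
 */
static int demo_pkey_lookup(struct ib_device *device, u8 port_num, u16 pkey)
{
	union ib_gid gid;
	u16 pkey_index;
	int ret;

	/* GID index 0 is the port's default GID. */
	ret = ib_get_cached_gid(device, port_num, 0, &gid);
	if (ret)
		return ret;

	/* The comparison in ib_find_cached_pkey() masks with 0x7fff, so the
	 * membership bit of pkey is ignored; -ENOENT means the partition is
	 * not present in this port's table. */
	ret = ib_find_cached_pkey(device, port_num, pkey, &pkey_index);
	if (ret)
		return ret;

	printk(KERN_INFO "pkey 0x%04x is at index %u\n", pkey, pkey_index);
	return 0;
}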
static void ib_cache_update(struct ib_device *device,
u8 port)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
struct ib_gid_cache *gid_cache = NULL, *old_gid_cache;
int i;
int ret;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
ret = ib_query_port(device, port, tprops);
if (ret) {
printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
ret, device->name);
goto err;
}
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
goto err;
pkey_cache->table_len = tprops->pkey_tbl_len;
gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
sizeof *gid_cache->table, GFP_KERNEL);
if (!gid_cache)
goto err;
gid_cache->table_len = tprops->gid_tbl_len;
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
}
for (i = 0; i < gid_cache->table_len; ++i) {
ret = ib_query_gid(device, port, i, gid_cache->table + i);
if (ret) {
printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
}
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
old_gid_cache = device->cache.gid_cache [port - start_port(device)];
device->cache.pkey_cache[port - start_port(device)] = pkey_cache;
device->cache.gid_cache [port - start_port(device)] = gid_cache;
device->cache.lmc_cache[port - start_port(device)] = tprops->lmc;
write_unlock_irq(&device->cache.lock);
kfree(old_pkey_cache);
kfree(old_gid_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
kfree(gid_cache);
kfree(tprops);
}
static void ib_cache_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
ib_cache_update(work->device, work->port_num);
kfree(work);
}
static void ib_cache_event(struct ib_event_handler *handler,
struct ib_event *event)
{
struct ib_update_work *work;
if (event->event == IB_EVENT_PORT_ERR ||
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
if (work) {
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
schedule_work(&work->work);
}
}
}
static void ib_cache_setup_one(struct ib_device *device)
{
int p;
rwlock_init(&device->cache.lock);
device->cache.pkey_cache =
kmalloc(sizeof *device->cache.pkey_cache *
(end_port(device) - start_port(device) + 1), GFP_KERNEL);
device->cache.gid_cache =
kmalloc(sizeof *device->cache.gid_cache *
(end_port(device) - start_port(device) + 1), GFP_KERNEL);
device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
(end_port(device) -
start_port(device) + 1),
GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache ||
!device->cache.lmc_cache) {
printk(KERN_WARNING "Couldn't allocate cache "
"for %s\n", device->name);
goto err;
}
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
device->cache.pkey_cache[p] = NULL;
device->cache.gid_cache [p] = NULL;
ib_cache_update(device, p + start_port(device));
}
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
if (ib_register_event_handler(&device->cache.event_handler))
goto err_cache;
return;
err_cache:
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static void ib_cache_cleanup_one(struct ib_device *device)
{
int p;
ib_unregister_event_handler(&device->cache.event_handler);
flush_scheduled_work();
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);
kfree(device->cache.gid_cache[p]);
}
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
}
static struct ib_client cache_client = {
.name = "cache",
.add = ib_cache_setup_one,
.remove = ib_cache_cleanup_one
};
int __init ib_cache_setup(void)
{
return ib_register_client(&cache_client);
}
void __exit ib_cache_cleanup(void)
{
ib_unregister_client(&cache_client);
}

File diff suppressed because it is too large.

@@ -0,0 +1,819 @@
/*
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(CM_MSGS_H)
#define CM_MSGS_H
#include <rdma/ib_mad.h>
#include <rdma/ib_cm.h>
/*
* Parameters to routines below should be in network-byte order, and values
* are returned in network-byte order.
*/
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
CM_MSG_SEQUENCE_DREQ,
CM_MSG_SEQUENCE_SIDR
};
struct cm_req_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 rsvd4;
__be64 service_id;
__be64 local_ca_guid;
__be32 rsvd24;
__be32 local_qkey;
/* local QPN:24, responder resources:8 */
__be32 offset32;
/* local EECN:24, initiator depth:8 */
__be32 offset36;
/*
* remote EECN:24, remote CM response timeout:5,
* transport service type:2, end-to-end flow control:1
*/
__be32 offset40;
/* starting PSN:24, local CM response timeout:5, retry count:3 */
__be32 offset44;
__be16 pkey;
/* path MTU:4, RDC exists:1, RNR retry count:3. */
u8 offset50;
/* max CM Retries:4, SRQ:1, rsvd:3 */
u8 offset51;
__be16 primary_local_lid;
__be16 primary_remote_lid;
union ib_gid primary_local_gid;
union ib_gid primary_remote_gid;
/* flow label:20, rsvd:6, packet rate:6 */
__be32 primary_offset88;
u8 primary_traffic_class;
u8 primary_hop_limit;
/* SL:4, subnet local:1, rsvd:3 */
u8 primary_offset94;
/* local ACK timeout:5, rsvd:3 */
u8 primary_offset95;
__be16 alt_local_lid;
__be16 alt_remote_lid;
union ib_gid alt_local_gid;
union ib_gid alt_remote_gid;
/* flow label:20, rsvd:6, packet rate:6 */
__be32 alt_offset132;
u8 alt_traffic_class;
u8 alt_hop_limit;
/* SL:4, subnet local:1, rsvd:3 */
u8 alt_offset138;
/* local ACK timeout:5, rsvd:3 */
u8 alt_offset139;
u8 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
}
static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
{
req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(req_msg->offset32) &
0x000000FF));
}
static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
{
return (u8) be32_to_cpu(req_msg->offset32);
}
static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
{
req_msg->offset32 = cpu_to_be32(resp_res |
(be32_to_cpu(req_msg->offset32) &
0xFFFFFF00));
}
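/*
 * Editor's illustrative sketch (not part of this header): offset32 packs the
 * local QPN in its top 24 bits and the responder resources in its low 8
 * bits, so the two setters above can be used independently without
 * clobbering each other.  The values are arbitrary.
 */
static inline void cm_req_offset32_demo(struct cm_req_msg *req_msg)
{
	cm_req_set_local_qpn(req_msg, cpu_to_be32(0x123456));
	cm_req_set_resp_res(req_msg, 4);

	/* offset32 now holds 0x12345604 in network byte order:
	 * cm_req_get_local_qpn() returns cpu_to_be32(0x123456) and
	 * cm_req_get_resp_res() returns 4. */
}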
static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
{
return (u8) be32_to_cpu(req_msg->offset36);
}
static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
u8 init_depth)
{
req_msg->offset36 = cpu_to_be32(init_depth |
(be32_to_cpu(req_msg->offset36) &
0xFFFFFF00));
}
static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
{
return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
}
static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
u8 resp_timeout)
{
req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(req_msg->offset40) &
0xFFFFFF07));
}
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
default: return 0;
}
}
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
switch(qp_type) {
case IB_QPT_UC:
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9);
}
}
static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
{
return be32_to_cpu(req_msg->offset40) & 0x1;
}
static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
u8 flow_ctrl)
{
req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
(be32_to_cpu(req_msg->offset40) &
0xFFFFFFFE));
}
static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
}
static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
__be32 starting_psn)
{
req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
(be32_to_cpu(req_msg->offset44) & 0x000000FF));
}
static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
{
return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
}
static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
u8 resp_timeout)
{
req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
}
static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
}
static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
u8 retry_count)
{
req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
(be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
}
static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
{
return req_msg->offset50 >> 4;
}
static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
{
req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
}
static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
{
return req_msg->offset50 & 0x7;
}
static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
u8 rnr_retry_count)
{
req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
(rnr_retry_count & 0x7));
}
static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
{
return req_msg->offset51 >> 4;
}
static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
u8 retries)
{
req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
}
static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
{
return (req_msg->offset51 & 0x8) >> 3;
}
static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
{
req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
((srq & 0x1) << 3));
}
static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
}
static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
__be32 flow_label)
{
req_msg->primary_offset88 = cpu_to_be32(
(be32_to_cpu(req_msg->primary_offset88) &
0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
}
static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
u8 rate)
{
req_msg->primary_offset88 = cpu_to_be32(
(be32_to_cpu(req_msg->primary_offset88) &
0xFFFFFFC0) | (rate & 0x3F));
}
static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->primary_offset94 >> 4);
}
static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
{
req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
(sl << 4));
}
static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
{
return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
}
static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
u8 subnet_local)
{
req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
((subnet_local & 0x1) << 3));
}
static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->primary_offset95 >> 3);
}
static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
u8 local_ack_timeout)
{
req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
(local_ack_timeout << 3));
}
static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
{
return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
}
static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
__be32 flow_label)
{
req_msg->alt_offset132 = cpu_to_be32(
(be32_to_cpu(req_msg->alt_offset132) &
0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
{
return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
}
static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
u8 rate)
{
req_msg->alt_offset132 = cpu_to_be32(
(be32_to_cpu(req_msg->alt_offset132) &
0xFFFFFFC0) | (rate & 0x3F));
}
static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->alt_offset138 >> 4);
}
static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
{
req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
(sl << 4));
}
static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
{
return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
}
static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
u8 subnet_local)
{
req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
((subnet_local & 0x1) << 3));
}
static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
{
return (u8) (req_msg->alt_offset139 >> 3);
}
static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
u8 local_ack_timeout)
{
req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
(local_ack_timeout << 3));
}
/* Message REJected or MRAed */
enum cm_msg_response {
CM_MSG_RESPONSE_REQ = 0x0,
CM_MSG_RESPONSE_REP = 0x1,
CM_MSG_RESPONSE_OTHER = 0x2
};
struct cm_mra_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* message MRAed:2, rsvd:6 */
u8 offset8;
/* service timeout:5, rsvd:3 */
u8 offset9;
u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
{
return (u8) (mra_msg->offset8 >> 6);
}
static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
{
mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
}
static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
{
return (u8) (mra_msg->offset9 >> 3);
}
static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
u8 service_timeout)
{
mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
(service_timeout << 3));
}
struct cm_rej_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* message REJected:2, rsvd:6 */
u8 offset8;
/* reject info length:7, rsvd:1. */
u8 offset9;
__be16 reason;
u8 ari[IB_CM_REJ_ARI_LENGTH];
u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
{
return (u8) (rej_msg->offset8 >> 6);
}
static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
{
rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
}
static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
{
return (u8) (rej_msg->offset9 >> 1);
}
static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
u8 len)
{
rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
}
struct cm_rep_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
__be32 local_qkey;
/* local QPN:24, rsvd:8 */
__be32 offset12;
/* local EECN:24, rsvd:8 */
__be32 offset16;
/* starting PSN:24 rsvd:8 */
__be32 offset20;
u8 resp_resources;
u8 initiator_depth;
/* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
u8 offset26;
/* RNR retry count:3, SRQ:1, rsvd:5 */
u8 offset27;
__be64 local_ca_guid;
u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
}
static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
{
rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(rep_msg->offset12) & 0x000000FF));
}
static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
}
static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
__be32 starting_psn)
{
rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
(be32_to_cpu(rep_msg->offset20) & 0x000000FF));
}
static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset26 >> 3);
}
static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
u8 target_ack_delay)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
(target_ack_delay << 3));
}
static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
{
return (u8) ((rep_msg->offset26 & 0x06) >> 1);
}
static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
((failover & 0x3) << 1));
}
static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset26 & 0x01);
}
static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
u8 flow_ctrl)
{
rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
(flow_ctrl & 0x1));
}
static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
{
return (u8) (rep_msg->offset27 >> 5);
}
static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
u8 rnr_retry_count)
{
rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
(rnr_retry_count << 5));
}
static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
{
return (u8) ((rep_msg->offset27 >> 4) & 0x1);
}
static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
{
rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
((srq & 0x1) << 4));
}
struct cm_rtu_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_dreq_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
/* remote QPN/EECN:24, rsvd:8 */
__be32 offset8;
u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
{
return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
}
static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
{
dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
}
struct cm_drep_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_lap_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
__be32 rsvd8;
/* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
__be32 offset12;
__be32 rsvd16;
__be16 alt_local_lid;
__be16 alt_remote_lid;
union ib_gid alt_local_gid;
union ib_gid alt_remote_gid;
/* flow label:20, rsvd:4, traffic class:8 */
__be32 offset56;
u8 alt_hop_limit;
/* rsvd:2, packet rate:6 */
u8 offset61;
/* SL:4, subnet local:1, rsvd:3 */
u8 offset62;
/* local ACK timeout:5, rsvd:3 */
u8 offset63;
u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
{
return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
}
static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
{
lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(lap_msg->offset12) &
0x000000FF));
}
static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
{
return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
}
static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
u8 resp_timeout)
{
lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
(be32_to_cpu(lap_msg->offset12) &
0xFFFFFF07));
}
static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
{
return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
}
static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
__be32 flow_label)
{
lap_msg->offset56 = cpu_to_be32(
(be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
(be32_to_cpu(flow_label) << 12));
}
static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
{
return (u8) be32_to_cpu(lap_msg->offset56);
}
static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
u8 traffic_class)
{
lap_msg->offset56 = cpu_to_be32(traffic_class |
(be32_to_cpu(lap_msg->offset56) &
0xFFFFFF00));
}
static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset61 & 0x3F;
}
static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
u8 packet_rate)
{
lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
}
static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset62 >> 4;
}
static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
{
lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
}
static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
{
return (lap_msg->offset62 >> 3) & 0x1;
}
static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
u8 subnet_local)
{
lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
(lap_msg->offset62 & 0xF7);
}
static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
{
return lap_msg->offset63 >> 3;
}
static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
u8 local_ack_timeout)
{
lap_msg->offset63 = (local_ack_timeout << 3) |
(lap_msg->offset63 & 0x07);
}
struct cm_apr_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
u8 info_length;
u8 ap_status;
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_sidr_req_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
__be16 pkey;
__be16 rsvd;
__be64 service_id;
u8 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
struct cm_sidr_rep_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
u8 status;
u8 info_length;
__be16 rsvd;
/* QPN:24, rsvd:8 */
__be32 offset8;
__be64 service_id;
__be32 qkey;
u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
{
return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
}
static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
__be32 qpn)
{
sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
(be32_to_cpu(sidr_rep_msg->offset8) &
0x000000FF));
}
#endif /* CM_MSGS_H */

File diff suppressed because it is too large.

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _CORE_PRIV_H
#define _CORE_PRIV_H
#include <linux/list.h>
#include <linux/spinlock.h>
#include <rdma/ib_verbs.h>
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_cleanup(void);
#endif /* _CORE_PRIV_H */

@@ -0,0 +1,741 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
struct ib_client_data {
struct list_head list;
struct ib_client *client;
void * data;
};
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
/*
* device_mutex protects access to both device_list and client_list.
* There's no real point to using multiple locks or something fancier
* like an rwsem: we always access both lists, and we're always
* modifying one list or the other list. In any case this is not a
* hot path so there's no point in trying to optimize.
*/
static DEFINE_MUTEX(device_mutex);
static int ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
static const struct {
size_t offset;
char *name;
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_ah),
IB_MANDATORY_FUNC(destroy_ah),
IB_MANDATORY_FUNC(create_qp),
IB_MANDATORY_FUNC(modify_qp),
IB_MANDATORY_FUNC(destroy_qp),
IB_MANDATORY_FUNC(post_send),
IB_MANDATORY_FUNC(post_recv),
IB_MANDATORY_FUNC(create_cq),
IB_MANDATORY_FUNC(destroy_cq),
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
IB_MANDATORY_FUNC(dereg_mr)
};
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
return -EINVAL;
}
}
return 0;
}
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
list_for_each_entry(device, &device_list, core_list)
if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
return device;
return NULL;
}
static int alloc_name(char *name)
{
unsigned long *inuse;
char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
int i;
inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
if (!inuse)
return -ENOMEM;
list_for_each_entry(device, &device_list, core_list) {
if (!sscanf(device->name, name, &i))
continue;
if (i < 0 || i >= PAGE_SIZE * 8)
continue;
snprintf(buf, sizeof buf, name, i);
if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
set_bit(i, inuse);
}
i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
free_page((unsigned long) inuse);
snprintf(buf, sizeof buf, name, i);
if (__ib_device_get_by_name(buf))
return -ENFILE;
strlcpy(name, buf, IB_DEVICE_NAME_MAX);
return 0;
}
static int start_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
}
static int end_port(struct ib_device *device)
{
return (device->node_type == RDMA_NODE_IB_SWITCH) ?
0 : device->phys_port_cnt;
}
/**
* ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
* including any private data used by the low-level driver.
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
struct ib_device *ib_alloc_device(size_t size)
{
BUG_ON(size < sizeof (struct ib_device));
return kzalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(ib_alloc_device);
/**
* ib_dealloc_device - free an IB device struct
* @device:structure to free
*
* Free a structure allocated with ib_alloc_device().
*/
void ib_dealloc_device(struct ib_device *device)
{
if (device->reg_state == IB_DEV_UNINITIALIZED) {
kfree(device);
return;
}
BUG_ON(device->reg_state != IB_DEV_UNREGISTERED);
kobject_put(&device->dev.kobj);
}
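/*
 * Illustrative sketch, not part of the original file: a low-level driver
 * normally embeds struct ib_device inside its own per-HCA structure and
 * passes the combined size to ib_alloc_device(), recovering its private
 * data later with container_of().  "struct example_hca" and
 * example_hca_alloc() are hypothetical names used only for this sketch.
 */
struct example_hca {
        struct ib_device ibdev;
        int              private_state;  /* driver-private data lives here */
};

static struct example_hca *example_hca_alloc(void)
{
        struct ib_device *ibdev;

        ibdev = ib_alloc_device(sizeof(struct example_hca));
        if (!ibdev)
                return NULL;

        /* The private structure wraps the ib_device that was just allocated. */
        return container_of(ibdev, struct example_hca, ibdev);
}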
EXPORT_SYMBOL(ib_dealloc_device);
static int add_client_context(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context) {
printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
device->name, client->name);
return -ENOMEM;
}
context->client = client;
context->data = NULL;
spin_lock_irqsave(&device->client_data_lock, flags);
list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags);
return 0;
}
static int read_port_table_lengths(struct ib_device *device)
{
struct ib_port_attr *tprops = NULL;
int num_ports, ret = -ENOMEM;
u8 port_index;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
goto out;
num_ports = end_port(device) - start_port(device) + 1;
device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
GFP_KERNEL);
device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
GFP_KERNEL);
if (!device->pkey_tbl_len || !device->gid_tbl_len)
goto err;
for (port_index = 0; port_index < num_ports; ++port_index) {
ret = ib_query_port(device, port_index + start_port(device),
tprops);
if (ret)
goto err;
device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
}
ret = 0;
goto out;
err:
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
out:
kfree(tprops);
return ret;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device)
{
int ret;
mutex_lock(&device_mutex);
if (strchr(device->name, '%')) {
ret = alloc_name(device->name);
if (ret)
goto out;
}
if (ib_device_check_mandatory(device)) {
ret = -EINVAL;
goto out;
}
INIT_LIST_HEAD(&device->event_handler_list);
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
ret = read_port_table_lengths(device);
if (ret) {
printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
device->name);
goto out;
}
ret = ib_device_register_sysfs(device);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
goto out;
}
list_add_tail(&device->core_list, &device_list);
device->reg_state = IB_DEV_REGISTERED;
{
struct ib_client *client;
list_for_each_entry(client, &client_list, list)
if (client->add && !add_client_context(device, client))
client->add(device);
}
out:
mutex_unlock(&device_mutex);
return ret;
}
EXPORT_SYMBOL(ib_register_device);
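/*
 * Illustrative sketch, not part of the original file: before calling
 * ib_register_device() a driver must already have filled in the mandatory
 * verbs (checked by ib_device_check_mandatory() above), node_type and
 * phys_port_cnt.  A printf-style name such as "exhca%d" lets alloc_name()
 * pick the first free index.  "exhca" and example_register() are
 * hypothetical names used only for this sketch.
 */
static int example_register(struct ib_device *ibdev)
{
        int ret;

        strlcpy(ibdev->name, "exhca%d", IB_DEVICE_NAME_MAX);

        ret = ib_register_device(ibdev);
        if (ret)
                printk(KERN_WARNING "exhca: ib_register_device failed: %d\n",
                       ret);
        return ret;
}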
/**
* ib_unregister_device - Unregister an IB device
* @device:Device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
*/
void ib_unregister_device(struct ib_device *device)
{
struct ib_client *client;
struct ib_client_data *context, *tmp;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry_reverse(client, &client_list, list)
if (client->remove)
client->remove(device);
list_del(&device->core_list);
kfree(device->gid_tbl_len);
kfree(device->pkey_tbl_len);
mutex_unlock(&device_mutex);
ib_device_unregister_sysfs(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags);
device->reg_state = IB_DEV_UNREGISTERED;
}
EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_register_client - Register an IB client
* @client:Client to register
*
* Upper level users of the IB drivers can use ib_register_client() to
* register callbacks for IB device addition and removal. When an IB
* device is added, each registered client's add method will be called
* (in the order the clients were registered), and when a device is
* removed, each client's remove method will be called (in the reverse
* order that clients were registered). In addition, when
* ib_register_client() is called, the client will receive an add
* callback for all devices already registered.
*/
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
mutex_lock(&device_mutex);
list_add_tail(&client->list, &client_list);
list_for_each_entry(device, &device_list, core_list)
if (client->add && !add_client_context(device, client))
client->add(device);
mutex_unlock(&device_mutex);
return 0;
}
EXPORT_SYMBOL(ib_register_client);
/**
* ib_unregister_client - Unregister an IB client
* @client:Client to unregister
*
* Upper level users use ib_unregister_client() to remove their client
* registration. When ib_unregister_client() is called, the client
* will receive a remove callback for each IB device still registered.
*/
void ib_unregister_client(struct ib_client *client)
{
struct ib_client_data *context, *tmp;
struct ib_device *device;
unsigned long flags;
mutex_lock(&device_mutex);
list_for_each_entry(device, &device_list, core_list) {
if (client->remove)
client->remove(device);
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
if (context->client == client) {
list_del(&context->list);
kfree(context);
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
list_del(&client->list);
mutex_unlock(&device_mutex);
}
EXPORT_SYMBOL(ib_unregister_client);
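/*
 * Illustrative sketch, not part of the original file: a minimal consumer
 * of the client interface documented above.  The "example" names are
 * hypothetical.  Every device already registered, and every device added
 * later, is reported through the add callback; remove is called when the
 * device or the client goes away.
 */
static void example_add_one(struct ib_device *device)
{
        printk(KERN_INFO "example: device %s added\n", device->name);
}

static void example_remove_one(struct ib_device *device)
{
        printk(KERN_INFO "example: device %s removed\n", device->name);
}

static struct ib_client example_client = {
        .name   = "example",
        .add    = example_add_one,
        .remove = example_remove_one
};

/* Typically wired up from the consumer's module init/exit paths: */
static int __init example_client_init(void)
{
        return ib_register_client(&example_client);
}

static void __exit example_client_exit(void)
{
        ib_unregister_client(&example_client);
}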
/**
* ib_get_client_data - Get IB client context
* @device:Device to get context for
* @client:Client to get context for
*
* ib_get_client_data() returns client context set with
* ib_set_client_data().
*/
void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
{
struct ib_client_data *context;
void *ret = NULL;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
ret = context->data;
break;
}
spin_unlock_irqrestore(&device->client_data_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_client_data);
/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
* @client:Client to set context for
* @data:Context to set
*
* ib_set_client_data() sets client context that can be retrieved with
* ib_get_client_data().
*/
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data)
{
struct ib_client_data *context;
unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) {
context->data = data;
goto out;
}
printk(KERN_WARNING "No client context found for %s/%s\n",
device->name, client->name);
out:
spin_unlock_irqrestore(&device->client_data_lock, flags);
}
EXPORT_SYMBOL(ib_set_client_data);
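/*
 * Illustrative sketch, not part of the original file: a client's add
 * callback usually allocates a per-device context and stores it with
 * ib_set_client_data(); the remove callback fetches it back with
 * ib_get_client_data() and frees it.  "struct example_ctx" and
 * "example_ctx_client" are hypothetical.
 */
struct example_ctx {
        int opened;     /* stand-in for real per-device state */
};

static struct ib_client example_ctx_client;

static void example_ctx_add(struct ib_device *device)
{
        struct example_ctx *ctx;

        ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
        if (!ctx)
                return;
        ib_set_client_data(device, &example_ctx_client, ctx);
}

static void example_ctx_remove(struct ib_device *device)
{
        struct example_ctx *ctx;

        ctx = ib_get_client_data(device, &example_ctx_client);
        kfree(ctx);
}

static struct ib_client example_ctx_client = {
        .name   = "example_ctx",
        .add    = example_ctx_add,
        .remove = example_ctx_remove
};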
/**
* ib_register_event_handler - Register an IB event handler
* @event_handler:Handler to register
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
* chapter 11 of the InfiniBand Architecture Specification). This
* callback may occur in interrupt context.
*/
int ib_register_event_handler (struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_register_event_handler);
/**
* ib_unregister_event_handler - Unregister an event handler
* @event_handler:Handler to unregister
*
* Unregister an event handler registered with
* ib_register_event_handler().
*/
int ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
unsigned long flags;
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_del(&event_handler->list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_unregister_event_handler);
/**
* ib_dispatch_event - Dispatch an asynchronous event
* @event:Event to dispatch
*
* Low-level drivers must call ib_dispatch_event() to dispatch the
* event to all registered event handlers when an asynchronous event
* occurs.
*/
void ib_dispatch_event(struct ib_event *event)
{
unsigned long flags;
struct ib_event_handler *handler;
spin_lock_irqsave(&event->device->event_handler_lock, flags);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
}
EXPORT_SYMBOL(ib_dispatch_event);
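/*
 * Illustrative sketch, not part of the original file: consumers embed a
 * struct ib_event_handler, attach it to a device with a callback and
 * register it; ib_dispatch_event() above then invokes the callback,
 * possibly from interrupt context, so it must not sleep.  This assumes
 * the INIT_IB_EVENT_HANDLER() initializer from <rdma/ib_verbs.h> is
 * available; example_event_cb()/example_watch_events() are hypothetical.
 */
static void example_event_cb(struct ib_event_handler *handler,
                             struct ib_event *event)
{
        printk(KERN_INFO "example: async event %d on device %s\n",
               event->event, event->device->name);
}

static int example_watch_events(struct ib_device *device,
                                struct ib_event_handler *handler)
{
        INIT_IB_EVENT_HANDLER(handler, device, example_event_cb);
        return ib_register_event_handler(handler);
}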
/**
* ib_query_device - Query IB device attributes
* @device:Device to query
* @device_attr:Device attributes
*
* ib_query_device() returns the attributes of a device through the
* @device_attr pointer.
*/
int ib_query_device(struct ib_device *device,
struct ib_device_attr *device_attr)
{
return device->query_device(device, device_attr);
}
EXPORT_SYMBOL(ib_query_device);
/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* @port_num:Port number to query
* @port_attr:Port attributes
*
* ib_query_port() returns the attributes of a port through the
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr)
{
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
/**
* ib_query_gid - Get GID table entry
* @device:Device to query
* @port_num:Port number to query
* @index:GID table index to query
* @gid:Returned GID
*
* ib_query_gid() fetches the specified GID table entry.
*/
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid)
{
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
* @index:P_Key table index to query
* @pkey:Returned P_Key
*
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
return device->query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
/**
* ib_modify_device - Change IB device attributes
* @device:Device to modify
* @device_modify_mask:Mask of attributes to change
* @device_modify:New attribute values
*
* ib_modify_device() changes a device's attributes as specified by
* the @device_modify_mask and @device_modify structure.
*/
int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
return device->modify_device(device, device_modify_mask,
device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
/**
* ib_modify_port - Modifies the attributes for the specified port.
* @device: The device to modify.
* @port_num: The number of the port to modify.
* @port_modify_mask: Mask used to specify which attributes of the port
* to change.
* @port_modify: New attribute values for the port.
*
* ib_modify_port() changes a port's attributes as specified by the
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
port_modify);
}
EXPORT_SYMBOL(ib_modify_port);
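/*
 * Illustrative sketch, not part of the original file: a common use of
 * ib_modify_port() is simply to set or clear port capability bits while
 * leaving the modify mask zero (the connection manager advertises
 * IB_PORT_CM_SUP this way, for instance).  example_advertise_cm() is a
 * hypothetical wrapper.
 */
static int example_advertise_cm(struct ib_device *device, u8 port_num)
{
        struct ib_port_modify port_modify = {
                .set_port_cap_mask = IB_PORT_CM_SUP
        };

        /* With a zero mask only the capability set/clear fields take effect. */
        return ib_modify_port(device, port_num, 0, &port_modify);
}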
/**
* ib_find_gid - Returns the port number and GID table index where
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = start_port(device); port <= end_port(device); ++port) {
for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
*index = i;
return 0;
}
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);
/**
* ib_find_pkey - Returns the PKey table index where a specified
* PKey value occurs.
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
*index = i;
return 0;
}
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
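/*
 * Illustrative sketch, not part of the original file: resolving a known
 * GID and P_Key to a port number and table indices using the two helpers
 * above.  example_resolve() is a hypothetical caller.
 */
static int example_resolve(struct ib_device *device, union ib_gid *gid,
                           u16 pkey)
{
        u8  port_num;
        u16 gid_index, pkey_index;
        int ret;

        ret = ib_find_gid(device, gid, &port_num, &gid_index);
        if (ret)
                return ret;     /* -ENOENT if no port has this GID */

        /* ib_find_pkey() matches on the low 15 bits, ignoring the membership bit. */
        return ib_find_pkey(device, port_num, pkey, &pkey_index);
}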
static int __init ib_core_init(void)
{
int ret;
ret = ib_sysfs_setup();
if (ret)
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
ib_sysfs_cleanup();
}
return ret;
}
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
flush_scheduled_work();
}
module_init(ib_core_init);
module_exit(ib_core_cleanup);

View File

@@ -0,0 +1,544 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
#include <rdma/ib_fmr_pool.h>
#include "core_priv.h"
#define PFX "fmr_pool: "
enum {
IB_FMR_MAX_REMAPS = 32,
IB_FMR_HASH_BITS = 8,
IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
};
/*
* If an FMR is not in use, then the list member will point to either
* its pool's free_list (if the FMR can be mapped again; that is,
* remap_count < pool->max_remaps) or its pool's dirty_list (if the
* FMR needs to be unmapped before being remapped). In either of
* these cases it is a bug if the ref_count is not 0. In other words,
* if ref_count is > 0, then the list member must not be linked into
* either free_list or dirty_list.
*
* The cache_node member is used to link the FMR into a cache bucket
* (if caching is enabled). This is independent of the reference
* count of the FMR. When a valid FMR is released, its ref_count is
* decremented, and if ref_count reaches 0, the FMR is placed in
* either free_list or dirty_list as appropriate. However, it is not
* removed from the cache and may be "revived" if a call to
* ib_fmr_register_physical() occurs before the FMR is remapped. In
* this case we just increment the ref_count and remove the FMR from
* free_list/dirty_list.
*
* Before we remap an FMR from free_list, we remove it from the cache
* (to prevent another user from obtaining a stale FMR). When an FMR
* is released, we add it to the tail of the free list, so that our
* cache eviction policy is "least recently used."
*
* All manipulation of ref_count, list and cache_node is protected by
* pool_lock to maintain consistency.
*/
struct ib_fmr_pool {
spinlock_t pool_lock;
int pool_size;
int max_pages;
int max_remaps;
int dirty_watermark;
int dirty_len;
struct list_head free_list;
struct list_head dirty_list;
struct hlist_head *cache_bucket;
void (*flush_function)(struct ib_fmr_pool *pool,
void * arg);
void *flush_arg;
struct task_struct *thread;
atomic_t req_ser;
atomic_t flush_ser;
wait_queue_head_t force_wait;
};
static inline u32 ib_fmr_hash(u64 first_page)
{
return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
(IB_FMR_HASH_SIZE - 1);
}
/* Caller must hold pool_lock */
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
u64 *page_list,
int page_list_len,
u64 io_virtual_address)
{
struct hlist_head *bucket;
struct ib_pool_fmr *fmr;
struct hlist_node *pos;
if (!pool->cache_bucket)
return NULL;
bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
hlist_for_each_entry(fmr, pos, bucket, cache_node)
if (io_virtual_address == fmr->io_virtual_address &&
page_list_len == fmr->page_list_len &&
!memcmp(page_list, fmr->page_list,
page_list_len * sizeof *page_list))
return fmr;
return NULL;
}
static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
int ret;
struct ib_pool_fmr *fmr;
LIST_HEAD(unmap_list);
LIST_HEAD(fmr_list);
spin_lock_irq(&pool->pool_lock);
list_for_each_entry(fmr, &pool->dirty_list, list) {
hlist_del_init(&fmr->cache_node);
fmr->remap_count = 0;
list_add_tail(&fmr->fmr->list, &fmr_list);
#ifdef DEBUG
if (fmr->ref_count != 0) {
printk(KERN_WARNING PFX "Unmapping FMR %p with ref count %d\n",
fmr, fmr->ref_count);
}
#endif
}
list_splice_init(&pool->dirty_list, &unmap_list);
pool->dirty_len = 0;
spin_unlock_irq(&pool->pool_lock);
if (list_empty(&unmap_list)) {
return;
}
ret = ib_unmap_fmr(&fmr_list);
if (ret)
printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
spin_lock_irq(&pool->pool_lock);
list_splice(&unmap_list, &pool->free_list);
spin_unlock_irq(&pool->pool_lock);
}
static int ib_fmr_cleanup_thread(void *pool_ptr)
{
struct ib_fmr_pool *pool = pool_ptr;
do {
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
ib_fmr_batch_release(pool);
atomic_inc(&pool->flush_ser);
wake_up_interruptible(&pool->force_wait);
if (pool->flush_function)
pool->flush_function(pool, pool->flush_arg);
}
set_current_state(TASK_INTERRUPTIBLE);
if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
!kthread_should_stop())
schedule();
__set_current_state(TASK_RUNNING);
} while (!kthread_should_stop());
return 0;
}
/**
* ib_create_fmr_pool - Create an FMR pool
* @pd:Protection domain for FMRs
* @params:FMR pool parameters
*
* Create a pool of FMRs. Return value is pointer to new pool or
* error code if creation failed.
*/
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
struct ib_fmr_pool_param *params)
{
struct ib_device *device;
struct ib_fmr_pool *pool;
struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
if (!params)
return ERR_PTR(-EINVAL);
device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) {
printk(KERN_INFO PFX "Device %s does not support FMRs\n",
device->name);
return ERR_PTR(-ENOSYS);
}
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
kfree(attr);
return ERR_PTR(ret);
}
if (!attr->max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
max_remaps = attr->max_map_per_fmr;
kfree(attr);
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
return ERR_PTR(-ENOMEM);
}
pool->cache_bucket = NULL;
pool->flush_function = params->flush_function;
pool->flush_arg = params->flush_arg;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->dirty_list);
if (params->cache) {
pool->cache_bucket =
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
INIT_HLIST_HEAD(pool->cache_bucket + i);
}
pool->pool_size = 0;
pool->max_pages = params->max_pages_per_fmr;
pool->max_remaps = max_remaps;
pool->dirty_watermark = params->dirty_watermark;
pool->dirty_len = 0;
spin_lock_init(&pool->pool_lock);
atomic_set(&pool->req_ser, 0);
atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait);
pool->thread = kthread_run(ib_fmr_cleanup_thread,
pool,
"ib_fmr(%s)",
device->name);
if (IS_ERR(pool->thread)) {
printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
ret = PTR_ERR(pool->thread);
goto out_free_pool;
}
{
struct ib_pool_fmr *fmr;
struct ib_fmr_attr fmr_attr = {
.max_pages = params->max_pages_per_fmr,
.max_maps = pool->max_remaps,
.page_shift = params->page_shift
};
int bytes_per_fmr = sizeof *fmr;
if (pool->cache_bucket)
bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
for (i = 0; i < params->pool_size; ++i) {
fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
if (!fmr) {
printk(KERN_WARNING PFX "failed to allocate fmr "
"struct for FMR %d\n", i);
goto out_fail;
}
fmr->pool = pool;
fmr->remap_count = 0;
fmr->ref_count = 0;
INIT_HLIST_NODE(&fmr->cache_node);
fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
if (IS_ERR(fmr->fmr)) {
printk(KERN_WARNING PFX "fmr_create failed "
"for FMR %d\n", i);
kfree(fmr);
goto out_fail;
}
list_add_tail(&fmr->list, &pool->free_list);
++pool->pool_size;
}
}
return pool;
out_free_pool:
kfree(pool->cache_bucket);
kfree(pool);
return ERR_PTR(ret);
out_fail:
ib_destroy_fmr_pool(pool);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_fmr_pool);
/**
* ib_destroy_fmr_pool - Free FMR pool
* @pool:FMR pool to free
*
* Destroy an FMR pool and free all associated resources.
*/
void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
{
struct ib_pool_fmr *fmr;
struct ib_pool_fmr *tmp;
LIST_HEAD(fmr_list);
int i;
kthread_stop(pool->thread);
ib_fmr_batch_release(pool);
i = 0;
list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
if (fmr->remap_count) {
INIT_LIST_HEAD(&fmr_list);
list_add_tail(&fmr->fmr->list, &fmr_list);
ib_unmap_fmr(&fmr_list);
}
ib_dealloc_fmr(fmr->fmr);
list_del(&fmr->list);
kfree(fmr);
++i;
}
if (i < pool->pool_size)
printk(KERN_WARNING PFX "pool still has %d regions registered\n",
pool->pool_size - i);
kfree(pool->cache_bucket);
kfree(pool);
}
EXPORT_SYMBOL(ib_destroy_fmr_pool);
/**
* ib_flush_fmr_pool - Invalidate all unmapped FMRs
* @pool:FMR pool to flush
*
* Ensure that all unmapped FMRs are fully invalidated.
*/
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
int serial;
struct ib_pool_fmr *fmr, *next;
/*
* The free_list holds FMRs that may have been used
* but have not been remapped enough times to be dirty.
* Put them on the dirty list now so that the cleanup
* thread will reap them too.
*/
spin_lock_irq(&pool->pool_lock);
list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
if (fmr->remap_count > 0)
list_move(&fmr->list, &pool->dirty_list);
}
spin_unlock_irq(&pool->pool_lock);
serial = atomic_inc_return(&pool->req_ser);
wake_up_process(pool->thread);
if (wait_event_interruptible(pool->force_wait,
atomic_read(&pool->flush_ser) - serial >= 0))
return -EINTR;
return 0;
}
EXPORT_SYMBOL(ib_flush_fmr_pool);
/**
* ib_fmr_pool_map_phys - Map an FMR from an FMR pool
* @pool_handle:FMR pool to allocate FMR from
* @page_list:List of pages to map
* @list_len:Number of pages in @page_list
* @io_virtual_address:I/O virtual address for new FMR
*
* Map an FMR from an FMR pool.
*/
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
u64 *page_list,
int list_len,
u64 io_virtual_address)
{
struct ib_fmr_pool *pool = pool_handle;
struct ib_pool_fmr *fmr;
unsigned long flags;
int result;
if (list_len < 1 || list_len > pool->max_pages)
return ERR_PTR(-EINVAL);
spin_lock_irqsave(&pool->pool_lock, flags);
fmr = ib_fmr_cache_lookup(pool,
page_list,
list_len,
io_virtual_address);
if (fmr) {
/* found in cache */
++fmr->ref_count;
if (fmr->ref_count == 1) {
list_del(&fmr->list);
}
spin_unlock_irqrestore(&pool->pool_lock, flags);
return fmr;
}
if (list_empty(&pool->free_list)) {
spin_unlock_irqrestore(&pool->pool_lock, flags);
return ERR_PTR(-EAGAIN);
}
fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
list_del(&fmr->list);
hlist_del_init(&fmr->cache_node);
spin_unlock_irqrestore(&pool->pool_lock, flags);
result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
io_virtual_address);
if (result) {
spin_lock_irqsave(&pool->pool_lock, flags);
list_add(&fmr->list, &pool->free_list);
spin_unlock_irqrestore(&pool->pool_lock, flags);
printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
return ERR_PTR(result);
}
++fmr->remap_count;
fmr->ref_count = 1;
if (pool->cache_bucket) {
fmr->io_virtual_address = io_virtual_address;
fmr->page_list_len = list_len;
memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
spin_lock_irqsave(&pool->pool_lock, flags);
hlist_add_head(&fmr->cache_node,
pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
spin_unlock_irqrestore(&pool->pool_lock, flags);
}
return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);
/**
* ib_fmr_pool_unmap - Unmap FMR
* @fmr:FMR to unmap
*
* Unmap an FMR. The FMR mapping may remain valid until the FMR is
* reused (or until ib_flush_fmr_pool() is called).
*/
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
struct ib_fmr_pool *pool;
unsigned long flags;
pool = fmr->pool;
spin_lock_irqsave(&pool->pool_lock, flags);
--fmr->ref_count;
if (!fmr->ref_count) {
if (fmr->remap_count < pool->max_remaps) {
list_add_tail(&fmr->list, &pool->free_list);
} else {
list_add_tail(&fmr->list, &pool->dirty_list);
if (++pool->dirty_len >= pool->dirty_watermark) {
atomic_inc(&pool->req_ser);
wake_up_process(pool->thread);
}
}
}
#ifdef DEBUG
if (fmr->ref_count < 0)
printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
fmr, fmr->ref_count);
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
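/*
 * Illustrative sketch, not part of the original file: typical FMR pool
 * usage by an upper-layer protocol, pulled together from the
 * ib_create_fmr_pool()/ib_fmr_pool_map_phys()/ib_fmr_pool_unmap()/
 * ib_destroy_fmr_pool() entry points above.  The pool sizing numbers and
 * the example_* names are arbitrary assumptions made for this sketch.
 */
static struct ib_fmr_pool *example_create_pool(struct ib_pd *pd)
{
        struct ib_fmr_pool_param params = {
                .max_pages_per_fmr = 64,
                .page_shift        = PAGE_SHIFT,
                .access            = IB_ACCESS_LOCAL_WRITE |
                                     IB_ACCESS_REMOTE_READ |
                                     IB_ACCESS_REMOTE_WRITE,
                .pool_size         = 32,
                .dirty_watermark   = 16,
                .cache             = 1,
        };

        return ib_create_fmr_pool(pd, &params); /* ERR_PTR() on failure */
}

static int example_map_and_unmap(struct ib_fmr_pool *pool,
                                 u64 *page_list, int npages, u64 iova)
{
        struct ib_pool_fmr *fmr;

        fmr = ib_fmr_pool_map_phys(pool, page_list, npages, iova);
        if (IS_ERR(fmr))
                return PTR_ERR(fmr);    /* -EAGAIN when the free list is empty */

        /* ... post work requests using the keys of fmr->fmr here ... */

        return ib_fmr_pool_unmap(fmr);
}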

File diff suppressed because it is too large

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IWCM_H
#define IWCM_H
enum iw_cm_state {
IW_CM_STATE_IDLE, /* unbound, inactive */
IW_CM_STATE_LISTEN, /* listen waiting for connect */
IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */
IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */
IW_CM_STATE_ESTABLISHED, /* established */
IW_CM_STATE_CLOSING, /* disconnect */
IW_CM_STATE_DESTROYING /* object being deleted */
};
struct iwcm_id_private {
struct iw_cm_id id;
enum iw_cm_state state;
unsigned long flags;
struct ib_qp *qp;
struct completion destroy_comp;
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
atomic_t refcount;
struct list_head work_free_list;
};
#define IWCM_F_CALLBACK_DESTROY 1
#define IWCM_F_CONNECT_WAIT 2
#endif /* IWCM_H */

File diff suppressed because it is too large

View File

@@ -0,0 +1,230 @@
/*
* Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __IB_MAD_PRIV_H__
#define __IB_MAD_PRIV_H__
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/workqueue.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#define PFX "ib_mad: "
#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
/* QP and CQ parameters */
#define IB_MAD_QP_SEND_SIZE 128
#define IB_MAD_QP_RECV_SIZE 512
#define IB_MAD_QP_MIN_SIZE 64
#define IB_MAD_QP_MAX_SIZE 8192
#define IB_MAD_SEND_REQ_MAX_SG 2
#define IB_MAD_RECV_REQ_MAX_SG 1
#define IB_MAD_SEND_Q_PSN 0
/* Registration table sizes */
#define MAX_MGMT_CLASS 80
#define MAX_MGMT_VERSION 8
#define MAX_MGMT_OUI 8
#define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \
IB_MGMT_CLASS_VENDOR_RANGE2_START + 1)
struct ib_mad_list_head {
struct list_head list;
struct ib_mad_queue *mad_queue;
};
struct ib_mad_private_header {
struct ib_mad_list_head mad_list;
struct ib_mad_recv_wc recv_wc;
struct ib_wc wc;
u64 mapping;
} __attribute__ ((packed));
struct ib_mad_private {
struct ib_mad_private_header header;
struct ib_grh grh;
union {
struct ib_mad mad;
struct ib_rmpp_mad rmpp_mad;
struct ib_smp smp;
} mad;
} __attribute__ ((packed));
struct ib_rmpp_segment {
struct list_head list;
u32 num;
u8 data[0];
};
struct ib_mad_agent_private {
struct list_head agent_list;
struct ib_mad_agent agent;
struct ib_mad_reg_req *reg_req;
struct ib_mad_qp_info *qp_info;
spinlock_t lock;
struct list_head send_list;
struct list_head wait_list;
struct list_head done_list;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
struct list_head rmpp_list;
atomic_t refcount;
struct completion comp;
};
struct ib_mad_snoop_private {
struct ib_mad_agent agent;
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
atomic_t refcount;
struct completion comp;
};
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_buf send_buf;
u64 header_mapping;
u64 payload_mapping;
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
unsigned long timeout;
int max_retries;
int retries_left;
int retry;
int refcount;
enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
struct ib_rmpp_segment *last_ack_seg;
struct ib_rmpp_segment *cur_seg;
int last_ack;
int seg_num;
int newwin;
int pad;
};
struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
struct ib_mad_agent_private *recv_mad_agent;
struct ib_mad_send_wr_private *mad_send_wr;
};
struct ib_mad_mgmt_method_table {
struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS];
};
struct ib_mad_mgmt_class_table {
struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
};
struct ib_mad_mgmt_vendor_class {
u8 oui[MAX_MGMT_OUI][3];
struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI];
};
struct ib_mad_mgmt_vendor_class_table {
struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2];
};
struct ib_mad_mgmt_version_table {
struct ib_mad_mgmt_class_table *class;
struct ib_mad_mgmt_vendor_class_table *vendor;
};
struct ib_mad_queue {
spinlock_t lock;
struct list_head list;
int count;
int max_active;
struct ib_mad_qp_info *qp_info;
};
struct ib_mad_qp_info {
struct ib_mad_port_private *port_priv;
struct ib_qp *qp;
struct ib_mad_queue send_queue;
struct ib_mad_queue recv_queue;
struct list_head overflow_list;
spinlock_t snoop_lock;
struct ib_mad_snoop_private **snoop_table;
int snoop_table_size;
atomic_t snoop_count;
};
struct ib_mad_port_private {
struct list_head port_list;
struct ib_device *device;
int port_num;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
struct ib_mad_send_wr_private *
ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_recv_wc *mad_recv_wc);
void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms);
#endif /* __IB_MAD_PRIV_H__ */

View File

@@ -0,0 +1,951 @@
/*
* Copyright (c) 2005 Intel Inc. All rights reserved.
* Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mad_priv.h"
#include "mad_rmpp.h"
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
RMPP_STATE_COMPLETE,
RMPP_STATE_CANCELING
};
struct mad_rmpp_recv {
struct ib_mad_agent_private *agent;
struct list_head list;
struct delayed_work timeout_work;
struct delayed_work cleanup_work;
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
atomic_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
struct ib_mad_recv_buf *cur_seg_buf;
int last_ack;
int seg_num;
int newwin;
int repwin;
__be64 tid;
u32 src_qp;
u16 slid;
u8 mgmt_class;
u8 class_version;
u8 method;
};
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
if (atomic_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
ib_destroy_ah(rmpp_recv->ah);
kfree(rmpp_recv);
}
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
{
struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->state != RMPP_STATE_COMPLETE)
ib_free_recv_mad(rmpp_recv->rmpp_wc);
rmpp_recv->state = RMPP_STATE_CANCELING;
}
spin_unlock_irqrestore(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
destroy_rmpp_recv(rmpp_recv);
}
}
static void format_ack(struct ib_mad_send_buf *msg,
struct ib_rmpp_mad *data,
struct mad_rmpp_recv *rmpp_recv)
{
struct ib_rmpp_mad *ack = msg->mad;
unsigned long flags;
memcpy(ack, &data->mad_hdr, msg->hdr_len);
ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK;
ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
spin_lock_irqsave(&rmpp_recv->lock, flags);
rmpp_recv->last_ack = rmpp_recv->seg_num;
ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num);
ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin);
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
}
static void ack_recv(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
int ret, hdr_len;
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
recv_wc->wc->pkey_index, 1, hdr_len,
0, GFP_KERNEL);
if (IS_ERR(msg))
return;
format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
msg->ah = rmpp_recv->ah;
ret = ib_post_send_mad(msg, NULL);
if (ret)
ib_free_send_mad(msg);
}
static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
struct ib_ah *ah;
int hdr_len;
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
return (void *) ah;
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
recv_wc->wc->pkey_index, 1,
hdr_len, 0, GFP_KERNEL);
if (IS_ERR(msg))
ib_destroy_ah(ah);
else {
msg->ah = ah;
msg->context[0] = ah;
}
return msg;
}
static void ack_ds_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *recv_wc)
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
int ret;
msg = alloc_response_msg(&agent->agent, recv_wc);
if (IS_ERR(msg))
return;
rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
ib_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
ib_destroy_ah(mad_send_wc->send_buf->ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
static void nack_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *recv_wc, u8 rmpp_status)
{
struct ib_mad_send_buf *msg;
struct ib_rmpp_mad *rmpp_mad;
int ret;
msg = alloc_response_msg(&agent->agent, recv_wc);
if (IS_ERR(msg))
return;
rmpp_mad = msg->mad;
memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status;
rmpp_mad->rmpp_hdr.seg_num = 0;
rmpp_mad->rmpp_hdr.paylen_newwin = 0;
ret = ib_post_send_mad(msg, NULL);
if (ret) {
ib_destroy_ah(msg->ah);
ib_free_send_mad(msg);
}
}
static void recv_timeout_handler(struct work_struct *work)
{
struct mad_rmpp_recv *rmpp_recv =
container_of(work, struct mad_rmpp_recv, timeout_work.work);
struct ib_mad_recv_wc *rmpp_wc;
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
if (rmpp_recv->state != RMPP_STATE_ACTIVE) {
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
return;
}
rmpp_recv->state = RMPP_STATE_TIMEOUT;
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
rmpp_wc = rmpp_recv->rmpp_wc;
nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L);
destroy_rmpp_recv(rmpp_recv);
ib_free_recv_mad(rmpp_wc);
}
static void recv_cleanup_handler(struct work_struct *work)
{
struct mad_rmpp_recv *rmpp_recv =
container_of(work, struct mad_rmpp_recv, cleanup_work.work);
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
if (rmpp_recv->state == RMPP_STATE_CANCELING) {
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
return;
}
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
}
static struct mad_rmpp_recv *
create_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_hdr *mad_hdr;
rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL);
if (!rmpp_recv)
return NULL;
rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd,
mad_recv_wc->wc,
mad_recv_wc->recv_buf.grh,
agent->agent.port_num);
if (IS_ERR(rmpp_recv->ah))
goto error;
rmpp_recv->agent = agent;
init_completion(&rmpp_recv->comp);
INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler);
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
atomic_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
rmpp_recv->newwin = 1;
rmpp_recv->seg_num = 1;
rmpp_recv->last_ack = 0;
rmpp_recv->repwin = 1;
mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
rmpp_recv->tid = mad_hdr->tid;
rmpp_recv->src_qp = mad_recv_wc->wc->src_qp;
rmpp_recv->slid = mad_recv_wc->wc->slid;
rmpp_recv->mgmt_class = mad_hdr->mgmt_class;
rmpp_recv->class_version = mad_hdr->class_version;
rmpp_recv->method = mad_hdr->method;
return rmpp_recv;
error: kfree(rmpp_recv);
return NULL;
}
static struct mad_rmpp_recv *
find_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->tid == mad_hdr->tid &&
rmpp_recv->src_qp == mad_recv_wc->wc->src_qp &&
rmpp_recv->slid == mad_recv_wc->wc->slid &&
rmpp_recv->mgmt_class == mad_hdr->mgmt_class &&
rmpp_recv->class_version == mad_hdr->class_version &&
rmpp_recv->method == mad_hdr->method)
return rmpp_recv;
}
return NULL;
}
static struct mad_rmpp_recv *
acquire_rmpp_recv(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
atomic_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
static struct mad_rmpp_recv *
insert_rmpp_recv(struct ib_mad_agent_private *agent,
struct mad_rmpp_recv *rmpp_recv)
{
struct mad_rmpp_recv *cur_rmpp_recv;
cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc);
if (!cur_rmpp_recv)
list_add_tail(&rmpp_recv->list, &agent->rmpp_list);
return cur_rmpp_recv;
}
static inline int get_last_flag(struct ib_mad_recv_buf *seg)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST;
}
static inline int get_seg_num(struct ib_mad_recv_buf *seg)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *) seg->mad;
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
return container_of(seg->list.next, struct ib_mad_recv_buf, list);
}
static inline int window_size(struct ib_mad_agent_private *agent)
{
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
cur_seg_num = get_seg_num(seg_buf);
if (seg_num > cur_seg_num)
return seg_buf;
if (seg_num == cur_seg_num)
break;
}
return NULL;
}
static void update_seg_num(struct mad_rmpp_recv *rmpp_recv,
struct ib_mad_recv_buf *new_buf)
{
struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list;
while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) {
rmpp_recv->cur_seg_buf = new_buf;
rmpp_recv->seg_num++;
new_buf = get_next_seg(rmpp_list, new_buf);
}
}
static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_rmpp_mad *rmpp_mad;
int hdr_size, data_size, pad;
rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad;
hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
if (pad > IB_MGMT_RMPP_DATA || pad < 0)
pad = 0;
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
ack_recv(rmpp_recv, rmpp_recv->rmpp_wc);
if (rmpp_recv->seg_num > 1)
cancel_delayed_work(&rmpp_recv->timeout_work);
rmpp_wc = rmpp_recv->rmpp_wc;
rmpp_wc->mad_len = get_mad_len(rmpp_recv);
/* Use a fixed 10 second cleanup delay until the real packet lifetime can be determined */
queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq,
&rmpp_recv->cleanup_work, msecs_to_jiffies(10000));
return rmpp_wc;
}
static struct ib_mad_recv_wc *
continue_rmpp(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
struct ib_mad_recv_buf *prev_buf;
struct ib_mad_recv_wc *done_wc;
int seg_num;
unsigned long flags;
rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc);
if (!rmpp_recv)
goto drop1;
seg_num = get_seg_num(&mad_recv_wc->recv_buf);
spin_lock_irqsave(&rmpp_recv->lock, flags);
if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) ||
(seg_num > rmpp_recv->newwin))
goto drop3;
if ((seg_num <= rmpp_recv->last_ack) ||
(rmpp_recv->state == RMPP_STATE_COMPLETE)) {
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
ack_recv(rmpp_recv, mad_recv_wc);
goto drop2;
}
prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num);
if (!prev_buf)
goto drop3;
done_wc = NULL;
list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list);
if (rmpp_recv->cur_seg_buf == prev_buf) {
update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf);
if (get_last_flag(rmpp_recv->cur_seg_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
done_wc = complete_rmpp(rmpp_recv);
goto out;
} else if (rmpp_recv->seg_num == rmpp_recv->newwin) {
rmpp_recv->newwin += window_size(agent);
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
ack_recv(rmpp_recv, mad_recv_wc);
goto out;
}
}
spin_unlock_irqrestore(&rmpp_recv->lock, flags);
out:
deref_rmpp_recv(rmpp_recv);
return done_wc;
drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags);
drop2: deref_rmpp_recv(rmpp_recv);
drop1: ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static struct ib_mad_recv_wc *
start_rmpp(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct mad_rmpp_recv *rmpp_recv;
unsigned long flags;
rmpp_recv = create_rmpp_recv(agent, mad_recv_wc);
if (!rmpp_recv) {
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
spin_lock_irqsave(&agent->lock, flags);
if (insert_rmpp_recv(agent, rmpp_recv)) {
spin_unlock_irqrestore(&agent->lock, flags);
/* duplicate first MAD */
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
atomic_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
spin_unlock_irqrestore(&agent->lock, flags);
complete_rmpp(rmpp_recv);
} else {
spin_unlock_irqrestore(&agent->lock, flags);
/* Use a fixed 40 second receive timeout until the real packet lifetimes can be determined */
queue_delayed_work(agent->qp_info->port_priv->wq,
&rmpp_recv->timeout_work,
msecs_to_jiffies(40000));
rmpp_recv->newwin += window_size(agent);
ack_recv(rmpp_recv, mad_recv_wc);
mad_recv_wc = NULL;
}
deref_rmpp_recv(rmpp_recv);
return mad_recv_wc;
}
static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int timeout;
u32 paylen = 0;
rmpp_mad = mad_send_wr->send_buf.mad;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num);
if (mad_send_wr->seg_num == 1) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
paylen = mad_send_wr->send_buf.seg_count * IB_MGMT_RMPP_DATA -
mad_send_wr->pad;
}
if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) {
rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
}
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
/* Cap the ACK timeout at 2 seconds until the real packet lifetime can be determined */
timeout = mad_send_wr->send_buf.timeout_ms;
if (!timeout || timeout > 2000)
mad_send_wr->timeout = msecs_to_jiffies(2000);
return ib_send_mad(mad_send_wr);
}
static void abort_send(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc wc;
unsigned long flags;
spin_lock_irqsave(&agent->lock, flags);
mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
if (!mad_send_wr)
goto out; /* Unmatched send */
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
ib_mark_mad_done(mad_send_wr);
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_REM_ABORT_ERR;
wc.vendor_err = rmpp_status;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
spin_unlock_irqrestore(&agent->lock, flags);
}
static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
int seg_num)
{
struct list_head *list;
wr->last_ack = seg_num;
list = &wr->last_ack_seg->list;
list_for_each_entry(wr->last_ack_seg, list, list)
if (wr->last_ack_seg->num == seg_num)
break;
}
static void process_ds_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc, int newwin)
{
struct mad_rmpp_recv *rmpp_recv;
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
rmpp_recv->repwin = newwin;
}
static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_rmpp_mad *rmpp_mad;
unsigned long flags;
int seg_num, newwin, ret;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
return;
}
seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
if (newwin < seg_num) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
return;
}
spin_lock_irqsave(&agent->lock, flags);
mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
if (!mad_send_wr) {
if (!seg_num)
process_ds_ack(agent, mad_recv_wc, newwin);
goto out; /* Unmatched or DS RMPP ACK */
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
(mad_send_wr->timeout)) {
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return; /* Repeated ACK for DS RMPP transaction */
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
(!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
goto out; /* Send is already done */
if (seg_num > mad_send_wr->send_buf.seg_count ||
seg_num > mad_send_wr->newwin) {
spin_unlock_irqrestore(&agent->lock, flags);
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
return;
}
if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack)
goto out; /* Old ACK */
if (seg_num > mad_send_wr->last_ack) {
adjust_last_ack(mad_send_wr, seg_num);
mad_send_wr->retries_left = mad_send_wr->max_retries;
}
mad_send_wr->newwin = newwin;
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
/* If no response is expected, the ACK completes the send */
if (!mad_send_wr->send_buf.timeout_ms) {
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
spin_unlock_irqrestore(&agent->lock, flags);
wc.status = IB_WC_SUCCESS;
wc.vendor_err = 0;
wc.send_buf = &mad_send_wr->send_buf;
ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
if (mad_send_wr->refcount == 1)
ib_reset_mad_timeout(mad_send_wr,
mad_send_wr->send_buf.timeout_ms);
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return;
} else if (mad_send_wr->refcount == 1 &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
ret = send_next_seg(mad_send_wr);
if (ret)
goto out;
mad_send_wr->refcount++;
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
out:
spin_unlock_irqrestore(&agent->lock, flags);
}
static struct ib_mad_recv_wc *
process_rmpp_data(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_hdr *rmpp_hdr;
u8 rmpp_status;
rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr;
if (rmpp_hdr->rmpp_status) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS;
goto bad;
}
if (rmpp_hdr->seg_num == cpu_to_be32(1)) {
if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
goto bad;
}
return start_rmpp(agent, mad_recv_wc);
} else {
if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) {
rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG;
goto bad;
}
return continue_rmpp(agent, mad_recv_wc);
}
bad:
nack_recv(agent, mad_recv_wc, rmpp_status);
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static void process_rmpp_stop(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
} else
abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
}
static void process_rmpp_abort(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
} else
abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
}
struct ib_mad_recv_wc *
ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_rmpp_mad *rmpp_mad;
rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE))
return mad_recv_wc;
if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
goto out;
}
switch (rmpp_mad->rmpp_hdr.rmpp_type) {
case IB_MGMT_RMPP_TYPE_DATA:
return process_rmpp_data(agent, mad_recv_wc);
case IB_MGMT_RMPP_TYPE_ACK:
process_rmpp_ack(agent, mad_recv_wc);
break;
case IB_MGMT_RMPP_TYPE_STOP:
process_rmpp_stop(agent, mad_recv_wc);
break;
case IB_MGMT_RMPP_TYPE_ABORT:
process_rmpp_abort(agent, mad_recv_wc);
break;
default:
abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
break;
}
out:
ib_free_recv_mad(mad_recv_wc);
return NULL;
}
static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
struct ib_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
goto out;
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
if (rmpp_recv->tid != mad_hdr->tid ||
rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
rmpp_recv->class_version != mad_hdr->class_version ||
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
if (rmpp_recv->slid == ah_attr.dlid) {
newwin = rmpp_recv->repwin;
break;
}
}
spin_unlock_irqrestore(&agent->lock, flags);
out:
return newwin;
}
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED;
if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
mad_send_wr->seg_num = 1;
return IB_RMPP_RESULT_INTERNAL;
}
mad_send_wr->newwin = init_newwin(mad_send_wr);
/* We need to wait for the final ACK even if there isn't a response */
mad_send_wr->refcount += (mad_send_wr->timeout == 0);
ret = send_next_seg(mad_send_wr);
if (!ret)
return IB_RMPP_RESULT_CONSUMED;
return ret;
}
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
if (mad_send_wc->status != IB_WC_SUCCESS ||
mad_send_wr->status != IB_WC_SUCCESS)
return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
if (!mad_send_wr->timeout)
return IB_RMPP_RESULT_PROCESSED; /* Response received */
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) {
mad_send_wr->timeout =
msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
return IB_RMPP_RESULT_PROCESSED; /* Send done */
}
if (mad_send_wr->seg_num == mad_send_wr->newwin ||
mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */
ret = send_next_seg(mad_send_wr);
if (ret) {
mad_send_wc->status = IB_WC_GENERAL_ERR;
return IB_RMPP_RESULT_PROCESSED;
}
return IB_RMPP_RESULT_CONSUMED;
}
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE))
return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count)
return IB_RMPP_RESULT_PROCESSED;
mad_send_wr->seg_num = mad_send_wr->last_ack;
mad_send_wr->cur_seg = mad_send_wr->last_ack_seg;
ret = send_next_seg(mad_send_wr);
if (ret)
return IB_RMPP_RESULT_PROCESSED;
return IB_RMPP_RESULT_CONSUMED;
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright (c) 2005 Intel Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __MAD_RMPP_H__
#define __MAD_RMPP_H__
enum {
IB_RMPP_RESULT_PROCESSED,
IB_RMPP_RESULT_CONSUMED,
IB_RMPP_RESULT_INTERNAL,
IB_RMPP_RESULT_UNHANDLED
};
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr);
struct ib_mad_recv_wc *
ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent,
struct ib_mad_recv_wc *mad_recv_wc);
int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_send_wc *mad_send_wc);
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
#endif /* __MAD_RMPP_H__ */

View File

@@ -0,0 +1,879 @@
/*
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/random.h>
#include <rdma/ib_cache.h>
#include "sa.h"
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
static struct ib_client mcast_client = {
.name = "ib_multicast",
.add = mcast_add_one,
.remove = mcast_remove_one
};
static struct ib_sa_client sa_client;
static struct workqueue_struct *mcast_wq;
static union ib_gid mgid0;
struct mcast_device;
struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
atomic_t refcount;
struct completion comp;
u8 port_num;
};
struct mcast_device {
struct ib_device *device;
struct ib_event_handler event_handler;
int start_port;
int end_port;
struct mcast_port port[0];
};
enum mcast_state {
MCAST_JOINING,
MCAST_MEMBER,
MCAST_ERROR,
};
enum mcast_group_state {
MCAST_IDLE,
MCAST_BUSY,
MCAST_GROUP_ERROR,
MCAST_PKEY_EVENT
};
enum {
MCAST_INVALID_PKEY_INDEX = 0xFFFF
};
struct mcast_member;
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;
struct mcast_port *port;
spinlock_t lock;
struct work_struct work;
struct list_head pending_list;
struct list_head active_list;
struct mcast_member *last_join;
int members[3];
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
int query_id;
u16 pkey_index;
u8 leave_state;
int retries;
};
struct mcast_member {
struct ib_sa_multicast multicast;
struct ib_sa_client *client;
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
atomic_t refcount;
struct completion comp;
};
static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context);
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context);
static struct mcast_group *mcast_find(struct mcast_port *port,
union ib_gid *mgid)
{
struct rb_node *node = port->table.rb_node;
struct mcast_group *group;
int ret;
while (node) {
group = rb_entry(node, struct mcast_group, node);
ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
if (!ret)
return group;
if (ret < 0)
node = node->rb_left;
else
node = node->rb_right;
}
return NULL;
}
static struct mcast_group *mcast_insert(struct mcast_port *port,
struct mcast_group *group,
int allow_duplicates)
{
struct rb_node **link = &port->table.rb_node;
struct rb_node *parent = NULL;
struct mcast_group *cur_group;
int ret;
while (*link) {
parent = *link;
cur_group = rb_entry(parent, struct mcast_group, node);
ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
sizeof group->rec.mgid);
if (ret < 0)
link = &(*link)->rb_left;
else if (ret > 0)
link = &(*link)->rb_right;
else if (allow_duplicates)
link = &(*link)->rb_left;
else
return cur_group;
}
rb_link_node(&group->node, parent, link);
rb_insert_color(&group->node, &port->table);
return NULL;
}
static void deref_port(struct mcast_port *port)
{
if (atomic_dec_and_test(&port->refcount))
complete(&port->comp);
}
static void release_group(struct mcast_group *group)
{
struct mcast_port *port = group->port;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
if (atomic_dec_and_test(&group->refcount)) {
rb_erase(&group->node, &port->table);
spin_unlock_irqrestore(&port->lock, flags);
kfree(group);
deref_port(port);
} else
spin_unlock_irqrestore(&port->lock, flags);
}
static void deref_member(struct mcast_member *member)
{
if (atomic_dec_and_test(&member->refcount))
complete(&member->comp);
}
static void queue_join(struct mcast_member *member)
{
struct mcast_group *group = member->group;
unsigned long flags;
spin_lock_irqsave(&group->lock, flags);
list_add_tail(&member->list, &group->pending_list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
atomic_inc(&group->refcount);
queue_work(mcast_wq, &group->work);
}
spin_unlock_irqrestore(&group->lock, flags);
}
/*
* A multicast group has three types of members: full member, non member, and
* send only member. We need to keep track of the number of members of each
* type based on their join state. Adjust the number of members that belong to
* the specified join states.
*/
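/*
 * For example, a join_state of 0x5 (binary 101) covers both the full
 * member (bit 0) and send-only member (bit 2) counts, so members[0]
 * and members[2] are each adjusted by 'inc' below.
 */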
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
int i;
for (i = 0; i < 3; i++, join_state >>= 1)
if (join_state & 0x1)
group->members[i] += inc;
}
/*
* If a multicast group has zero members left for a particular join state, but
* the group is still a member with the SA, we need to leave that join state.
* Determine which join states we still belong to, but that do not have any
* active members.
*/
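/*
 * For example, if the group joined the SA with join_state 0x3 (full
 * member and non member) and members[1] has since dropped to zero while
 * members[0] is still positive, get_leave_state() returns 0x2 and only
 * the non member state is left.
 */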
static u8 get_leave_state(struct mcast_group *group)
{
u8 leave_state = 0;
int i;
for (i = 0; i < 3; i++)
if (!group->members[i])
leave_state |= (0x1 << i);
return leave_state & group->rec.join_state;
}
static int check_selector(ib_sa_comp_mask comp_mask,
ib_sa_comp_mask selector_mask,
ib_sa_comp_mask value_mask,
u8 selector, u8 src_value, u8 dst_value)
{
int err;
if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
return 0;
switch (selector) {
case IB_SA_GT:
err = (src_value <= dst_value);
break;
case IB_SA_LT:
err = (src_value >= dst_value);
break;
case IB_SA_EQ:
err = (src_value != dst_value);
break;
default:
err = 0;
break;
}
return err;
}
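/*
 * Example: in cmp_rec() below, src is the record the SA returned for the
 * group and dst is the caller's request. With dst->mtu_selector set to
 * IB_SA_GT, check_selector() reports a mismatch unless the group's mtu
 * (src) is strictly greater than the requested dst->mtu.
 */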
static int cmp_rec(struct ib_sa_mcmember_rec *src,
struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
/* MGID must already match */
if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
src->mtu, dst->mtu))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
src->traffic_class != dst->traffic_class)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
return -EINVAL;
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
src->rate, dst->rate))
return -EINVAL;
if (check_selector(comp_mask,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
dst->packet_life_time_selector,
src->packet_life_time, dst->packet_life_time))
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
src->flow_label != dst->flow_label)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
src->hop_limit != dst->hop_limit)
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
return -EINVAL;
/* join_state checked separately, proxy_join ignored */
return 0;
}
static int send_join(struct mcast_group *group, struct mcast_member *member)
{
struct mcast_port *port = group->port;
int ret;
group->last_join = member;
ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
port->port_num, IB_MGMT_METHOD_SET,
&member->multicast.rec,
member->multicast.comp_mask,
3000, GFP_KERNEL, join_handler, group,
&group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
static int send_leave(struct mcast_group *group, u8 leave_state)
{
struct mcast_port *port = group->port;
struct ib_sa_mcmember_rec rec;
int ret;
rec = group->rec;
rec.join_state = leave_state;
group->leave_state = leave_state;
ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
port->port_num, IB_SA_METHOD_DELETE, &rec,
IB_SA_MCMEMBER_REC_MGID |
IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_JOIN_STATE,
3000, GFP_KERNEL, leave_handler,
group, &group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
static void join_group(struct mcast_group *group, struct mcast_member *member,
u8 join_state)
{
member->state = MCAST_MEMBER;
adjust_membership(group, join_state, 1);
group->rec.join_state |= join_state;
member->multicast.rec = group->rec;
member->multicast.rec.join_state = join_state;
list_move(&member->list, &group->active_list);
}
static int fail_join(struct mcast_group *group, struct mcast_member *member,
int status)
{
spin_lock_irq(&group->lock);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
return member->multicast.callback(status, &member->multicast);
}
static void process_group_error(struct mcast_group *group)
{
struct mcast_member *member;
int ret = 0;
u16 pkey_index;
if (group->state == MCAST_PKEY_EVENT)
ret = ib_find_pkey(group->port->dev->device,
group->port->port_num,
be16_to_cpu(group->rec.pkey), &pkey_index);
spin_lock_irq(&group->lock);
if (group->state == MCAST_PKEY_EVENT && !ret &&
group->pkey_index == pkey_index)
goto out;
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
atomic_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(-ENETRESET,
&member->multicast);
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
spin_lock_irq(&group->lock);
}
group->rec.join_state = 0;
out:
group->state = MCAST_BUSY;
spin_unlock_irq(&group->lock);
}
static void mcast_work_handler(struct work_struct *work)
{
struct mcast_group *group;
struct mcast_member *member;
struct ib_sa_multicast *multicast;
int status, ret;
u8 join_state;
group = container_of(work, typeof(*group), work);
retest:
spin_lock_irq(&group->lock);
while (!list_empty(&group->pending_list) ||
(group->state != MCAST_BUSY)) {
if (group->state != MCAST_BUSY) {
spin_unlock_irq(&group->lock);
process_group_error(group);
goto retest;
}
member = list_entry(group->pending_list.next,
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
atomic_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
multicast->comp_mask);
if (!status)
join_group(group, member, join_state);
else
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = multicast->callback(status, multicast);
} else {
spin_unlock_irq(&group->lock);
status = send_join(group, member);
if (!status) {
deref_member(member);
return;
}
ret = fail_join(group, member, status);
}
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
spin_lock_irq(&group->lock);
}
join_state = get_leave_state(group);
if (join_state) {
group->rec.join_state &= ~join_state;
spin_unlock_irq(&group->lock);
if (send_leave(group, join_state))
goto retest;
} else {
group->state = MCAST_IDLE;
spin_unlock_irq(&group->lock);
release_group(group);
}
}
/*
* Fail a join request if it is still active - at the head of the pending queue.
*/
static void process_join_error(struct mcast_group *group, int status)
{
struct mcast_member *member;
int ret;
spin_lock_irq(&group->lock);
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
atomic_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
deref_member(member);
if (ret)
ib_sa_free_multicast(&member->multicast);
} else
spin_unlock_irq(&group->lock);
}
static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context)
{
struct mcast_group *group = context;
u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
if (status)
process_join_error(group, status);
else {
ib_find_pkey(group->port->dev->device, group->port->port_num,
be16_to_cpu(rec->pkey), &pkey_index);
spin_lock_irq(&group->port->lock);
group->rec = *rec;
if (group->state == MCAST_BUSY &&
group->pkey_index == MCAST_INVALID_PKEY_INDEX)
group->pkey_index = pkey_index;
if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
rb_erase(&group->node, &group->port->table);
mcast_insert(group->port, group, 1);
}
spin_unlock_irq(&group->port->lock);
}
mcast_work_handler(&group->work);
}
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
void *context)
{
struct mcast_group *group = context;
if (status && group->retries > 0 &&
!send_leave(group, group->leave_state))
group->retries--;
else
mcast_work_handler(&group->work);
}
static struct mcast_group *acquire_group(struct mcast_port *port,
union ib_gid *mgid, gfp_t gfp_mask)
{
struct mcast_group *group, *cur_group;
unsigned long flags;
int is_mgid0;
is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
if (!is_mgid0) {
spin_lock_irqsave(&port->lock, flags);
group = mcast_find(port, mgid);
if (group)
goto found;
spin_unlock_irqrestore(&port->lock, flags);
}
group = kzalloc(sizeof *group, gfp_mask);
if (!group)
return NULL;
group->retries = 3;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
INIT_LIST_HEAD(&group->pending_list);
INIT_LIST_HEAD(&group->active_list);
INIT_WORK(&group->work, mcast_work_handler);
spin_lock_init(&group->lock);
spin_lock_irqsave(&port->lock, flags);
cur_group = mcast_insert(port, group, is_mgid0);
if (cur_group) {
kfree(group);
group = cur_group;
} else
atomic_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
return group;
}
/*
* We serialize all join requests to a single group to make our lives much
* easier. Otherwise, two users could try to join the same group
* simultaneously, with different configurations, one could leave while the
* join is in progress, etc., which makes locking around error recovery
* difficult.
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
struct ib_sa_multicast *multicast),
void *context)
{
struct mcast_device *dev;
struct mcast_member *member;
struct ib_sa_multicast *multicast;
int ret;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return ERR_PTR(-ENODEV);
member = kmalloc(sizeof *member, gfp_mask);
if (!member)
return ERR_PTR(-ENOMEM);
ib_sa_client_get(client);
member->client = client;
member->multicast.rec = *rec;
member->multicast.comp_mask = comp_mask;
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
atomic_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
&rec->mgid, gfp_mask);
if (!member->group) {
ret = -ENOMEM;
goto err;
}
/*
* The user will get the multicast structure in their callback. They
* could then free the multicast structure before we can return from
* this routine. So we save the pointer to return before queuing
* any callback.
*/
multicast = &member->multicast;
queue_join(member);
return multicast;
err:
ib_sa_client_put(client);
kfree(member);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_join_multicast);
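/*
 * Minimal usage sketch for ib_sa_join_multicast(): the client, device,
 * GIDs and pkey are assumed to come from the caller, and the helper
 * names here are illustrative only.
 */
static int example_join_cb(int status, struct ib_sa_multicast *multicast)
{
	if (status)
		printk(KERN_WARNING "multicast join failed: %d\n", status);
	/* A non-zero return tells the core to free the multicast entry. */
	return status;
}

static struct ib_sa_multicast *example_join(struct ib_sa_client *client,
					    struct ib_device *device,
					    u8 port_num, union ib_gid *mgid,
					    union ib_gid *port_gid, u16 pkey)
{
	struct ib_sa_mcmember_rec rec;

	memset(&rec, 0, sizeof rec);
	rec.mgid = *mgid;
	rec.port_gid = *port_gid;
	rec.pkey = cpu_to_be16(pkey);
	rec.join_state = 1;	/* full member */

	return ib_sa_join_multicast(client, device, port_num, &rec,
				    IB_SA_MCMEMBER_REC_MGID |
				    IB_SA_MCMEMBER_REC_PORT_GID |
				    IB_SA_MCMEMBER_REC_PKEY |
				    IB_SA_MCMEMBER_REC_JOIN_STATE,
				    GFP_KERNEL, example_join_cb, NULL);
}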
void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
{
struct mcast_member *member;
struct mcast_group *group;
member = container_of(multicast, struct mcast_member, multicast);
group = member->group;
spin_lock_irq(&group->lock);
if (member->state == MCAST_MEMBER)
adjust_membership(group, multicast->rec.join_state, -1);
list_del_init(&member->list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
spin_unlock_irq(&group->lock);
/* Continue to hold reference on group until callback */
queue_work(mcast_wq, &group->work);
} else {
spin_unlock_irq(&group->lock);
release_group(group);
}
deref_member(member);
wait_for_completion(&member->comp);
ib_sa_client_put(member->client);
kfree(member);
}
EXPORT_SYMBOL(ib_sa_free_multicast);
int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
struct mcast_port *port;
struct mcast_group *group;
unsigned long flags;
int ret = 0;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return -ENODEV;
port = &dev->port[port_num - dev->start_port];
spin_lock_irqsave(&port->lock, flags);
group = mcast_find(port, mgid);
if (group)
*rec = group->rec;
else
ret = -EADDRNOTAVAIL;
spin_unlock_irqrestore(&port->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
if (ret)
return ret;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->mlid);
ah_attr->sl = rec->sl;
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->mgid;
ah_attr->grh.sgid_index = (u8) gid_index;
ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
static void mcast_groups_event(struct mcast_port *port,
enum mcast_group_state state)
{
struct mcast_group *group;
struct rb_node *node;
unsigned long flags;
spin_lock_irqsave(&port->lock, flags);
for (node = rb_first(&port->table); node; node = rb_next(node)) {
group = rb_entry(node, struct mcast_group, node);
spin_lock(&group->lock);
if (group->state == MCAST_IDLE) {
atomic_inc(&group->refcount);
queue_work(mcast_wq, &group->work);
}
if (group->state != MCAST_GROUP_ERROR)
group->state = state;
spin_unlock(&group->lock);
}
spin_unlock_irqrestore(&port->lock, flags);
}
static void mcast_event_handler(struct ib_event_handler *handler,
struct ib_event *event)
{
struct mcast_device *dev;
int index;
dev = container_of(handler, struct mcast_device, event_handler);
index = event->element.port_num - dev->start_port;
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;
case IB_EVENT_PKEY_CHANGE:
mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
break;
default:
break;
}
}
static void mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
int i;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
GFP_KERNEL);
if (!dev)
return;
if (device->node_type == RDMA_NODE_IB_SWITCH)
dev->start_port = dev->end_port = 0;
else {
dev->start_port = 1;
dev->end_port = device->phys_port_cnt;
}
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
port = &dev->port[i];
port->dev = dev;
port->port_num = dev->start_port + i;
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
atomic_set(&port->refcount, 1);
}
dev->device = device;
ib_set_client_data(device, &mcast_client, dev);
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
}
static void mcast_remove_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
int i;
dev = ib_get_client_data(device, &mcast_client);
if (!dev)
return;
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
port = &dev->port[i];
deref_port(port);
wait_for_completion(&port->comp);
}
kfree(dev);
}
int mcast_init(void)
{
int ret;
mcast_wq = create_singlethread_workqueue("ib_mcast");
if (!mcast_wq)
return -ENOMEM;
ib_sa_register_client(&sa_client);
ret = ib_register_client(&mcast_client);
if (ret)
goto err;
return 0;
err:
ib_sa_unregister_client(&sa_client);
destroy_workqueue(mcast_wq);
return ret;
}
void mcast_cleanup(void)
{
ib_unregister_client(&mcast_client);
ib_sa_unregister_client(&sa_client);
destroy_workqueue(mcast_wq);
}

View File

@@ -0,0 +1,202 @@
/*
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/string.h>
#include <rdma/ib_pack.h>
static u64 value_read(int offset, int size, void *structure)
{
switch (size) {
case 1: return *(u8 *) (structure + offset);
case 2: return be16_to_cpup((__be16 *) (structure + offset));
case 4: return be32_to_cpup((__be32 *) (structure + offset));
case 8: return be64_to_cpup((__be64 *) (structure + offset));
default:
printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
return 0;
}
}
/**
* ib_pack - Pack a structure into a buffer
* @desc:Array of structure field descriptions
* @desc_len:Number of entries in @desc
* @structure:Structure to pack from
* @buf:Buffer to pack into
*
* ib_pack() packs a list of structure fields into a buffer,
* controlled by the array of fields in @desc.
*/
void ib_pack(const struct ib_field *desc,
int desc_len,
void *structure,
void *buf)
{
int i;
for (i = 0; i < desc_len; ++i) {
if (desc[i].size_bits <= 32) {
int shift;
u32 val;
__be32 mask;
__be32 *addr;
shift = 32 - desc[i].offset_bits - desc[i].size_bits;
if (desc[i].struct_size_bytes)
val = value_read(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
structure) << shift;
else
val = 0;
mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift);
addr = (__be32 *) buf + desc[i].offset_words;
*addr = (*addr & ~mask) | (cpu_to_be32(val) & mask);
} else if (desc[i].size_bits <= 64) {
int shift;
u64 val;
__be64 mask;
__be64 *addr;
shift = 64 - desc[i].offset_bits - desc[i].size_bits;
if (desc[i].struct_size_bytes)
val = value_read(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
structure) << shift;
else
val = 0;
mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift);
addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words);
*addr = (*addr & ~mask) | (cpu_to_be64(val) & mask);
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
printk(KERN_WARNING "Structure field %s of size %d "
"bits is not byte-aligned\n",
desc[i].field_name, desc[i].size_bits);
}
if (desc[i].struct_size_bytes)
memcpy(buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
structure + desc[i].struct_offset_bytes,
desc[i].size_bits / 8);
else
memset(buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
0,
desc[i].size_bits / 8);
}
}
}
EXPORT_SYMBOL(ib_pack);
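/*
 * Packing sketch: a hypothetical 4-byte wire header described by an
 * ib_field table. Multi-byte structure fields are kept in big-endian
 * form (__be16 here), matching value_read()/value_write() in this file;
 * the layout and names are illustrative assumptions only.
 */
struct example_hdr {
	u8	version;
	__be16	length;
};

static const struct ib_field example_hdr_table[] = {
	{ .struct_offset_bytes = offsetof(struct example_hdr, version),
	  .struct_size_bytes   = 1,
	  .offset_words        = 0,
	  .offset_bits         = 0,
	  .size_bits           = 8,
	  .field_name          = "version" },
	{ .struct_offset_bytes = offsetof(struct example_hdr, length),
	  .struct_size_bytes   = 2,
	  .offset_words        = 0,
	  .offset_bits         = 16,
	  .size_bits           = 16,
	  .field_name          = "length" }
};

static void example_pack_hdr(struct example_hdr *hdr, void *buf)
{
	/* buf must hold at least 4 zero-initialized bytes; described
	 * fields land in network byte order, untouched bits stay 0. */
	ib_pack(example_hdr_table, ARRAY_SIZE(example_hdr_table), hdr, buf);
}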
static void value_write(int offset, int size, u64 val, void *structure)
{
switch (size * 8) {
case 8: *( u8 *) (structure + offset) = val; break;
case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break;
case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
default:
printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
}
}
/**
* ib_unpack - Unpack a buffer into a structure
* @desc:Array of structure field descriptions
* @desc_len:Number of entries in @desc
* @buf:Buffer to unpack from
* @structure:Structure to unpack into
*
* ib_unpack() unpacks a list of structure fields from a buffer,
* controlled by the array of fields in @desc.
*/
void ib_unpack(const struct ib_field *desc,
int desc_len,
void *buf,
void *structure)
{
int i;
for (i = 0; i < desc_len; ++i) {
if (!desc[i].struct_size_bytes)
continue;
if (desc[i].size_bits <= 32) {
int shift;
u32 val;
u32 mask;
__be32 *addr;
shift = 32 - desc[i].offset_bits - desc[i].size_bits;
mask = ((1ull << desc[i].size_bits) - 1) << shift;
addr = (__be32 *) buf + desc[i].offset_words;
val = (be32_to_cpup(addr) & mask) >> shift;
value_write(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
val,
structure);
} else if (desc[i].size_bits <= 64) {
int shift;
u64 val;
u64 mask;
__be64 *addr;
shift = 64 - desc[i].offset_bits - desc[i].size_bits;
mask = (~0ull >> (64 - desc[i].size_bits)) << shift;
addr = (__be64 *) buf + desc[i].offset_words;
val = (be64_to_cpup(addr) & mask) >> shift;
value_write(desc[i].struct_offset_bytes,
desc[i].struct_size_bytes,
val,
structure);
} else {
if (desc[i].offset_bits % 8 ||
desc[i].size_bits % 8) {
printk(KERN_WARNING "Structure field %s of size %d "
"bits is not byte-aligned\n",
desc[i].field_name, desc[i].size_bits);
}
memcpy(structure + desc[i].struct_offset_bytes,
buf + desc[i].offset_words * 4 +
desc[i].offset_bits / 8,
desc[i].size_bits / 8);
}
}
}
EXPORT_SYMBOL(ib_unpack);
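/*
 * With the example_hdr_table sketched above, the reverse direction is
 * simply ib_unpack(example_hdr_table, ARRAY_SIZE(example_hdr_table),
 * buf, &hdr), which refills hdr->version and hdr->length (still
 * big-endian) from the wire buffer.
 */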

View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef SA_H
#define SA_H
#include <rdma/ib_sa.h>
static inline void ib_sa_client_get(struct ib_sa_client *client)
{
atomic_inc(&client->users);
}
static inline void ib_sa_client_put(struct ib_sa_client *client)
{
if (atomic_dec_and_test(&client->users))
complete(&client->comp);
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
int mcast_init(void);
void mcast_cleanup(void);
#endif /* SA_H */

File diff suppressed because it is too large

View File

@@ -0,0 +1,253 @@
/*
* Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <rdma/ib_smi.h>
#include "smi.h"
/*
* Fixup a directed route SMP for sending
* Return IB_SMI_DISCARD if the SMP should be discarded
*/
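/*
 * For example, an outgoing SMP (direction bit clear) with hop_cnt = 2 and
 * hop_ptr = 0 falls under C14-9:1 below: hop_ptr is advanced to 1 and the
 * SMP is handled only if initial_path[1] names the port it is being sent
 * out on.
 */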
enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type, int port_num)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 */
if (hop_cnt && hop_ptr == 0) {
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:2 */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
/* smp->return_path set when received */
smp->hop_ptr++;
return (smp->initial_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
if (hop_ptr == hop_cnt) {
/* smp->return_path set when received */
smp->hop_ptr++;
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- Fail unreasonable hop pointer */
return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
/* C14-13:1 */
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- at the end of the DR segment of path */
if (hop_ptr == 1) {
smp->hop_ptr--;
/* C14-13:3 -- SMPs destined for SM shouldn't be here */
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_slid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */
if (hop_ptr == 0)
return IB_SMI_HANDLE;
/* C14-13:5 -- Check for unreasonable hop pointer */
return IB_SMI_DISCARD;
}
}
/*
* Adjust information for a received SMP
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
int port_num, int phys_port_cnt)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)
return IB_SMI_DISCARD;
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
smp->return_path[hop_ptr] = port_num;
/* smp->hop_ptr updated when sending */
return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:3 -- We're at the end of the DR segment of path */
if (hop_ptr == hop_cnt) {
if (hop_cnt)
smp->return_path[hop_ptr] = port_num;
/* smp->hop_ptr updated when sending */
return (node_type == RDMA_NODE_IB_SWITCH ||
smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
/* C14-9:5 -- fail unreasonable hop pointer */
return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
} else {
/* C14-13:1 */
if (hop_cnt && hop_ptr == hop_cnt + 1) {
smp->hop_ptr--;
return (smp->return_path[smp->hop_ptr] ==
port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:2 */
if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
if (node_type != RDMA_NODE_IB_SWITCH)
return IB_SMI_DISCARD;
/* smp->hop_ptr updated when sending */
return (smp->return_path[hop_ptr-1] <= phys_port_cnt ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:3 -- We're at the end of the DR segment of path */
if (hop_ptr == 1) {
if (smp->dr_slid == IB_LID_PERMISSIVE) {
/* giving SMP to SM - update hop_ptr */
smp->hop_ptr--;
return IB_SMI_HANDLE;
}
/* smp->hop_ptr updated when sending */
return (node_type == RDMA_NODE_IB_SWITCH ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/* C14-13:4 -- hop_ptr = 0 -> give to SM */
/* C14-13:5 -- Check for unreasonable hop pointer */
return (hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
}
enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
{
u8 hop_ptr, hop_cnt;
hop_ptr = smp->hop_ptr;
hop_cnt = smp->hop_cnt;
if (!ib_get_smp_direction(smp)) {
/* C14-9:2 -- intermediate hop */
if (hop_ptr && hop_ptr < hop_cnt)
return IB_SMI_FORWARD;
/* C14-9:3 -- at the end of the DR segment of path */
if (hop_ptr == hop_cnt)
return (smp->dr_dlid == IB_LID_PERMISSIVE ?
IB_SMI_SEND : IB_SMI_LOCAL);
/* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */
if (hop_ptr == hop_cnt + 1)
return IB_SMI_SEND;
} else {
/* C14-13:2 -- intermediate hop */
if (2 <= hop_ptr && hop_ptr <= hop_cnt)
return IB_SMI_FORWARD;
/* C14-13:3 -- at the end of the DR segment of path */
if (hop_ptr == 1)
return (smp->dr_slid != IB_LID_PERMISSIVE ?
IB_SMI_SEND : IB_SMI_LOCAL);
}
return IB_SMI_LOCAL;
}
/*
* Return the forwarding port number from initial_path for outgoing SMP and
* from return_path for returning SMP
*/
int smi_get_fwd_port(struct ib_smp *smp)
{
return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
smp->return_path[smp->hop_ptr-1]);
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef __SMI_H_
#define __SMI_H_
#include <rdma/ib_smi.h>
enum smi_action {
IB_SMI_DISCARD,
IB_SMI_HANDLE
};
enum smi_forward_action {
IB_SMI_LOCAL, /* SMP should be completed up the stack */
IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */
IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */
};
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
int port_num, int phys_port_cnt);
int smi_get_fwd_port(struct ib_smp *smp);
extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
u8 node_type, int port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
* via process_mad
*/
static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
struct ib_device *device)
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
return ((device->process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
}
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
* via process_mad
*/
static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
struct ib_device *device)
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
return ((device->process_mad &&
ib_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
#endif /* __SMI_H_ */

View File

@@ -0,0 +1,859 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "core_priv.h"
#include <linux/slab.h>
#include <linux/string.h>
#include <rdma/ib_mad.h>
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
};
struct port_attribute {
struct attribute attr;
ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
ssize_t (*store)(struct ib_port *, struct port_attribute *,
const char *buf, size_t count);
};
#define PORT_ATTR(_name, _mode, _show, _store) \
struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
#define PORT_ATTR_RO(_name) \
struct port_attribute port_attr_##_name = __ATTR_RO(_name)
struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->show)
return -EIO;
return port_attr->show(p, port_attr, buf);
}
static struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
static const char *state_name[] = {
[IB_PORT_NOP] = "NOP",
[IB_PORT_DOWN] = "DOWN",
[IB_PORT_INIT] = "INIT",
[IB_PORT_ARMED] = "ARMED",
[IB_PORT_ACTIVE] = "ACTIVE",
[IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
};
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%x\n", attr.lid);
}
static ssize_t lid_mask_count_show(struct ib_port *p,
struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d\n", attr.lmc);
}
static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%x\n", attr.sm_lid);
}
static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "%d\n", attr.sm_sl);
}
static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
}
static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
char *speed = "";
int rate;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
switch (attr.active_speed) {
case 2: speed = " DDR"; break;
case 4: speed = " QDR"; break;
}
rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
if (rate < 0)
return -EINVAL;
return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
rate / 10, rate % 10 ? ".5" : "",
ib_width_enum_to_int(attr.active_width), speed);
}
static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
switch (attr.phys_state) {
case 1: return sprintf(buf, "1: Sleep\n");
case 2: return sprintf(buf, "2: Polling\n");
case 3: return sprintf(buf, "3: Disabled\n");
case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
case 5: return sprintf(buf, "5: LinkUp\n");
case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
case 7: return sprintf(buf, "7: Phy Test\n");
default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
}
}
static PORT_ATTR_RO(state);
static PORT_ATTR_RO(lid);
static PORT_ATTR_RO(lid_mask_count);
static PORT_ATTR_RO(sm_lid);
static PORT_ATTR_RO(sm_sl);
static PORT_ATTR_RO(cap_mask);
static PORT_ATTR_RO(rate);
static PORT_ATTR_RO(phys_state);
static struct attribute *port_default_attrs[] = {
&port_attr_state.attr,
&port_attr_lid.attr,
&port_attr_lid_mask_count.attr,
&port_attr_sm_lid.attr,
&port_attr_sm_sl.attr,
&port_attr_cap_mask.attr,
&port_attr_rate.attr,
&port_attr_phys_state.attr,
NULL
};
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
if (ret)
return ret;
return sprintf(buf, "%pI6\n", gid.raw);
}
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
ssize_t ret;
ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
if (ret)
return ret;
return sprintf(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
}
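/*
 * For example, PORT_PMA_ATTR(symbol_error, 0, 16, 32) describes counter 0
 * as a 16-bit field starting at bit offset 32 of PortCounters, encoded as
 * index = 32 | (16 << 16) | (0 << 24); show_pma_counter() below decodes
 * the offset and width from the same index.
 */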
static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
struct ib_mad *in_mad = NULL;
struct ib_mad *out_mad = NULL;
ssize_t ret;
if (!p->ibdev->process_mad)
return sprintf(buf, "N/A (no PMA)\n");
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
}
in_mad->mad_hdr.base_version = 1;
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
in_mad->data[41] = p->port_num; /* PortSelect field */
if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
p->port_num, NULL, NULL, in_mad, out_mad) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
goto out;
}
switch (width) {
case 4:
ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
break;
case 16:
ret = sprintf(buf, "%u\n",
be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
break;
case 32:
ret = sprintf(buf, "%u\n",
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
}
out:
kfree(in_mad);
kfree(out_mad);
return ret;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
static PORT_PMA_ATTR(link_downed , 2, 8, 56);
static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
&port_pma_attr_link_downed.attr.attr,
&port_pma_attr_port_rcv_errors.attr.attr,
&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
&port_pma_attr_port_xmit_discards.attr.attr,
&port_pma_attr_port_xmit_constraint_errors.attr.attr,
&port_pma_attr_port_rcv_constraint_errors.attr.attr,
&port_pma_attr_local_link_integrity_errors.attr.attr,
&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
&port_pma_attr_VL15_dropped.attr.attr,
&port_pma_attr_port_xmit_data.attr.attr,
&port_pma_attr_port_rcv_data.attr.attr,
&port_pma_attr_port_xmit_packets.attr.attr,
&port_pma_attr_port_rcv_packets.attr.attr,
NULL
};
static struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
struct attribute *a;
int i;
for (i = 0; (a = p->gid_group.attrs[i]); ++i)
kfree(a);
kfree(p->gid_group.attrs);
for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
kfree(a);
kfree(p->pkey_group.attrs);
kfree(p);
}
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
kfree(dev);
}
static int ib_device_uevent(struct device *device,
struct kobj_uevent_env *env)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
if (add_uevent_var(env, "NAME=%s", dev->name))
return -ENOMEM;
/*
* It would be nice to pass the node GUID with the event...
*/
return 0;
}
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
int len)
{
struct attribute **tab_attr;
struct port_table_attribute *element;
int i;
tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
if (!tab_attr)
return NULL;
for (i = 0; i < len; i++) {
element = kzalloc(sizeof(struct port_table_attribute),
GFP_KERNEL);
if (!element)
goto err;
if (snprintf(element->name, sizeof(element->name),
"%d", i) >= sizeof(element->name)) {
kfree(element);
goto err;
}
element->attr.attr.name = element->name;
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
tab_attr[i] = &element->attr.attr;
}
return tab_attr;
err:
while (--i >= 0)
kfree(tab_attr[i]);
kfree(tab_attr);
return NULL;
}
static int add_port(struct ib_device *device, int port_num)
{
struct ib_port *p;
struct ib_port_attr attr;
int i;
int ret;
ret = ib_query_port(device, port_num, &attr);
if (ret)
return ret;
p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
return -ENOMEM;
p->ibdev = device;
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
kobject_get(device->ports_parent),
"%d", port_num);
if (ret)
goto err_put;
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
goto err_put;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (!p->gid_group.attrs)
goto err_remove_pma;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
if (!p->pkey_group.attrs)
goto err_remove_gid;
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
goto err_free_pkey;
list_add_tail(&p->kobj.entry, &device->port_list);
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
err_free_gid:
for (i = 0; i < attr.gid_tbl_len; ++i)
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
kobject_put(device->ports_parent);
kfree(p);
return ret;
}
static ssize_t show_node_type(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
static ssize_t show_sys_image_guid(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_attr attr;
ssize_t ret;
ret = ib_query_device(dev, &attr);
if (ret)
return ret;
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
}
static ssize_t show_node_guid(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
}
static ssize_t show_node_desc(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
return sprintf(buf, "%.64s\n", dev->node_desc);
}
static ssize_t set_node_desc(struct device *device,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_modify desc = {};
int ret;
if (!dev->modify_device)
return -EIO;
memcpy(desc.node_desc, buf, min_t(int, count, 64));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
if (ret)
return ret;
return count;
}
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc
};
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
.dev_uevent = ib_device_uevent,
};
/* Show a given attribute in the statistics group */
static ssize_t show_protocol_stat(const struct device *device,
struct device_attribute *attr, char *buf,
unsigned offset)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
union rdma_protocol_stats stats;
ssize_t ret;
ret = dev->get_protocol_stats(dev, &stats);
if (ret)
return ret;
return sprintf(buf, "%llu\n",
(unsigned long long) ((u64 *) &stats)[offset]);
}
/* generate a read-only iwarp statistics attribute */
#define IW_STATS_ENTRY(name) \
static ssize_t show_##name(struct device *device, \
struct device_attribute *attr, char *buf) \
{ \
return show_protocol_stat(device, attr, buf, \
offsetof(struct iw_protocol_stats, name) / \
sizeof (u64)); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
IW_STATS_ENTRY(ipInReceives);
IW_STATS_ENTRY(ipInHdrErrors);
IW_STATS_ENTRY(ipInTooBigErrors);
IW_STATS_ENTRY(ipInNoRoutes);
IW_STATS_ENTRY(ipInAddrErrors);
IW_STATS_ENTRY(ipInUnknownProtos);
IW_STATS_ENTRY(ipInTruncatedPkts);
IW_STATS_ENTRY(ipInDiscards);
IW_STATS_ENTRY(ipInDelivers);
IW_STATS_ENTRY(ipOutForwDatagrams);
IW_STATS_ENTRY(ipOutRequests);
IW_STATS_ENTRY(ipOutDiscards);
IW_STATS_ENTRY(ipOutNoRoutes);
IW_STATS_ENTRY(ipReasmTimeout);
IW_STATS_ENTRY(ipReasmReqds);
IW_STATS_ENTRY(ipReasmOKs);
IW_STATS_ENTRY(ipReasmFails);
IW_STATS_ENTRY(ipFragOKs);
IW_STATS_ENTRY(ipFragFails);
IW_STATS_ENTRY(ipFragCreates);
IW_STATS_ENTRY(ipInMcastPkts);
IW_STATS_ENTRY(ipOutMcastPkts);
IW_STATS_ENTRY(ipInBcastPkts);
IW_STATS_ENTRY(ipOutBcastPkts);
IW_STATS_ENTRY(tcpRtoAlgorithm);
IW_STATS_ENTRY(tcpRtoMin);
IW_STATS_ENTRY(tcpRtoMax);
IW_STATS_ENTRY(tcpMaxConn);
IW_STATS_ENTRY(tcpActiveOpens);
IW_STATS_ENTRY(tcpPassiveOpens);
IW_STATS_ENTRY(tcpAttemptFails);
IW_STATS_ENTRY(tcpEstabResets);
IW_STATS_ENTRY(tcpCurrEstab);
IW_STATS_ENTRY(tcpInSegs);
IW_STATS_ENTRY(tcpOutSegs);
IW_STATS_ENTRY(tcpRetransSegs);
IW_STATS_ENTRY(tcpInErrs);
IW_STATS_ENTRY(tcpOutRsts);
static struct attribute *iw_proto_stats_attrs[] = {
&dev_attr_ipInReceives.attr,
&dev_attr_ipInHdrErrors.attr,
&dev_attr_ipInTooBigErrors.attr,
&dev_attr_ipInNoRoutes.attr,
&dev_attr_ipInAddrErrors.attr,
&dev_attr_ipInUnknownProtos.attr,
&dev_attr_ipInTruncatedPkts.attr,
&dev_attr_ipInDiscards.attr,
&dev_attr_ipInDelivers.attr,
&dev_attr_ipOutForwDatagrams.attr,
&dev_attr_ipOutRequests.attr,
&dev_attr_ipOutDiscards.attr,
&dev_attr_ipOutNoRoutes.attr,
&dev_attr_ipReasmTimeout.attr,
&dev_attr_ipReasmReqds.attr,
&dev_attr_ipReasmOKs.attr,
&dev_attr_ipReasmFails.attr,
&dev_attr_ipFragOKs.attr,
&dev_attr_ipFragFails.attr,
&dev_attr_ipFragCreates.attr,
&dev_attr_ipInMcastPkts.attr,
&dev_attr_ipOutMcastPkts.attr,
&dev_attr_ipInBcastPkts.attr,
&dev_attr_ipOutBcastPkts.attr,
&dev_attr_tcpRtoAlgorithm.attr,
&dev_attr_tcpRtoMin.attr,
&dev_attr_tcpRtoMax.attr,
&dev_attr_tcpMaxConn.attr,
&dev_attr_tcpActiveOpens.attr,
&dev_attr_tcpPassiveOpens.attr,
&dev_attr_tcpAttemptFails.attr,
&dev_attr_tcpEstabResets.attr,
&dev_attr_tcpCurrEstab.attr,
&dev_attr_tcpInSegs.attr,
&dev_attr_tcpOutSegs.attr,
&dev_attr_tcpRetransSegs.attr,
&dev_attr_tcpInErrs.attr,
&dev_attr_tcpOutRsts.attr,
NULL
};
static struct attribute_group iw_stats_group = {
.name = "proto_stats",
.attrs = iw_proto_stats_attrs,
};
int ib_device_register_sysfs(struct ib_device *device)
{
struct device *class_dev = &device->dev;
int ret;
int i;
class_dev->class = &ib_class;
class_dev->parent = device->dma_device;
dev_set_name(class_dev, "%s", device->name);
dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
ret = device_register(class_dev);
if (ret)
goto err;
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
ret = device_create_file(class_dev, ib_class_attributes[i]);
if (ret)
goto err_unregister;
}
device->ports_parent = kobject_create_and_add("ports",
kobject_get(&class_dev->kobj));
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
if (device->node_type == RDMA_NODE_IB_SWITCH) {
ret = add_port(device, 0);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i);
if (ret)
goto err_put;
}
}
if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) {
ret = sysfs_create_group(&class_dev->kobj, &iw_stats_group);
if (ret)
goto err_put;
}
return 0;
err_put:
{
struct kobject *p, *t;
struct ib_port *port;
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_del(&p->entry);
port = container_of(p, struct ib_port, kobj);
sysfs_remove_group(p, &pma_group);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
kobject_put(p);
}
}
kobject_put(&class_dev->kobj);
err_unregister:
device_unregister(class_dev);
err:
return ret;
}
void ib_device_unregister_sysfs(struct ib_device *device)
{
struct kobject *p, *t;
struct ib_port *port;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_del(&p->entry);
port = container_of(p, struct ib_port, kobj);
sysfs_remove_group(p, &pma_group);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
kobject_put(p);
}
kobject_put(device->ports_parent);
device_unregister(&device->dev);
}
int ib_sysfs_setup(void)
{
return class_register(&ib_class);
}
void ib_sysfs_cleanup(void)
{
class_unregister(&ib_class);
}
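/*
 * Resulting sysfs layout (sketch; "mlx4_0" is only a placeholder device
 * name) as registered by ib_device_register_sysfs() and add_port():
 *
 *   /sys/class/infiniband/mlx4_0/{node_type,sys_image_guid,node_guid,node_desc}
 *   /sys/class/infiniband/mlx4_0/ports/<N>/{sm_lid,sm_sl,cap_mask,rate,phys_state,...}
 *   /sys/class/infiniband/mlx4_0/ports/<N>/counters/<PMA counter>
 *   /sys/class/infiniband/mlx4_0/ports/<N>/gids/<table index>
 *   /sys/class/infiniband/mlx4_0/ports/<N>/pkeys/<table index>
 *   /sys/class/infiniband/mlx4_0/proto_stats/<iWARP stat>   (RNICs only)
 */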

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,365 @@
/*
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/string.h>
#include <rdma/ib_pack.h>
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
.struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
.field_name = #header ":" #field
static const struct ib_field lrh_table[] = {
{ STRUCT_FIELD(lrh, virtual_lane),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 4 },
{ STRUCT_FIELD(lrh, link_version),
.offset_words = 0,
.offset_bits = 4,
.size_bits = 4 },
{ STRUCT_FIELD(lrh, service_level),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 4 },
{ RESERVED,
.offset_words = 0,
.offset_bits = 12,
.size_bits = 2 },
{ STRUCT_FIELD(lrh, link_next_header),
.offset_words = 0,
.offset_bits = 14,
.size_bits = 2 },
{ STRUCT_FIELD(lrh, destination_lid),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 5 },
{ STRUCT_FIELD(lrh, packet_length),
.offset_words = 1,
.offset_bits = 5,
.size_bits = 11 },
{ STRUCT_FIELD(lrh, source_lid),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 16 }
};
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 4 },
{ STRUCT_FIELD(grh, traffic_class),
.offset_words = 0,
.offset_bits = 4,
.size_bits = 8 },
{ STRUCT_FIELD(grh, flow_label),
.offset_words = 0,
.offset_bits = 12,
.size_bits = 20 },
{ STRUCT_FIELD(grh, payload_length),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 16 },
{ STRUCT_FIELD(grh, next_header),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 8 },
{ STRUCT_FIELD(grh, hop_limit),
.offset_words = 1,
.offset_bits = 24,
.size_bits = 8 },
{ STRUCT_FIELD(grh, source_gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ STRUCT_FIELD(grh, destination_gid),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 }
};
static const struct ib_field bth_table[] = {
{ STRUCT_FIELD(bth, opcode),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(bth, solicited_event),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 1 },
{ STRUCT_FIELD(bth, mig_req),
.offset_words = 0,
.offset_bits = 9,
.size_bits = 1 },
{ STRUCT_FIELD(bth, pad_count),
.offset_words = 0,
.offset_bits = 10,
.size_bits = 2 },
{ STRUCT_FIELD(bth, transport_header_version),
.offset_words = 0,
.offset_bits = 12,
.size_bits = 4 },
{ STRUCT_FIELD(bth, pkey),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(bth, destination_qpn),
.offset_words = 1,
.offset_bits = 8,
.size_bits = 24 },
{ STRUCT_FIELD(bth, ack_req),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 1 },
{ RESERVED,
.offset_words = 2,
.offset_bits = 1,
.size_bits = 7 },
{ STRUCT_FIELD(bth, psn),
.offset_words = 2,
.offset_bits = 8,
.size_bits = 24 }
};
static const struct ib_field deth_table[] = {
{ STRUCT_FIELD(deth, qkey),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 32 },
{ RESERVED,
.offset_words = 1,
.offset_bits = 0,
.size_bits = 8 },
{ STRUCT_FIELD(deth, source_qpn),
.offset_words = 1,
.offset_bits = 8,
.size_bits = 24 }
};
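/*
 * Each ib_field entry above tells ib_pack()/ib_unpack() where one header
 * field lives on the wire: for example the lrh_table entry for
 * destination_lid (offset_words 0, offset_bits 16, size_bits 16) marshals
 * that struct member to and from bits 16-31 of the first 32-bit LRH word.
 */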
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @grh_present:GRH flag (if non-zero, GRH will be included)
* @header:Structure to initialize
*
* ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header,
* lrh.packet_length, grh.ip_version, grh.payload_length,
* grh.next_header, bth.opcode, bth.pad_count and
* bth.transport_header_version fields of a &struct ib_ud_header given
* the payload length and whether a GRH will be included.
*/
void ib_ud_header_init(int payload_bytes,
int grh_present,
struct ib_ud_header *header)
{
int header_len;
u16 packet_length;
memset(header, 0, sizeof *header);
header_len =
IB_LRH_BYTES +
IB_BTH_BYTES +
IB_DETH_BYTES;
if (grh_present) {
header_len += IB_GRH_BYTES;
}
header->lrh.link_version = 0;
header->lrh.link_next_header =
grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
packet_length = (IB_LRH_BYTES +
IB_BTH_BYTES +
IB_DETH_BYTES +
payload_bytes +
4 + /* ICRC */
3) / 4; /* round up */
header->grh_present = grh_present;
if (grh_present) {
packet_length += IB_GRH_BYTES / 4;
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
IB_DETH_BYTES +
payload_bytes +
4 + /* ICRC */
3) & ~3); /* round up */
header->grh.next_header = 0x1b;
}
header->lrh.packet_length = cpu_to_be16(packet_length);
if (header->immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
header->bth.pad_count = (4 - payload_bytes) & 3;
header->bth.transport_header_version = 0;
}
EXPORT_SYMBOL(ib_ud_header_init);
/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
* @buf:Buffer to pack into
*
* ib_ud_header_pack() packs the UD header structure @header into wire
* format in the buffer @buf.
*/
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf)
{
int len = 0;
ib_pack(lrh_table, ARRAY_SIZE(lrh_table),
&header->lrh, buf);
len += IB_LRH_BYTES;
if (header->grh_present) {
ib_pack(grh_table, ARRAY_SIZE(grh_table),
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
len += IB_BTH_BYTES;
ib_pack(deth_table, ARRAY_SIZE(deth_table),
&header->deth, buf + len);
len += IB_DETH_BYTES;
if (header->immediate_present) {
memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data);
len += sizeof header->immediate_data;
}
return len;
}
EXPORT_SYMBOL(ib_ud_header_pack);
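/*
 * Minimal usage sketch for the two helpers above, assuming a caller that
 * already owns a large-enough destination buffer; the LID, P_Key and Q_Key
 * values below are placeholders for illustration only.
 */
static inline int example_build_ud_send_header(void *buf, int payload_bytes)
{
	struct ib_ud_header hdr;

	ib_ud_header_init(payload_bytes, 1 /* include a GRH */, &hdr);

	hdr.lrh.destination_lid = cpu_to_be16(0xffff);	/* permissive LID */
	hdr.bth.pkey            = cpu_to_be16(0xffff);	/* default partition */
	hdr.deth.qkey           = cpu_to_be32(0x11111111);

	return ib_ud_header_pack(&hdr, buf);		/* bytes written */
}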
/**
* ib_ud_header_unpack - Unpack UD header struct from wire format
* @header:UD header struct
* @buf:Buffer to unpack from
*
* ib_ud_header_unpack() unpacks the UD header structure @header from wire
* format in the buffer @buf.
*/
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header)
{
ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
buf, &header->lrh);
buf += IB_LRH_BYTES;
if (header->lrh.link_version != 0) {
printk(KERN_WARNING "Invalid LRH.link_version %d\n",
header->lrh.link_version);
return -EINVAL;
}
switch (header->lrh.link_next_header) {
case IB_LNH_IBA_LOCAL:
header->grh_present = 0;
break;
case IB_LNH_IBA_GLOBAL:
header->grh_present = 1;
ib_unpack(grh_table, ARRAY_SIZE(grh_table),
buf, &header->grh);
buf += IB_GRH_BYTES;
if (header->grh.ip_version != 6) {
printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
header->grh.ip_version);
return -EINVAL;
}
if (header->grh.next_header != 0x1b) {
printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
header->grh.next_header);
return -EINVAL;
}
break;
default:
printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
header->lrh.link_next_header);
return -EINVAL;
}
ib_unpack(bth_table, ARRAY_SIZE(bth_table),
buf, &header->bth);
buf += IB_BTH_BYTES;
switch (header->bth.opcode) {
case IB_OPCODE_UD_SEND_ONLY:
header->immediate_present = 0;
break;
case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
header->immediate_present = 1;
break;
default:
printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
header->bth.opcode);
return -EINVAL;
}
if (header->bth.transport_header_version != 0) {
printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
header->bth.transport_header_version);
return -EINVAL;
}
ib_unpack(deth_table, ARRAY_SIZE(deth_table),
buf, &header->deth);
buf += IB_DETH_BYTES;
if (header->immediate_present)
memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
return 0;
}
EXPORT_SYMBOL(ib_ud_header_unpack);

View File

@@ -0,0 +1,293 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
#include <linux/dma-attrs.h>
#include "uverbs.h"
#define IB_UMEM_MAX_PAGE_CHUNK \
((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
(void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
struct ib_umem_chunk *chunk, *tmp;
int i;
list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
ib_dma_unmap_sg(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->nents; ++i) {
struct page *page = sg_page(&chunk->page_list[i]);
if (umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
kfree(chunk);
}
}
/**
* ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync)
{
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
int off;
int i;
DEFINE_DMA_ATTRS(attrs);
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
if (!can_do_mlock())
return ERR_PTR(-EPERM);
umem = kmalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
INIT_LIST_HEAD(&umem->chunk_list);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
return ERR_PTR(-ENOMEM);
}
/*
* if we can't alloc the vma_list, it's not so bad;
* just assume the memory is not hugetlb memory
*/
vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
if (!vma_list)
umem->hugetlb = 0;
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
locked = npages + current->mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto out;
}
cur_base = addr & PAGE_MASK;
ret = 0;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
1, !umem->writable, page_list, vma_list);
if (ret < 0)
goto out;
cur_base += ret * PAGE_SIZE;
npages -= ret;
off = 0;
while (ret) {
chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
GFP_KERNEL);
if (!chunk) {
ret = -ENOMEM;
goto out;
}
chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
sg_init_table(chunk->page_list, chunk->nents);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
}
chunk->nmap = ib_dma_map_sg_attrs(context->device,
&chunk->page_list[0],
chunk->nents,
DMA_BIDIRECTIONAL,
&attrs);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
put_page(sg_page(&chunk->page_list[i]));
kfree(chunk);
ret = -ENOMEM;
goto out;
}
ret -= chunk->nents;
off += chunk->nents;
list_add_tail(&chunk->list, &umem->chunk_list);
}
ret = 0;
}
out:
if (ret < 0) {
__ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
return ret < 0 ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
static void ib_umem_account(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem);
umem->mm->locked_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
}
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
unsigned long diff;
__ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
if (!mm) {
kfree(umem);
return;
}
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
* we defer the vm_locked accounting to the system workqueue.
*/
if (context->closing) {
if (!down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
schedule_work(&umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
current->mm->locked_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
shift = ilog2(umem->page_size);
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (i = 0; i < chunk->nmap; ++i)
n += sg_dma_len(&chunk->page_list[i]) >> shift;
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
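/*
 * Minimal usage sketch, assuming a hypothetical driver memory-registration
 * path: pin the user buffer, ask how many DMA pages were mapped, and drop
 * the pin with ib_umem_release() when the region goes away.
 */
static inline int example_pin_user_buffer(struct ib_ucontext *context,
					  unsigned long addr, size_t size,
					  int access)
{
	struct ib_umem *umem;
	int npages;

	umem = ib_umem_get(context, addr, size, access, 0 /* no dmasync */);
	if (IS_ERR(umem))
		return PTR_ERR(umem);

	npages = ib_umem_page_count(umem);
	/* a real driver would walk umem->chunk_list here to program the HCA */

	ib_umem_release(umem);
	return npages;
}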

File diff suppressed because it is too large

View File

@@ -0,0 +1,198 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef UVERBS_H
#define UVERBS_H
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
/*
* Our lifetime rules for these structs are the following:
*
* struct ib_uverbs_device: One reference is held by the module and
* released in ib_uverbs_remove_one(). Another reference is taken by
* ib_uverbs_open() each time the character special file is opened,
* and released in ib_uverbs_release_file() when the file is released.
*
* struct ib_uverbs_file: One reference is held by the VFS and
* released when the file is closed. Another reference is taken when
* an asynchronous event queue file is created and released when the
* event file is closed.
*
* struct ib_uverbs_event_file: One reference is held by the VFS and
* released when the file is closed. For asynchronous event files,
* another reference is held by the corresponding main context file
* and released when that file is closed. For completion event files,
* a reference is taken when a CQ is created that uses the file, and
* released when the CQ is destroyed.
*/
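/*
 * Concretely: ib_uverbs_open() takes the extra device reference with
 * kref_get(&dev->ref) and ib_uverbs_release_file() drops it again, while
 * ib_uverbs_event_close() drops an async event file's reference on its
 * owning ib_uverbs_file via ib_uverbs_release_file().
 */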
struct ib_uverbs_device {
struct kref ref;
struct completion comp;
int devnum;
struct cdev *cdev;
struct device *dev;
struct ib_device *ib_dev;
int num_comp_vectors;
};
struct ib_uverbs_event_file {
struct kref ref;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
int is_async;
int is_closed;
};
struct ib_uverbs_file {
struct kref ref;
struct mutex mutex;
struct ib_uverbs_device *device;
struct ib_ucontext *ucontext;
struct ib_event_handler event_handler;
struct ib_uverbs_event_file *async_file;
};
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;
struct ib_uverbs_comp_event_desc comp;
} desc;
struct list_head list;
struct list_head obj_list;
u32 *counter;
};
struct ib_uverbs_mcast_entry {
struct list_head list;
union ib_gid gid;
u16 lid;
};
struct ib_uevent_object {
struct ib_uobject uobject;
struct list_head event_list;
u32 events_reported;
};
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
};
struct ib_ucq_object {
struct ib_uobject uobject;
struct ib_uverbs_file *uverbs_file;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
u32 async_events_reported;
};
extern spinlock_t ib_uverbs_idr_lock;
extern struct idr ib_uverbs_pd_idr;
extern struct idr ib_uverbs_mr_idr;
extern struct idr ib_uverbs_mw_idr;
extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async, int *fd);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_ucq_object *uobj);
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
int out_len)
IB_UVERBS_DECLARE_CMD(get_context);
IB_UVERBS_DECLARE_CMD(query_device);
IB_UVERBS_DECLARE_CMD(query_port);
IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
IB_UVERBS_DECLARE_CMD(resize_cq);
IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
IB_UVERBS_DECLARE_CMD(post_send);
IB_UVERBS_DECLARE_CMD(post_recv);
IB_UVERBS_DECLARE_CMD(post_srq_recv);
IB_UVERBS_DECLARE_CMD(create_ah);
IB_UVERBS_DECLARE_CMD(destroy_ah);
IB_UVERBS_DECLARE_CMD(attach_mcast);
IB_UVERBS_DECLARE_CMD(detach_mcast);
IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
#endif /* UVERBS_H */

File diff suppressed because it is too large

View File

@@ -0,0 +1,916 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <asm/uaccess.h>
#include "uverbs.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");
#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
IB_UVERBS_MAX_DEVICES = 32
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
static struct class *uverbs_class;
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
static DEFINE_SPINLOCK(map_lock);
static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
[IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
[IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
[IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
[IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
[IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
[IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
[IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
[IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
[IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
[IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
[IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
[IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
[IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
[IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
[IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
[IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
[IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
[IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
static struct vfsmount *uverbs_event_mnt;
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
static void ib_uverbs_release_dev(struct kref *ref)
{
struct ib_uverbs_device *dev =
container_of(ref, struct ib_uverbs_device, ref);
complete(&dev->comp);
}
static void ib_uverbs_release_event_file(struct kref *ref)
{
struct ib_uverbs_event_file *file =
container_of(ref, struct ib_uverbs_event_file, ref);
kfree(file);
}
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
struct ib_uverbs_event_file *ev_file,
struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
spin_lock_irq(&ev_file->lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&ev_file->lock);
kref_put(&ev_file->ref, ib_uverbs_release_event_file);
}
spin_lock_irq(&file->async_file->lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file->lock);
}
void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
struct ib_uevent_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
spin_lock_irq(&file->async_file->lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file->lock);
}
static void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
ib_detach_mcast(qp, &mcast->gid, mcast->lid);
list_del(&mcast->list);
kfree(mcast);
}
}
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
struct ib_ucontext *context)
{
struct ib_uobject *uobj, *tmp;
if (!context)
return 0;
context->closing = 1;
list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
struct ib_ah *ah = uobj->object;
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
ib_destroy_ah(ah);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
struct ib_qp *qp = uobj->object;
struct ib_uqp_object *uqp =
container_of(uobj, struct ib_uqp_object, uevent.uobject);
idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = uobj->object;
struct ib_uverbs_event_file *ev_file = cq->cq_context;
struct ib_ucq_object *ucq =
container_of(uobj, struct ib_ucq_object, uobject);
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
ib_destroy_cq(cq);
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
container_of(uobj, struct ib_uevent_object, uobject);
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
/* XXX Free MWs */
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
kfree(uobj);
}
list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
struct ib_pd *pd = uobj->object;
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
ib_dealloc_pd(pd);
kfree(uobj);
}
return context->device->dealloc_ucontext(context);
}
static void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
module_put(file->device->ib_dev->owner);
kref_put(&file->device->ref, ib_uverbs_release_dev);
kfree(file);
}
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
int eventsz;
int ret = 0;
spin_lock_irq(&file->lock);
while (list_empty(&file->event_list)) {
spin_unlock_irq(&file->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(file->poll_wait,
!list_empty(&file->event_list)))
return -ERESTARTSYS;
spin_lock_irq(&file->lock);
}
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
if (file->is_async)
eventsz = sizeof (struct ib_uverbs_async_event_desc);
else
eventsz = sizeof (struct ib_uverbs_comp_event_desc);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
list_del(file->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
spin_unlock_irq(&file->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
ret = -EFAULT;
else
ret = eventsz;
}
kfree(event);
return ret;
}
static unsigned int ib_uverbs_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
unsigned int pollflags = 0;
struct ib_uverbs_event_file *file = filp->private_data;
poll_wait(filp, &file->poll_wait, wait);
spin_lock_irq(&file->lock);
if (!list_empty(&file->event_list))
pollflags = POLLIN | POLLRDNORM;
spin_unlock_irq(&file->lock);
return pollflags;
}
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
struct ib_uverbs_event_file *file = filp->private_data;
return fasync_helper(fd, filp, on, &file->async_queue);
}
static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&file->lock);
file->is_closed = 1;
list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
if (entry->counter)
list_del(&entry->obj_list);
kfree(entry);
}
spin_unlock_irq(&file->lock);
if (file->is_async) {
ib_unregister_event_handler(&file->uverbs_file->event_handler);
kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
}
kref_put(&file->ref, ib_uverbs_release_event_file);
return 0;
}
static const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
.fasync = ib_uverbs_event_fasync
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct ib_uverbs_event_file *file = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
if (!file)
return;
spin_lock_irqsave(&file->lock, flags);
if (file->is_closed) {
spin_unlock_irqrestore(&file->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->lock, flags);
return;
}
uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
list_add_tail(&entry->list, &file->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
spin_unlock_irqrestore(&file->lock, flags);
wake_up_interruptible(&file->poll_wait);
kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
__u64 element, __u64 event,
struct list_head *obj_list,
u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
spin_lock_irqsave(&file->async_file->lock, flags);
if (file->async_file->is_closed) {
spin_unlock_irqrestore(&file->async_file->lock, flags);
return;
}
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry) {
spin_unlock_irqrestore(&file->async_file->lock, flags);
return;
}
entry->desc.async.element = element;
entry->desc.async.event_type = event;
entry->counter = counter;
list_add_tail(&entry->list, &file->async_file->event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
spin_unlock_irqrestore(&file->async_file->lock, flags);
wake_up_interruptible(&file->async_file->poll_wait);
kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
struct ib_ucq_object, uobject);
ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
uobj = container_of(event->element.qp->uobject,
struct ib_uevent_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
&uobj->events_reported);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
uobj = container_of(event->element.srq->uobject,
struct ib_uevent_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
&uobj->events_reported);
}
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event)
{
struct ib_uverbs_file *file =
container_of(handler, struct ib_uverbs_file, event_handler);
ib_uverbs_async_handler(file, event->element.port_num, event->event,
NULL, NULL);
}
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async, int *fd)
{
struct ib_uverbs_event_file *ev_file;
struct file *filp;
int ret;
ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
if (!ev_file)
return ERR_PTR(-ENOMEM);
kref_init(&ev_file->ref);
spin_lock_init(&ev_file->lock);
INIT_LIST_HEAD(&ev_file->event_list);
init_waitqueue_head(&ev_file->poll_wait);
ev_file->uverbs_file = uverbs_file;
ev_file->async_queue = NULL;
ev_file->is_async = is_async;
ev_file->is_closed = 0;
*fd = get_unused_fd();
if (*fd < 0) {
ret = *fd;
goto err;
}
/*
* fops_get() can't fail here, because we're coming from a
* system call on a uverbs file, which will already have a
* module reference.
*/
filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
FMODE_READ, fops_get(&uverbs_event_fops));
if (!filp) {
ret = -ENFILE;
goto err_fd;
}
filp->private_data = ev_file;
return filp;
err_fd:
put_unused_fd(*fd);
err:
kfree(ev_file);
return ERR_PTR(ret);
}
/*
* Look up a completion event file by FD. If lookup is successful,
* takes a ref to the event file struct that it returns; if
* unsuccessful, returns NULL.
*/
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
struct ib_uverbs_event_file *ev_file = NULL;
struct file *filp;
filp = fget(fd);
if (!filp)
return NULL;
if (filp->f_op != &uverbs_event_fops)
goto out;
ev_file = filp->private_data;
if (ev_file->is_async) {
ev_file = NULL;
goto out;
}
kref_get(&ev_file->ref);
out:
fput(filp);
return ev_file;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_cmd_hdr hdr;
if (count < sizeof hdr)
return -EINVAL;
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
if (hdr.in_words * 4 != count)
return -EINVAL;
if (hdr.command < 0 ||
hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command])
return -EINVAL;
if (!file->ucontext &&
hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -ENOSYS;
return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
hdr.in_words * 4, hdr.out_words * 4);
}
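/*
 * Wire format of the write() ABI handled above: userspace sends a
 * struct ib_uverbs_cmd_hdr (__u32 command, __u16 in_words, __u16 out_words,
 * declared in <rdma/ib_user_verbs.h>) followed immediately by the
 * command-specific request; in_words counts the whole buffer in 32-bit
 * words, so count must equal hdr.in_words * 4.
 */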
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
if (!file->ucontext)
return -ENODEV;
else
return file->device->ib_dev->mmap(file->ucontext, vma);
}
/*
* ib_uverbs_open() does not need the BKL:
*
* - dev_table[] accesses are protected by map_lock, the
* ib_uverbs_device structures are properly reference counted, and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - there is no ioctl method to race against;
* - the device is added to dev_table[] as the last part of module
* initialization, so the open method will either immediately return
* -ENXIO, or all required initialization will be done.
*/
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
struct ib_uverbs_device *dev;
struct ib_uverbs_file *file;
int ret;
spin_lock(&map_lock);
dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
if (dev)
kref_get(&dev->ref);
spin_unlock(&map_lock);
if (!dev)
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
ret = -ENODEV;
goto err;
}
file = kmalloc(sizeof *file, GFP_KERNEL);
if (!file) {
ret = -ENOMEM;
goto err_module;
}
file->device = dev;
file->ucontext = NULL;
file->async_file = NULL;
kref_init(&file->ref);
mutex_init(&file->mutex);
filp->private_data = file;
return 0;
err_module:
module_put(dev->ib_dev->owner);
err:
kref_put(&dev->ref, ib_uverbs_release_dev);
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
ib_uverbs_cleanup_ucontext(file, file->ucontext);
if (file->async_file)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
return 0;
}
static const struct file_operations uverbs_fops = {
.owner = THIS_MODULE,
.write = ib_uverbs_write,
.open = ib_uverbs_open,
.release = ib_uverbs_close
};
static const struct file_operations uverbs_mmap_fops = {
.owner = THIS_MODULE,
.write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
.open = ib_uverbs_open,
.release = ib_uverbs_close
};
static struct ib_client uverbs_client = {
.name = "uverbs",
.add = ib_uverbs_add_one,
.remove = ib_uverbs_remove_one
};
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_uverbs_device *dev = dev_get_drvdata(device);
if (!dev)
return -ENODEV;
return sprintf(buf, "%s\n", dev->ib_dev->name);
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
static ssize_t show_dev_abi_version(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_uverbs_device *dev = dev_get_drvdata(device);
if (!dev)
return -ENODEV;
return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
static ssize_t show_abi_version(struct class *class, char *buf)
{
return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static void ib_uverbs_add_one(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev;
if (!device->alloc_ucontext)
return;
uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
if (!uverbs_dev)
return;
kref_init(&uverbs_dev->ref);
init_completion(&uverbs_dev->comp);
spin_lock(&map_lock);
uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
spin_unlock(&map_lock);
goto err;
}
set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
uverbs_dev->ib_dev = device;
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
uverbs_dev->cdev = cdev_alloc();
if (!uverbs_dev->cdev)
goto err;
uverbs_dev->cdev->owner = THIS_MODULE;
uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
uverbs_dev->cdev->dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
goto err_class;
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
spin_lock(&map_lock);
dev_table[uverbs_dev->devnum] = uverbs_dev;
spin_unlock(&map_lock);
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
device_destroy(uverbs_class, uverbs_dev->cdev->dev);
err_cdev:
cdev_del(uverbs_dev->cdev);
clear_bit(uverbs_dev->devnum, dev_map);
err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
return;
}
static void ib_uverbs_remove_one(struct ib_device *device)
{
struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
if (!uverbs_dev)
return;
dev_set_drvdata(uverbs_dev->dev, NULL);
device_destroy(uverbs_class, uverbs_dev->cdev->dev);
cdev_del(uverbs_dev->cdev);
spin_lock(&map_lock);
dev_table[uverbs_dev->devnum] = NULL;
spin_unlock(&map_lock);
clear_bit(uverbs_dev->devnum, dev_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
kfree(uverbs_dev);
}
static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data,
struct vfsmount *mnt)
{
return get_sb_pseudo(fs_type, "infinibandevent:", NULL,
INFINIBANDEVENTFS_MAGIC, mnt);
}
static struct file_system_type uverbs_event_fs = {
/* No owner field so module can be unloaded */
.name = "infinibandeventfs",
.get_sb = uverbs_event_get_sb,
.kill_sb = kill_litter_super
};
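/*
 * This pseudo filesystem is never mounted by userspace; it only supplies
 * the vfsmount that ib_uverbs_alloc_event_file() passes to alloc_file(),
 * so completion and async event queues can be handed out as anonymous
 * file descriptors.
 */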
static int __init ib_uverbs_init(void)
{
int ret;
ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
"infiniband_verbs");
if (ret) {
printk(KERN_ERR "user_verbs: couldn't register device number\n");
goto out;
}
uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
if (IS_ERR(uverbs_class)) {
ret = PTR_ERR(uverbs_class);
printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
ret = class_create_file(uverbs_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
goto out_class;
}
ret = register_filesystem(&uverbs_event_fs);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n");
goto out_class;
}
uverbs_event_mnt = kern_mount(&uverbs_event_fs);
if (IS_ERR(uverbs_event_mnt)) {
ret = PTR_ERR(uverbs_event_mnt);
printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n");
goto out_fs;
}
ret = ib_register_client(&uverbs_client);
if (ret) {
printk(KERN_ERR "user_verbs: couldn't register client\n");
goto out_mnt;
}
return 0;
out_mnt:
mntput(uverbs_event_mnt);
out_fs:
unregister_filesystem(&uverbs_event_fs);
out_class:
class_destroy(uverbs_class);
out_chrdev:
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
out:
return ret;
}
static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
mntput(uverbs_event_mnt);
unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
idr_destroy(&ib_uverbs_ah_idr);
idr_destroy(&ib_uverbs_cq_idr);
idr_destroy(&ib_uverbs_qp_idr);
idr_destroy(&ib_uverbs_srq_idr);
}
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);

View File

@@ -0,0 +1,139 @@
/*
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
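/*
 * Marshalling helpers: copy kernel ib_ah_attr, ib_qp_attr and
 * ib_sa_path_rec structures to and from their user-space ABI counterparts
 * (struct ib_uverbs_ah_attr, struct ib_uverbs_qp_attr,
 * struct ib_user_path_rec), field by field.
 */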
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
struct ib_ah_attr *src)
{
memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
dst->grh.flow_label = src->grh.flow_label;
dst->grh.sgid_index = src->grh.sgid_index;
dst->grh.hop_limit = src->grh.hop_limit;
dst->grh.traffic_class = src->grh.traffic_class;
dst->dlid = src->dlid;
dst->sl = src->sl;
dst->src_path_bits = src->src_path_bits;
dst->static_rate = src->static_rate;
dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
dst->port_num = src->port_num;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->cur_qp_state = src->cur_qp_state;
dst->path_mtu = src->path_mtu;
dst->path_mig_state = src->path_mig_state;
dst->qkey = src->qkey;
dst->rq_psn = src->rq_psn;
dst->sq_psn = src->sq_psn;
dst->dest_qp_num = src->dest_qp_num;
dst->qp_access_flags = src->qp_access_flags;
dst->max_send_wr = src->cap.max_send_wr;
dst->max_recv_wr = src->cap.max_recv_wr;
dst->max_send_sge = src->cap.max_send_sge;
dst->max_recv_sge = src->cap.max_recv_sge;
dst->max_inline_data = src->cap.max_inline_data;
ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
dst->pkey_index = src->pkey_index;
dst->alt_pkey_index = src->alt_pkey_index;
dst->en_sqd_async_notify = src->en_sqd_async_notify;
dst->sq_draining = src->sq_draining;
dst->max_rd_atomic = src->max_rd_atomic;
dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
dst->min_rnr_timer = src->min_rnr_timer;
dst->port_num = src->port_num;
dst->timeout = src->timeout;
dst->retry_cnt = src->retry_cnt;
dst->rnr_retry = src->rnr_retry;
dst->alt_port_num = src->alt_port_num;
dst->alt_timeout = src->alt_timeout;
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
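/*
 * The two path record copies below are field-for-field mirrors of each
 * other; a field exported in one direction should also be handled in the
 * other.
 */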
void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
struct ib_sa_path_rec *src)
{
memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
dst->dlid = src->dlid;
dst->slid = src->slid;
dst->raw_traffic = src->raw_traffic;
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
dst->reversible = src->reversible;
dst->numb_path = src->numb_path;
dst->pkey = src->pkey;
dst->sl = src->sl;
dst->mtu_selector = src->mtu_selector;
dst->mtu = src->mtu;
dst->rate_selector = src->rate_selector;
dst->rate = src->rate;
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
struct ib_user_path_rec *src)
{
memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
dst->dlid = src->dlid;
dst->slid = src->slid;
dst->raw_traffic = src->raw_traffic;
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
dst->reversible = src->reversible;
dst->numb_path = src->numb_path;
dst->pkey = src->pkey;
dst->sl = src->sl;
dst->mtu_selector = src->mtu_selector;
dst->mtu = src->mtu;
dst->rate_selector = src->rate_selector;
dst->rate = src->rate;
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);

View File

@@ -0,0 +1,906 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
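/*
 * ib_rate_to_mult() and mult_to_ib_rate() convert between the ib_rate
 * enumeration and the corresponding multiple of the 2.5 Gb/s base rate,
 * e.g. IB_RATE_10_GBPS <-> 4.  Unknown values map to -1 and
 * IB_RATE_PORT_CURRENT respectively.
 */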
int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 1;
case IB_RATE_5_GBPS: return 2;
case IB_RATE_10_GBPS: return 4;
case IB_RATE_20_GBPS: return 8;
case IB_RATE_30_GBPS: return 12;
case IB_RATE_40_GBPS: return 16;
case IB_RATE_60_GBPS: return 24;
case IB_RATE_80_GBPS: return 32;
case IB_RATE_120_GBPS: return 48;
default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
case 1: return IB_RATE_2_5_GBPS;
case 2: return IB_RATE_5_GBPS;
case 4: return IB_RATE_10_GBPS;
case 8: return IB_RATE_20_GBPS;
case 12: return IB_RATE_30_GBPS;
case 16: return IB_RATE_40_GBPS;
case 24: return IB_RATE_60_GBPS;
case 32: return IB_RATE_80_GBPS;
case 48: return IB_RATE_120_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
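/*
 * Map a node type to the transport it implies: IB CAs, switches and
 * routers use the IB transport, RNICs use iWARP.  Any other node type
 * indicates a driver bug, hence the BUG().
 */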
enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
switch (node_type) {
case RDMA_NODE_IB_CA:
case RDMA_NODE_IB_SWITCH:
case RDMA_NODE_IB_ROUTER:
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
default:
BUG();
return 0;
}
}
EXPORT_SYMBOL(rdma_node_get_transport);
/* Protection domains */
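/*
 * Reference counting convention: every object created on a PD (AH, SRQ,
 * QP, MR, MW, FMR) increments pd->usecnt, and ib_dealloc_pd() returns
 * -EBUSY while any such object is still outstanding.
 */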
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
pd = device->alloc_pd(device, NULL, NULL);
if (!IS_ERR(pd)) {
pd->device = device;
pd->uobject = NULL;
atomic_set(&pd->usecnt, 0);
}
return pd;
}
EXPORT_SYMBOL(ib_alloc_pd);
int ib_dealloc_pd(struct ib_pd *pd)
{
if (atomic_read(&pd->usecnt))
return -EBUSY;
return pd->device->dealloc_pd(pd);
}
EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct ib_ah *ah;
ah = pd->device->create_ah(pd, ah_attr);
if (!IS_ERR(ah)) {
ah->device = pd->device;
ah->pd = pd;
ah->uobject = NULL;
atomic_inc(&pd->usecnt);
}
return ah;
}
EXPORT_SYMBOL(ib_create_ah);
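/*
 * ib_init_ah_from_wc() builds the address handle attributes needed to
 * reply to the sender of a received completion: the remote LID, SL and
 * path bits come from the work completion, and when a GRH is present the
 * source/destination GIDs are swapped and the flow label and traffic
 * class recovered from it.
 */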
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
u32 flow_class;
u16 gid_index;
int ret;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
ah_attr->port_num = port_num;
if (wc->wc_flags & IB_WC_GRH) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = grh->sgid;
ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
&gid_index);
if (ret)
return ret;
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
ah_attr->grh.hop_limit = 0xFF;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
struct ib_grh *grh, u8 port_num)
{
struct ib_ah_attr ah_attr;
int ret;
ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
return ib_create_ah(pd, &ah_attr);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
return ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_modify_ah);
int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
{
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_query_ah);
int ib_destroy_ah(struct ib_ah *ah)
{
struct ib_pd *pd;
int ret;
pd = ah->pd;
ret = ah->device->destroy_ah(ah);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_destroy_ah);
/* Shared receive queues */
struct ib_srq *ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr)
{
struct ib_srq *srq;
if (!pd->device->create_srq)
return ERR_PTR(-ENOSYS);
srq = pd->device->create_srq(pd, srq_init_attr, NULL);
if (!IS_ERR(srq)) {
srq->device = pd->device;
srq->pd = pd;
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
return srq;
}
EXPORT_SYMBOL(ib_create_srq);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask)
{
return srq->device->modify_srq ?
srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_modify_srq);
int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
return srq->device->query_srq ?
srq->device->query_srq(srq, srq_attr) : -ENOSYS;
}
EXPORT_SYMBOL(ib_query_srq);
int ib_destroy_srq(struct ib_srq *srq)
{
struct ib_pd *pd;
int ret;
if (atomic_read(&srq->usecnt))
return -EBUSY;
pd = srq->pd;
ret = srq->device->destroy_srq(srq);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_destroy_srq);
/* Queue pairs */
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *qp;
qp = pd->device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
qp->device = pd->device;
qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->recv_cq = qp_init_attr->recv_cq;
qp->srq = qp_init_attr->srq;
qp->uobject = NULL;
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
atomic_inc(&qp_init_attr->recv_cq->usecnt);
if (qp_init_attr->srq)
atomic_inc(&qp_init_attr->srq->usecnt);
}
return qp;
}
EXPORT_SYMBOL(ib_create_qp);
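/*
 * qp_state_table encodes the QP state machine from the IB spec.  It is
 * indexed by [current state][next state]; .valid marks legal transitions,
 * and .req_param / .opt_param give the attribute mask bits that are
 * required or permitted for each QP type on that transition.
 */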
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1];
enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_INIT] = {
.valid = 1,
.req_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
}
},
},
[IB_QPS_INIT] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_INIT] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
}
},
[IB_QPS_RTR] = {
.valid = 1,
.req_param = {
[IB_QPT_UC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
IB_QP_RQ_PSN),
[IB_QPT_RC] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
}
}
},
[IB_QPS_RTR] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_RTS] = {
.valid = 1,
.req_param = {
[IB_QPT_UD] = IB_QP_SQ_PSN,
[IB_QPT_UC] = IB_QP_SQ_PSN,
[IB_QPT_RC] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
[IB_QPT_SMI] = IB_QP_SQ_PSN,
[IB_QPT_GSI] = IB_QP_SQ_PSN,
},
.opt_param = {
[IB_QPT_UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PATH_MIG_STATE),
[IB_QPT_RC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
}
}
},
[IB_QPS_RTS] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_RTS] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE),
[IB_QPT_RC] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
}
},
[IB_QPS_SQD] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
}
},
},
[IB_QPS_SQD] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_RTS] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PATH_MIG_STATE),
[IB_QPT_RC] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
}
},
[IB_QPS_SQD] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_AV |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX |
IB_QP_PATH_MIG_STATE),
[IB_QPT_RC] = (IB_QP_PORT |
IB_QP_AV |
IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
}
}
},
[IB_QPS_SQE] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 },
[IB_QPS_RTS] = {
.valid = 1,
.opt_param = {
[IB_QPT_UD] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_UC] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
}
}
},
[IB_QPS_ERR] = {
[IB_QPS_RESET] = { .valid = 1 },
[IB_QPS_ERR] = { .valid = 1 }
}
};
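/*
 * ib_modify_qp_is_ok() checks a modify-QP request against the table above:
 * the transition must be valid, every required attribute for the QP type
 * must be present in the mask, and nothing outside the required/optional
 * set (plus IB_QP_STATE) may be set.  Returns 1 if the request is
 * acceptable, 0 otherwise.  A typical driver check, sketched here only as
 * an illustration:
 *
 *	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
 *		return -EINVAL;
 */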
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask)
{
enum ib_qp_attr_mask req_param, opt_param;
if (cur_state < 0 || cur_state > IB_QPS_ERR ||
next_state < 0 || next_state > IB_QPS_ERR)
return 0;
if (mask & IB_QP_CUR_STATE &&
cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
return 0;
if (!qp_state_table[cur_state][next_state].valid)
return 0;
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if ((mask & req_param) != req_param)
return 0;
if (mask & ~(req_param | opt_param | IB_QP_STATE))
return 0;
return 1;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
int ib_query_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
return qp->device->query_qp ?
qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);
int ib_destroy_qp(struct ib_qp *qp)
{
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
int ret;
pd = qp->pd;
scq = qp->send_cq;
rcq = qp->recv_cq;
srq = qp->srq;
ret = qp->device->destroy_qp(qp);
if (!ret) {
atomic_dec(&pd->usecnt);
atomic_dec(&scq->usecnt);
atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
}
return ret;
}
EXPORT_SYMBOL(ib_destroy_qp);
/* Completion queues */
struct ib_cq *ib_create_cq(struct ib_device *device,
ib_comp_handler comp_handler,
void (*event_handler)(struct ib_event *, void *),
void *cq_context, int cqe, int comp_vector)
{
struct ib_cq *cq;
cq = device->create_cq(device, cqe, comp_vector, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
cq->uobject = NULL;
cq->comp_handler = comp_handler;
cq->event_handler = event_handler;
cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0);
}
return cq;
}
EXPORT_SYMBOL(ib_create_cq);
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
return cq->device->modify_cq ?
cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_cq);
int ib_destroy_cq(struct ib_cq *cq)
{
if (atomic_read(&cq->usecnt))
return -EBUSY;
return cq->device->destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
return cq->device->resize_cq ?
cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
}
EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
mr = pd->device->get_dma_mr(pd, mr_access_flags);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);
struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start)
{
struct ib_mr *mr;
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
mr_access_flags, iova_start);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_reg_phys_mr);
int ib_rereg_phys_mr(struct ib_mr *mr,
int mr_rereg_mask,
struct ib_pd *pd,
struct ib_phys_buf *phys_buf_array,
int num_phys_buf,
int mr_access_flags,
u64 *iova_start)
{
struct ib_pd *old_pd;
int ret;
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
if (atomic_read(&mr->usecnt))
return -EBUSY;
old_pd = mr->pd;
ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
phys_buf_array, num_phys_buf,
mr_access_flags, iova_start);
if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) {
atomic_dec(&old_pd->usecnt);
atomic_inc(&pd->usecnt);
}
return ret;
}
EXPORT_SYMBOL(ib_rereg_phys_mr);
int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
{
return mr->device->query_mr ?
mr->device->query_mr(mr, mr_attr) : -ENOSYS;
}
EXPORT_SYMBOL(ib_query_mr);
int ib_dereg_mr(struct ib_mr *mr)
{
struct ib_pd *pd;
int ret;
if (atomic_read(&mr->usecnt))
return -EBUSY;
pd = mr->pd;
ret = mr->device->dereg_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_dereg_mr);
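/*
 * Fast registration MRs are allocated without an initial translation; a
 * page list obtained from ib_alloc_fast_reg_page_list() is bound to the
 * MR later by posting a fast-register send work request.
 */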
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
if (!pd->device->alloc_fast_reg_mr)
return ERR_PTR(-ENOSYS);
mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
int max_page_list_len)
{
struct ib_fast_reg_page_list *page_list;
if (!device->alloc_fast_reg_page_list)
return ERR_PTR(-ENOSYS);
page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
if (!IS_ERR(page_list)) {
page_list->device = device;
page_list->max_page_list_len = max_page_list_len;
}
return page_list;
}
EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
page_list->device->free_fast_reg_page_list(page_list);
}
EXPORT_SYMBOL(ib_free_fast_reg_page_list);
/* Memory windows */
struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
{
struct ib_mw *mw;
if (!pd->device->alloc_mw)
return ERR_PTR(-ENOSYS);
mw = pd->device->alloc_mw(pd);
if (!IS_ERR(mw)) {
mw->device = pd->device;
mw->pd = pd;
mw->uobject = NULL;
atomic_inc(&pd->usecnt);
}
return mw;
}
EXPORT_SYMBOL(ib_alloc_mw);
int ib_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd;
int ret;
pd = mw->pd;
ret = mw->device->dealloc_mw(mw);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_dealloc_mw);
/* "Fast" memory regions */
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
struct ib_fmr *fmr;
if (!pd->device->alloc_fmr)
return ERR_PTR(-ENOSYS);
fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
fmr->device = pd->device;
fmr->pd = pd;
atomic_inc(&pd->usecnt);
}
return fmr;
}
EXPORT_SYMBOL(ib_alloc_fmr);
int ib_unmap_fmr(struct list_head *fmr_list)
{
struct ib_fmr *fmr;
if (list_empty(fmr_list))
return 0;
fmr = list_entry(fmr_list->next, struct ib_fmr, list);
return fmr->device->unmap_fmr(fmr_list);
}
EXPORT_SYMBOL(ib_unmap_fmr);
int ib_dealloc_fmr(struct ib_fmr *fmr)
{
struct ib_pd *pd;
int ret;
pd = fmr->pd;
ret = fmr->device->dealloc_fmr(fmr);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_dealloc_fmr);
/* Multicast groups */
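/*
 * Multicast attach/detach is only defined for UD QPs, and the GID must be
 * a multicast GID (first byte 0xff); anything else is rejected with
 * -EINVAL before calling into the driver.
 */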
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
if (!qp->device->attach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
return -EINVAL;
return qp->device->attach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
if (!qp->device->detach_mcast)
return -ENOSYS;
if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
return -EINVAL;
return qp->device->detach_mcast(qp, gid, lid);
}
EXPORT_SYMBOL(ib_detach_mcast);