add idl4k kernel firmware version 1.13.0.105

This commit is contained in:
Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

36
kernel/net/9p/Kconfig Normal file
View File

@@ -0,0 +1,36 @@
#
# 9P protocol configuration
#
menuconfig NET_9P
depends on NET && EXPERIMENTAL
tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
See <http://v9fs.sf.net> for more information.
If unsure, say N.
if NET_9P
config NET_9P_VIRTIO
depends on EXPERIMENTAL && VIRTIO
tristate "9P Virtio Transport (Experimental)"
help
This builds support for a transport between
guest partitions and a host partition.
config NET_9P_RDMA
depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
tristate "9P RDMA Transport (Experimental)"
help
This builds support for an RDMA transport.
config NET_9P_DEBUG
bool "Debug information"
help
Say Y if you want the 9P subsystem to log debug information.
endif

17
kernel/net/9p/Makefile Normal file
View File

@@ -0,0 +1,17 @@
# Build rules for the 9P network protocol support.
# Core 9pnet module, selected by CONFIG_NET_9P.
obj-$(CONFIG_NET_9P) := 9pnet.o
# Optional transport modules, each selected by its own config symbol.
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
# Object lists that make up each module.
9pnet-objs := \
mod.o \
client.o \
error.o \
util.o \
protocol.o \
trans_fd.o \
9pnet_virtio-objs := \
trans_virtio.o \
9pnet_rdma-objs := \
trans_rdma.o \

1273
kernel/net/9p/client.c Normal file

File diff suppressed because it is too large Load Diff

247
kernel/net/9p/error.c Normal file
View File

@@ -0,0 +1,247 @@
/*
* net/9p/error.c
*
* Error string handling
*
* Plan 9 uses error strings, Unix uses error numbers. These functions
* try to help manage that and provide for dynamically adding error
* mappings.
*
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/errno.h>
#include <net/9p/9p.h>
/**
 * struct errormap - map string errors from Plan 9 to Linux numeric ids
 * @name: string sent over 9P
 * @val: numeric id most closely representing @name
 * @namelen: length of @name; left zero in the static table and filled in
 *           by p9_error_init()
 * @list: hash-table list for string lookup
 *
 * The errmap table initialises entries positionally as {name, val}, so the
 * order of the first two members must not change.
 */
struct errormap {
char *name;
int val;
int namelen;
struct hlist_node list;
};
/* Number of buckets in the error-string hash table. */
#define ERRHASHSZ 32
/* Buckets are indexed by jhash() of the error string modulo ERRHASHSZ. */
static struct hlist_head hash_errmap[ERRHASHSZ];
/*
 * Static table of known error strings and the errno each one maps to.
 * Entries are written as {name, val}; namelen and list are filled in by
 * p9_error_init(). The table ends with a sentinel whose name is NULL.
 */
/* FixMe - reduce to a reasonable size */
static struct errormap errmap[] = {
{"Operation not permitted", EPERM},
{"wstat prohibited", EPERM},
{"No such file or directory", ENOENT},
{"directory entry not found", ENOENT},
{"file not found", ENOENT},
{"Interrupted system call", EINTR},
{"Input/output error", EIO},
{"No such device or address", ENXIO},
{"Argument list too long", E2BIG},
{"Bad file descriptor", EBADF},
{"Resource temporarily unavailable", EAGAIN},
{"Cannot allocate memory", ENOMEM},
{"Permission denied", EACCES},
{"Bad address", EFAULT},
{"Block device required", ENOTBLK},
{"Device or resource busy", EBUSY},
{"File exists", EEXIST},
{"Invalid cross-device link", EXDEV},
{"No such device", ENODEV},
{"Not a directory", ENOTDIR},
{"Is a directory", EISDIR},
{"Invalid argument", EINVAL},
{"Too many open files in system", ENFILE},
{"Too many open files", EMFILE},
{"Text file busy", ETXTBSY},
{"File too large", EFBIG},
{"No space left on device", ENOSPC},
{"Illegal seek", ESPIPE},
{"Read-only file system", EROFS},
{"Too many links", EMLINK},
{"Broken pipe", EPIPE},
{"Numerical argument out of domain", EDOM},
{"Numerical result out of range", ERANGE},
{"Resource deadlock avoided", EDEADLK},
{"File name too long", ENAMETOOLONG},
{"No locks available", ENOLCK},
{"Function not implemented", ENOSYS},
{"Directory not empty", ENOTEMPTY},
{"Too many levels of symbolic links", ELOOP},
{"No message of desired type", ENOMSG},
{"Identifier removed", EIDRM},
{"No data available", ENODATA},
{"Machine is not on the network", ENONET},
{"Package not installed", ENOPKG},
{"Object is remote", EREMOTE},
{"Link has been severed", ENOLINK},
{"Communication error on send", ECOMM},
{"Protocol error", EPROTO},
{"Bad message", EBADMSG},
{"File descriptor in bad state", EBADFD},
{"Streams pipe error", ESTRPIPE},
{"Too many users", EUSERS},
{"Socket operation on non-socket", ENOTSOCK},
{"Message too long", EMSGSIZE},
{"Protocol not available", ENOPROTOOPT},
{"Protocol not supported", EPROTONOSUPPORT},
{"Socket type not supported", ESOCKTNOSUPPORT},
{"Operation not supported", EOPNOTSUPP},
{"Protocol family not supported", EPFNOSUPPORT},
{"Network is down", ENETDOWN},
{"Network is unreachable", ENETUNREACH},
{"Network dropped connection on reset", ENETRESET},
{"Software caused connection abort", ECONNABORTED},
{"Connection reset by peer", ECONNRESET},
{"No buffer space available", ENOBUFS},
{"Transport endpoint is already connected", EISCONN},
{"Transport endpoint is not connected", ENOTCONN},
{"Cannot send after transport endpoint shutdown", ESHUTDOWN},
{"Connection timed out", ETIMEDOUT},
{"Connection refused", ECONNREFUSED},
{"Host is down", EHOSTDOWN},
{"No route to host", EHOSTUNREACH},
{"Operation already in progress", EALREADY},
{"Operation now in progress", EINPROGRESS},
{"Is a named type file", EISNAM},
{"Remote I/O error", EREMOTEIO},
{"Disk quota exceeded", EDQUOT},
/* errors from fossil, vacfs, and u9fs */
{"fid unknown or out of range", EBADF},
{"permission denied", EACCES},
{"file does not exist", ENOENT},
{"authentication failed", ECONNREFUSED},
{"bad offset in directory read", ESPIPE},
{"bad use of fid", EBADF},
{"wstat can't convert between files and directories", EPERM},
{"directory is not empty", ENOTEMPTY},
{"file exists", EEXIST},
{"file already exists", EEXIST},
{"file or directory already exists", EEXIST},
{"fid already in use", EBADF},
{"file in use", ETXTBSY},
{"i/o error", EIO},
{"file already open for I/O", ETXTBSY},
{"illegal mode", EINVAL},
{"illegal name", ENAMETOOLONG},
{"not a directory", ENOTDIR},
{"not a member of proposed group", EPERM},
{"not owner", EACCES},
{"only owner can change group in wstat", EACCES},
{"read only file system", EROFS},
{"no access to special file", EPERM},
{"i/o count too large", EIO},
{"unknown group", EINVAL},
{"unknown user", EINVAL},
{"bogus wstat buffer", EPROTO},
{"exclusive use file already open", EAGAIN},
{"corrupted directory entry", EIO},
{"corrupted file entry", EIO},
{"corrupted block label", EIO},
{"corrupted meta data", EIO},
{"illegal offset", EINVAL},
{"illegal path element", ENOENT},
{"root of file system is corrupted", EIO},
{"corrupted super block", EIO},
{"protocol botch", EPROTO},
{"file system is full", ENOSPC},
{"file is in use", EAGAIN},
{"directory entry is not allocated", ENOENT},
{"file is read only", EROFS},
{"file has been removed", EIDRM},
{"only support truncation to zero length", EPERM},
{"cannot remove root", EPERM},
{"file too big", EFBIG},
{"venti i/o error", EIO},
/*
 * these are not errors
 * NOTE(review): p9_errstr2errno() treats a looked-up value of 0 the same
 * as "not found", so these strings are still logged as unknown there.
 */
{"u9fs rhostsauth: no authentication required", 0},
{"u9fs authnone: no authentication required", 0},
{NULL, -1}
};
/**
 * p9_error_init - populate the error-string hash table
 *
 * Resets every bucket of hash_errmap, then walks the static errmap table
 * up to its NULL-name sentinel. For each entry the string length is cached
 * in @namelen and the entry is linked at the head of the bucket selected by
 * jhash() of the string modulo ERRHASHSZ.
 *
 * Always returns 1.
 */
int p9_error_init(void)
{
	struct errormap *entry = errmap;
	int idx;

	/* start from empty buckets */
	for (idx = 0; idx < ERRHASHSZ; idx++)
		INIT_HLIST_HEAD(&hash_errmap[idx]);

	/* hash every known error string */
	while (entry->name != NULL) {
		entry->namelen = strlen(entry->name);
		idx = jhash(entry->name, entry->namelen, 0) % ERRHASHSZ;
		INIT_HLIST_NODE(&entry->list);
		hlist_add_head(&entry->list, &hash_errmap[idx]);
		entry++;
	}

	return 1;
}
EXPORT_SYMBOL(p9_error_init);
/**
 * p9_errstr2errno - convert error string to error number
 * @errstr: error string; only the first @len bytes are examined, so it
 *          does not need to be NUL-terminated
 * @len: length of error string
 *
 * Hashes @errstr and searches the matching bucket of hash_errmap for an
 * entry with the same length and bytes.
 *
 * Returns the negated errno of the matching entry. If no entry matches, or
 * the matching entry maps to 0, the string is logged and -ESERVERFAULT is
 * returned. @errstr is never modified.
 */
int p9_errstr2errno(char *errstr, int len)
{
	int errno = 0;
	struct hlist_node *p = NULL;
	struct errormap *c = NULL;
	int bucket;

	bucket = jhash(errstr, len, 0) % ERRHASHSZ;
	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
		if (c->namelen == len && !memcmp(c->name, errstr, len)) {
			errno = c->val;
			break;
		}
	}

	if (errno == 0) {
		/* TODO: if error isn't found, add it dynamically */
		/*
		 * Bound the print with a precision instead of writing a
		 * terminator at errstr[len], which may lie outside the
		 * caller's buffer.
		 */
		printk(KERN_ERR "%s: server reported unknown error %.*s\n",
			__func__, len, errstr);
		errno = ESERVERFAULT;
	}

	return -errno;
}
EXPORT_SYMBOL(p9_errstr2errno);

174
kernel/net/9p/mod.c Normal file
View File

@@ -0,0 +1,174 @@
/*
* net/9p/mod.c
*
* 9P entry point
*
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <net/9p/9p.h>
#include <linux/fs.h>
#include <linux/parser.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <linux/list.h>
#include <linux/spinlock.h>
/*
 * Global debug mask, only present when CONFIG_NET_9P_DEBUG is set.
 * Exposed as the "debug" module parameter with permission bits 0.
 */
#ifdef CONFIG_NET_9P_DEBUG
unsigned int p9_debug_level = 0; /* feature-rific global debug level */
EXPORT_SYMBOL(p9_debug_level);
module_param_named(debug, p9_debug_level, uint, 0);
MODULE_PARM_DESC(debug, "9P debugging level");
#endif
/*
 * Dynamic Transport Registration Routines
 *
 * v9fs_trans_list holds every registered transport; all accesses in this
 * file take v9fs_trans_lock around the list operation.
 */
static DEFINE_SPINLOCK(v9fs_trans_lock);
static LIST_HEAD(v9fs_trans_list);
/**
 * v9fs_register_trans - add a transport to the list of known transports
 * @m: structure describing the transport module and entry points
 *
 * Appends @m to the tail of v9fs_trans_list while holding v9fs_trans_lock.
 */
void v9fs_register_trans(struct p9_trans_module *m)
{
	struct list_head *entry = &m->list;

	spin_lock(&v9fs_trans_lock);
	list_add_tail(entry, &v9fs_trans_list);
	spin_unlock(&v9fs_trans_lock);
}
EXPORT_SYMBOL(v9fs_register_trans);
/**
 * v9fs_unregister_trans - remove a transport from the list of known transports
 * @m: the transport to remove
 *
 * Unlinks @m from v9fs_trans_list and reinitialises its list node, all
 * while holding v9fs_trans_lock.
 */
void v9fs_unregister_trans(struct p9_trans_module *m)
{
	struct list_head *entry = &m->list;

	spin_lock(&v9fs_trans_lock);
	list_del_init(entry);
	spin_unlock(&v9fs_trans_lock);
}
EXPORT_SYMBOL(v9fs_unregister_trans);
/**
* v9fs_get_trans_by_name - get transport with the matching name
* @name: string identifying transport
*
*/
struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name)
{
struct p9_trans_module *t, *found = NULL;
spin_lock(&v9fs_trans_lock);
list_for_each_entry(t, &v9fs_trans_list, list)
if (strncmp(t->name, name->from, name->to-name->from) == 0 &&
try_module_get(t->owner)) {
found = t;
break;
}
spin_unlock(&v9fs_trans_lock);
return found;
}
EXPORT_SYMBOL(v9fs_get_trans_by_name);
/**
* v9fs_get_default_trans - get the default transport
*
*/
struct p9_trans_module *v9fs_get_default_trans(void)
{
struct p9_trans_module *t, *found = NULL;
spin_lock(&v9fs_trans_lock);
list_for_each_entry(t, &v9fs_trans_list, list)
if (t->def && try_module_get(t->owner)) {
found = t;
break;
}
if (!found)
list_for_each_entry(t, &v9fs_trans_list, list)
if (try_module_get(t->owner)) {
found = t;
break;
}
spin_unlock(&v9fs_trans_lock);
return found;
}
EXPORT_SYMBOL(v9fs_get_default_trans);
/**
 * v9fs_put_trans - drop the module reference held on a transport
 * @m: transport to put; NULL is accepted and ignored
 */
void v9fs_put_trans(struct p9_trans_module *m)
{
	if (!m)
		return;

	module_put(m->owner);
}
/**
 * init_p9 - module entry point
 *
 * Builds the error-string hash table, announces the module in the kernel
 * log and initialises the fd transport. The return values of the helpers
 * are not examined.
 *
 * Always returns 0.
 */
static int __init init_p9(void)
{
	p9_error_init();
	printk(KERN_INFO "Installing 9P2000 support\n");
	p9_trans_fd_init();

	return 0;
}
/**
 * exit_p9 - module exit point
 *
 * Logs the unload and shuts down the fd transport that init_p9() set up.
 */
static void __exit exit_p9(void)
{
printk(KERN_INFO "Unloading 9P2000 support\n");
p9_trans_fd_exit();
}
/* Register the module entry/exit points and the module metadata. */
module_init(init_p9)
module_exit(exit_p9)
MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
MODULE_LICENSE("GPL");

571
kernel/net/9p/protocol.c Normal file
View File

@@ -0,0 +1,571 @@
/*
* net/9p/protocol.c
*
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
*
* Based on code from Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2008 by IBM, Corp.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include "protocol.h"
/*
 * Fallback helper macros, each defined only if no earlier header already
 * provides one of the same name.
 *
 * MIN/MAX evaluate the selected argument twice, so arguments must be free
 * of side effects.
 */
#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
/* Byte offset of member @memb inside @type. */
#ifndef offset_of
#define offset_of(type, memb) \
((unsigned long)(&((type *)0)->memb))
#endif
/* Recover a pointer to the enclosing @type from a pointer to its @memb. */
#ifndef container_of
#define container_of(obj, type, memb) \
((type *)(((char *)obj) - offset_of(type, memb)))
#endif
/* Forward declaration: p9pdu_vwritef() calls p9pdu_writef() recursively. */
static int
p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
#ifdef CONFIG_NET_9P_DEBUG
/*
 * p9pdu_dump - hex-dump the start of a PDU through P9_DPRINTK
 * @way: non-zero selects the "[[[" prefix, zero the "]]]" prefix
 * @pdu: packet whose first bytes are dumped
 *
 * At most 255 - 16 bytes of the packet are considered. Each byte is
 * rendered as two hex digits plus a space, with an extra space after every
 * 4th byte and a newline after every 32nd. The text is built in a 255-byte
 * stack buffer via scnprintf(), so output that does not fit is cut short.
 */
void
p9pdu_dump(int way, struct p9_fcall *pdu)
{
	char buf[255];
	int buflen = 255;
	u8 *data = pdu->sdata;
	int datalen = pdu->size;
	int used = 0;
	int i;

	if (datalen > (buflen-16))
		datalen = buflen-16;

	for (i = 0; i < datalen; i++) {
		used += scnprintf(buf + used, buflen - used, "%02x ", data[i]);
		if (i%4 == 3)
			used += scnprintf(buf + used, buflen - used, " ");
		if (i%32 == 31)
			used += scnprintf(buf + used, buflen - used, "\n");
	}
	used += scnprintf(buf + used, buflen - used, "\n");

	if (way)
		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
	else
		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
}
#else
/* Debugging disabled: the dump is a no-op. */
void
p9pdu_dump(int way, struct p9_fcall *pdu)
{
}
#endif
EXPORT_SYMBOL(p9pdu_dump);
/*
 * p9stat_free - release the strings owned by a stat structure
 * @stbuf: stat structure whose name, uid, gid, muid and extension strings
 *         are passed to kfree()
 *
 * Only the strings are freed; @stbuf itself is left alone and its pointers
 * are not cleared.
 */
void p9stat_free(struct p9_wstat *stbuf)
{
	const void *strings[] = {
		stbuf->name,
		stbuf->uid,
		stbuf->gid,
		stbuf->muid,
		stbuf->extension,
	};
	unsigned int i;

	for (i = 0; i < sizeof(strings) / sizeof(strings[0]); i++)
		kfree(strings[i]);
}
EXPORT_SYMBOL(p9stat_free);
/*
 * pdu_read - copy bytes out of a PDU at its current read offset
 * @pdu: packet to read from
 * @data: destination buffer
 * @size: number of bytes requested
 *
 * Copies as many of the requested bytes as remain between pdu->offset and
 * pdu->size, then advances pdu->offset by that amount.
 *
 * Returns the number of requested bytes that could NOT be supplied, so 0
 * means the read was satisfied in full.
 */
static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
	size_t avail = pdu->size - pdu->offset;
	size_t len = MIN(avail, size);

	memcpy(data, &pdu->sdata[pdu->offset], len);
	pdu->offset += len;

	return size - len;
}
/*
 * pdu_write - append bytes to a PDU
 * @pdu: packet to append to
 * @data: source buffer
 * @size: number of bytes to append
 *
 * Copies as many bytes as still fit between pdu->size and pdu->capacity,
 * then grows pdu->size by that amount.
 *
 * Returns the number of bytes that did NOT fit, so 0 means everything was
 * written.
 */
static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
{
	size_t room = pdu->capacity - pdu->size;
	size_t len = MIN(room, size);

	memcpy(&pdu->sdata[pdu->size], data, len);
	pdu->size += len;

	return size - len;
}
/*
 * pdu_write_u - append bytes from a user-space buffer to a PDU
 * @pdu: packet to append to
 * @udata: user-space source buffer
 * @size: number of bytes to append
 *
 * Copies as many bytes as still fit between pdu->size and pdu->capacity.
 * If copy_from_user() reports any uncopied bytes, nothing is accounted as
 * written: pdu->size is left unchanged and the full @size is reported as
 * outstanding, so the caller sees a failure instead of sending buffer
 * contents that never came from the user.
 *
 * Returns the number of bytes that were NOT appended; 0 means success.
 */
static size_t
pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
{
	size_t len = MIN(pdu->capacity - pdu->size, size);
	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);

	if (err) {
		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
		/* a partial copy must not be counted as payload */
		len = 0;
	}

	pdu->size += len;
	return size - len;
}
/*
 * Format characters understood by p9pdu_vreadf() and p9pdu_vwritef():
 *
 *	b - int8_t
 *	w - int16_t
 *	d - int32_t
 *	q - int64_t
 *	s - string
 *	S - stat
 *	Q - qid
 *	D - data blob (int32_t size followed by void *, results are not freed)
 *	U - data blob taken from user space (handled by p9pdu_vwritef() only)
 *	T - array of strings (int16_t count, followed by strings)
 *	R - array of qids (int16_t count, followed by qids)
 *	? - if optional = 1, continue parsing
 */

/*
 * p9pdu_vreadf - decode fields from a PDU according to a format string
 * @pdu: packet to decode from, starting at pdu->offset
 * @optional: when zero, decoding stops successfully at a '?' in @fmt
 * @fmt: sequence of the format characters listed above
 * @ap: one output pointer per format character ('D', 'T' and 'R' take two)
 *
 * Multi-byte integers are converted from little-endian. Strings ('s') and
 * the arrays produced by 'T' and 'R' are allocated with kmalloc() and
 * belong to the caller on success. On failure 'T' and 'R' free whatever
 * they allocated and set the caller's array pointer to NULL.
 *
 * Returns 0 on success, -EFAULT when the packet is too short, or -ENOMEM
 * when an allocation fails. An unknown format character hits BUG().
 */
static int
p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
{
	const char *ptr;
	int errcode = 0;

	for (ptr = fmt; *ptr; ptr++) {
		switch (*ptr) {
		case 'b': {
			int8_t *val = va_arg(ap, int8_t *);

			if (pdu_read(pdu, val, sizeof(*val)))
				errcode = -EFAULT;
			break;
		}
		case 'w': {
			int16_t *val = va_arg(ap, int16_t *);
			__le16 le_val;

			if (pdu_read(pdu, &le_val, sizeof(le_val))) {
				errcode = -EFAULT;
				break;
			}
			*val = le16_to_cpu(le_val);
			break;
		}
		case 'd': {
			int32_t *val = va_arg(ap, int32_t *);
			__le32 le_val;

			if (pdu_read(pdu, &le_val, sizeof(le_val))) {
				errcode = -EFAULT;
				break;
			}
			*val = le32_to_cpu(le_val);
			break;
		}
		case 'q': {
			int64_t *val = va_arg(ap, int64_t *);
			__le64 le_val;

			if (pdu_read(pdu, &le_val, sizeof(le_val))) {
				errcode = -EFAULT;
				break;
			}
			*val = le64_to_cpu(le_val);
			break;
		}
		case 's': {
			char **sptr = va_arg(ap, char **);
			int16_t len;
			int size;

			errcode = p9pdu_readf(pdu, optional, "w", &len);
			if (errcode)
				break;

			/* a negative on-wire length is treated as empty */
			size = MAX(len, 0);

			*sptr = kmalloc(size + 1, GFP_KERNEL);
			if (*sptr == NULL) {
				/* allocation failure, not a short packet */
				errcode = -ENOMEM;
				break;
			}
			if (pdu_read(pdu, *sptr, size)) {
				errcode = -EFAULT;
				kfree(*sptr);
				*sptr = NULL;
			} else
				(*sptr)[size] = 0;
			break;
		}
		case 'Q': {
			struct p9_qid *qid = va_arg(ap, struct p9_qid *);

			errcode = p9pdu_readf(pdu, optional, "bdq",
					      &qid->type, &qid->version,
					      &qid->path);
			break;
		}
		case 'S': {
			struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *);

			/* zeroed first so p9stat_free() is safe on failure */
			memset(stbuf, 0, sizeof(struct p9_wstat));
			stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -1;
			errcode = p9pdu_readf(pdu, optional,
					      "wwdQdddqssss?sddd",
					      &stbuf->size, &stbuf->type,
					      &stbuf->dev, &stbuf->qid,
					      &stbuf->mode, &stbuf->atime,
					      &stbuf->mtime, &stbuf->length,
					      &stbuf->name, &stbuf->uid,
					      &stbuf->gid, &stbuf->muid,
					      &stbuf->extension,
					      &stbuf->n_uid, &stbuf->n_gid,
					      &stbuf->n_muid);
			if (errcode)
				p9stat_free(stbuf);
			break;
		}
		case 'D': {
			int32_t *count = va_arg(ap, int32_t *);
			void **data = va_arg(ap, void **);

			errcode = p9pdu_readf(pdu, optional, "d", count);
			if (!errcode) {
				/*
				 * The blob is returned in place (no copy) and
				 * clamped to what is left in the packet.
				 */
				*count = MIN(*count, pdu->size - pdu->offset);
				*data = &pdu->sdata[pdu->offset];
			}
			break;
		}
		case 'T': {
			int16_t *nwname = va_arg(ap, int16_t *);
			char ***wnames = va_arg(ap, char ***);
			int i;

			/*
			 * Give the caller's pointer a defined value before
			 * anything can fail, so no error path ever looks at
			 * or frees an uninitialised pointer.
			 */
			*wnames = NULL;

			errcode = p9pdu_readf(pdu, optional, "w", nwname);
			if (errcode)
				break;

			*wnames = kmalloc(sizeof(char *) * *nwname, GFP_KERNEL);
			if (!*wnames) {
				errcode = -ENOMEM;
				break;
			}
			/* NULL slots keep the cleanup kfree() calls harmless */
			memset(*wnames, 0, sizeof(char *) * *nwname);

			for (i = 0; i < *nwname; i++) {
				errcode = p9pdu_readf(pdu, optional, "s",
						      &(*wnames)[i]);
				if (errcode)
					break;
			}

			if (errcode) {
				for (i = 0; i < *nwname; i++)
					kfree((*wnames)[i]);
				kfree(*wnames);
				*wnames = NULL;
			}
			break;
		}
		case 'R': {
			int16_t *nwqid = va_arg(ap, int16_t *);
			struct p9_qid **wqids = va_arg(ap, struct p9_qid **);

			*wqids = NULL;

			errcode = p9pdu_readf(pdu, optional, "w", nwqid);
			if (!errcode) {
				*wqids = kmalloc(*nwqid * sizeof(struct p9_qid),
						 GFP_KERNEL);
				if (*wqids == NULL)
					errcode = -ENOMEM;
			}

			if (!errcode) {
				int i;

				for (i = 0; i < *nwqid; i++) {
					errcode = p9pdu_readf(pdu, optional,
							      "Q",
							      &(*wqids)[i]);
					if (errcode)
						break;
				}
			}

			if (errcode) {
				kfree(*wqids);
				*wqids = NULL;
			}
			break;
		}
		case '?':
			if (!optional)
				return 0;
			break;
		default:
			BUG();
			break;
		}

		if (errcode)
			break;
	}

	return errcode;
}
int
p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
{
const char *ptr;
int errcode = 0;
for (ptr = fmt; *ptr; ptr++) {
switch (*ptr) {
case 'b':{
int8_t val = va_arg(ap, int);
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'w':{
__le16 val = cpu_to_le16(va_arg(ap, int));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'd':{
__le32 val = cpu_to_le32(va_arg(ap, int32_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'q':{
__le64 val = cpu_to_le64(va_arg(ap, int64_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 's':{
const char *sptr = va_arg(ap, const char *);
int16_t len = 0;
if (sptr)
len = MIN(strlen(sptr), USHORT_MAX);
errcode = p9pdu_writef(pdu, optional, "w", len);
if (!errcode && pdu_write(pdu, sptr, len))
errcode = -EFAULT;
}
break;
case 'Q':{
const struct p9_qid *qid =
va_arg(ap, const struct p9_qid *);
errcode =
p9pdu_writef(pdu, optional, "bdq",
qid->type, qid->version,
qid->path);
} break;
case 'S':{
const struct p9_wstat *stbuf =
va_arg(ap, const struct p9_wstat *);
errcode =
p9pdu_writef(pdu, optional,
"wwdQdddqssss?sddd",
stbuf->size, stbuf->type,
stbuf->dev, &stbuf->qid,
stbuf->mode, stbuf->atime,
stbuf->mtime, stbuf->length,
stbuf->name, stbuf->uid,
stbuf->gid, stbuf->muid,
stbuf->extension, stbuf->n_uid,
stbuf->n_gid, stbuf->n_muid);
} break;
case 'D':{
int32_t count = va_arg(ap, int32_t);
const void *data = va_arg(ap, const void *);
errcode =
p9pdu_writef(pdu, optional, "d", count);
if (!errcode && pdu_write(pdu, data, count))
errcode = -EFAULT;
}
break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
va_arg(ap, const void __user *);
errcode =
p9pdu_writef(pdu, optional, "d", count);
if (!errcode && pdu_write_u(pdu, udata, count))
errcode = -EFAULT;
}
break;
case 'T':{
int16_t nwname = va_arg(ap, int);
const char **wnames = va_arg(ap, const char **);
errcode =
p9pdu_writef(pdu, optional, "w", nwname);
if (!errcode) {
int i;
for (i = 0; i < nwname; i++) {
errcode =
p9pdu_writef(pdu, optional,
"s",
wnames[i]);
if (errcode)
break;
}
}
}
break;
case 'R':{
int16_t nwqid = va_arg(ap, int);
struct p9_qid *wqids =
va_arg(ap, struct p9_qid *);
errcode =
p9pdu_writef(pdu, optional, "w", nwqid);
if (!errcode) {
int i;
for (i = 0; i < nwqid; i++) {
errcode =
p9pdu_writef(pdu, optional,
"Q",
&wqids[i]);
if (errcode)
break;
}
}
}
break;
case '?':
if (!optional)
return 0;
break;
default:
BUG();
break;
}
if (errcode)
break;
}
return errcode;
}
/*
 * p9pdu_readf - variadic front end to p9pdu_vreadf()
 * @pdu: packet to decode from
 * @optional: forwarded unchanged to p9pdu_vreadf()
 * @fmt: format string describing the fields to decode
 *
 * Returns whatever p9pdu_vreadf() returns.
 */
int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
{
	va_list args;
	int rc;

	va_start(args, fmt);
	rc = p9pdu_vreadf(pdu, optional, fmt, args);
	va_end(args);

	return rc;
}
/*
 * p9pdu_writef - variadic front end to p9pdu_vwritef()
 * @pdu: packet to append to
 * @optional: forwarded unchanged to p9pdu_vwritef()
 * @fmt: format string describing the fields to encode
 *
 * Returns whatever p9pdu_vwritef() returns.
 */
static int
p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
{
	va_list args;
	int rc;

	va_start(args, fmt);
	rc = p9pdu_vwritef(pdu, optional, fmt, args);
	va_end(args);

	return rc;
}
int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
{
struct p9_fcall fake_pdu;
int ret;
fake_pdu.size = len;
fake_pdu.capacity = len;
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
ret = p9pdu_readf(&fake_pdu, dotu, "S", st);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
p9pdu_dump(1, &fake_pdu);
}
return ret;
}
EXPORT_SYMBOL(p9stat_read);
/*
 * p9pdu_prepare - write the header of an outgoing PDU
 * @pdu: packet to append the header to
 * @tag: tag written as the 16-bit field
 * @type: message type written as the 8-bit field
 *
 * Emits a 32-bit size field set to 0, followed by @type and @tag. The zero
 * size is a placeholder; p9pdu_finalize() overwrites the first four bytes
 * with the real size.
 *
 * Returns the result of p9pdu_writef().
 */
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
{
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
/*
 * p9pdu_finalize - patch the final size into the start of a PDU
 * @pdu: fully built outgoing packet
 *
 * Temporarily rewinds pdu->size to 0 so the 32-bit size is written over
 * the first four bytes of the packet, then restores pdu->size. With
 * CONFIG_NET_9P_DEBUG the packet is dumped when packet debugging is
 * enabled in p9_debug_level.
 *
 * Returns the result of writing the size field.
 */
int p9pdu_finalize(struct p9_fcall *pdu)
{
	int total = pdu->size;
	int rc;

	/* rewind, overwrite the size field at offset 0, then restore */
	pdu->size = 0;
	rc = p9pdu_writef(pdu, 0, "d", total);
	pdu->size = total;

#ifdef CONFIG_NET_9P_DEBUG
	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
		p9pdu_dump(0, pdu);
#endif

	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
		   pdu->id, pdu->tag);

	return rc;
}
/*
 * p9pdu_reset - make a PDU empty again
 * @pdu: packet whose fill level (size) and read position (offset) are both
 *       set back to 0; the data buffer itself is not touched
 */
void p9pdu_reset(struct p9_fcall *pdu)
{
	pdu->size = 0;
	pdu->offset = 0;
}

34
kernel/net/9p/protocol.h Normal file
View File

@@ -0,0 +1,34 @@
/*
* net/9p/protocol.h
*
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
*
* Based on code from Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2008 by IBM, Corp.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
/* Encode the values in @ap into @pdu as described by @fmt. */
int
p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
/* Decode fields from @pdu into the pointers that follow @fmt. */
int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
/* Write the PDU header: placeholder size, then @type and @tag. */
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
/* Patch the final size into the start of @pdu. */
int p9pdu_finalize(struct p9_fcall *pdu);
/* Hex-dump a PDU for debugging; the int selects the log prefix. */
void p9pdu_dump(int, struct p9_fcall *);
/* Reset the size and offset of @pdu to 0. */
void p9pdu_reset(struct p9_fcall *pdu);

1131
kernel/net/9p/trans_fd.c Normal file

File diff suppressed because it is too large Load Diff

713
kernel/net/9p/trans_rdma.c Normal file
View File

@@ -0,0 +1,713 @@
/*
* net/9p/trans_rdma.c
*
* RDMA transport layer based on the trans_fd.c implementation.
*
* Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
* Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
* Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/semaphore.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
/* Defaults and limits for the RDMA transport. */
#define P9_PORT 5640
#define P9_RDMA_SQ_DEPTH 32
#define P9_RDMA_RQ_DEPTH 32
#define P9_RDMA_SEND_SGE 4
#define P9_RDMA_RECV_SGE 4
#define P9_RDMA_IRD 0
#define P9_RDMA_ORD 0
#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
* safely advertise a maxsize
* of 16k (4 * 4096 bytes).
* NOTE(review): this comment
* used to say 64k, which does
* not match the value. */
/* Number of pages spanned by P9_RDMA_MAXSIZE. */
#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
/**
 * struct p9_trans_rdma - RDMA transport instance
 *
 * @state: tracks the transport state machine for connection setup and tear down
 * @cm_id: The RDMA CM ID
 * @pd: Protection Domain pointer
 * @qp: Queue Pair pointer
 * @cq: Completion Queue pointer
 * @dma_mr: DMA Memory Region pointer
 * @lkey: The local access only memory region key
 * @timeout: Time to wait for connection management events.
 *           NOTE(review): this was documented as uSecs, but the default
 *           P9_RDMA_TIMEOUT (30000) is annotated "30 seconds" and
 *           struct p9_rdma_opts documents msecs -- confirm the unit.
 * @sq_depth: The depth of the Send Queue
 * @sq_sem: Semaphore for the SQ
 * @rq_depth: The depth of the Receive Queue.
 * @rq_count: Count of requests in the Receive Queue.
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
 */
struct p9_trans_rdma {
enum {
P9_RDMA_INIT,
P9_RDMA_ADDR_RESOLVED,
P9_RDMA_ROUTE_RESOLVED,
P9_RDMA_CONNECTED,
P9_RDMA_FLUSHING,
P9_RDMA_CLOSING,
P9_RDMA_CLOSED,
} state;
struct rdma_cm_id *cm_id;
struct ib_pd *pd;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_mr *dma_mr;
u32 lkey;
long timeout;
int sq_depth;
struct semaphore sq_sem;
int rq_depth;
atomic_t rq_count;
struct sockaddr_in addr;
spinlock_t req_lock;
struct completion cm_done;
};
/**
 * p9_rdma_context - Keeps track of in-process WR
 *
 * @wc_op: The original WR op for when the CQE completes in error.
 * @busa: Bus address to unmap when the WR completes
 * @req: Keeps track of requests (send)
 * @rc: Keeps track of replies (receive)
 *
 * @req and @rc share storage; the completion handler picks the one to use
 * according to @wc_op.
 */
struct p9_rdma_req;
struct p9_rdma_context {
enum ib_wc_opcode wc_op;
dma_addr_t busa;
union {
struct p9_req_t *req;
struct p9_fcall *rc;
};
};
/**
 * p9_rdma_opts - Collection of mount options
 * @port: port of connection
 * @sq_depth: The requested depth of the SQ. This really doesn't need
 * to be any deeper than the number of threads used in the client
 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
 * @timeout: Time to wait in msecs for CM events
 *
 * parse_opts() fills every member with its default before applying the
 * values found in the mount option string.
 */
struct p9_rdma_opts {
short port;
int sq_depth;
int rq_depth;
long timeout;
};
/*
 * Option Parsing (code inspired by NFS code)
 */
enum {
/* Options that take integer arguments */
Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
};
/*
 * Patterns handed to match_token() by parse_opts(). The table ends with
 * the Opt_err entry, whose pattern is NULL.
 */
static match_table_t tokens = {
{Opt_port, "port=%u"},
{Opt_sq_depth, "sq=%u"},
{Opt_rq_depth, "rq=%u"},
{Opt_timeout, "timeout=%u"},
{Opt_err, NULL},
};
/**
* parse_opts - parse mount options into rdma options structure
* @params: options string passed from mount
* @opts: rdma transport-specific structure to parse options into
*
* Returns 0 upon success, -ERRNO upon failure
*/
static int parse_opts(char *params, struct p9_rdma_opts *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
char *options;
int ret;
opts->port = P9_PORT;
opts->sq_depth = P9_RDMA_SQ_DEPTH;
opts->rq_depth = P9_RDMA_RQ_DEPTH;
opts->timeout = P9_RDMA_TIMEOUT;
if (!params)
return 0;
options = kstrdup(params, GFP_KERNEL);
if (!options) {
P9_DPRINTK(P9_DEBUG_ERROR,
"failed to allocate copy of option string\n");
return -ENOMEM;
}
while ((p = strsep(&options, ",")) != NULL) {
int token;
int r;
if (!*p)
continue;
token = match_token(p, tokens, args);
r = match_int(&args[0], &option);
if (r < 0) {
P9_DPRINTK(P9_DEBUG_ERROR,
"integer field, but no integer?\n");
ret = r;
continue;
}
switch (token) {
case Opt_port:
opts->port = option;
break;
case Opt_sq_depth:
opts->sq_depth = option;
break;
case Opt_rq_depth:
opts->rq_depth = option;
break;
case Opt_timeout:
opts->timeout = option;
break;
default:
continue;
}
}
/* RQ must be at least as large as the SQ */
opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
kfree(options);
return 0;
}
static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct p9_client *c = id->context;
struct p9_trans_rdma *rdma = c->trans;
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
BUG_ON(rdma->state != P9_RDMA_INIT);
rdma->state = P9_RDMA_ADDR_RESOLVED;
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
rdma->state = P9_RDMA_ROUTE_RESOLVED;
break;
case RDMA_CM_EVENT_ESTABLISHED:
BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
rdma->state = P9_RDMA_CONNECTED;
break;
case RDMA_CM_EVENT_DISCONNECTED:
if (rdma)
rdma->state = P9_RDMA_CLOSED;
if (c)
c->status = Disconnected;
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
break;
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_MULTICAST_JOIN:
case RDMA_CM_EVENT_MULTICAST_ERROR:
case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_CONNECT_REQUEST:
case RDMA_CM_EVENT_CONNECT_RESPONSE:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
c->status = Disconnected;
rdma_disconnect(rdma->cm_id);
break;
default:
BUG();
}
complete(&rdma->cm_done);
return 0;
}
/*
 * handle_recv - process a completed receive work request
 * @client: client the reply belongs to
 * @rdma: transport instance
 * @c: context of the completed WR; c->rc is the reply buffer
 * @status: completion status
 * @byte_len: number of bytes received (unused)
 *
 * Unmaps the receive buffer. If the completion succeeded, the header
 * parses and the tag resolves to a request, the reply is attached to that
 * request, the request is marked received and the client callback is
 * invoked. On any failure the problem is logged, the transport is put
 * into the flushing state and the client is marked disconnected.
 */
static void
handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
	    struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
{
	struct p9_req_t *req = NULL;
	int err = 0;
	int16_t tag;

	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
			    DMA_FROM_DEVICE);

	if (status == IB_WC_SUCCESS) {
		err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
		if (!err)
			req = p9_tag_lookup(client, tag);

		if (req) {
			req->rc = c->rc;
			req->status = REQ_STATUS_RCVD;
			p9_client_cb(client, req);
			return;
		}
	}

	/* bad completion, unparsable header or unknown tag */
	P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n",
		   req, err, status);
	rdma->state = P9_RDMA_FLUSHING;
	client->status = Disconnected;
}
/*
 * handle_send - process a send completion
 * @client: 9P client the completion belongs to (not used here)
 * @rdma: RDMA transport of that client
 * @c: context of the completed send; c->req is the request that was sent
 * @status: work completion status (not used here)
 * @byte_len: completion byte count (not used here)
 *
 * Only releases the DMA mapping that rdma_request() set up for the
 * request buffer.
 */
static void
handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
	    struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
{
	struct ib_device *dev = rdma->cm_id->device;

	ib_dma_unmap_single(dev, c->busa, c->req->tc->size, DMA_TO_DEVICE);
}
/*
 * qp_event_handler - queue pair asynchronous event callback
 * @event: event reported for the queue pair
 * @context: value registered as qp_context when the QP was created
 *
 * Only logs the event at the error debug level.
 */
static void qp_event_handler(struct ib_event *event, void *context)
{
	P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n",
		   event->event, context);
}
/*
 * cq_comp_handler - completion queue callback
 * @cq: completion queue that raised the notification
 * @cq_context: the 9P client registered when the CQ was created
 *
 * Re-arms the completion notification, then polls the queue one entry at
 * a time until it is empty. Each entry's wr_id is the p9_rdma_context that
 * was posted with the work request; it is dispatched on its recorded
 * operation type and freed afterwards. A receive completion lowers the
 * posted-receive count, a send completion releases one send-queue slot.
 */
static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct p9_client *client = cq_context;
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_wc wc;
	int ret;

	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	for (;;) {
		struct p9_rdma_context *c;

		ret = ib_poll_cq(cq, 1, &wc);
		if (ret <= 0)
			break;

		c = (void *) (unsigned long) wc.wr_id;

		if (c->wc_op == IB_WC_RECV) {
			atomic_dec(&rdma->rq_count);
			handle_recv(client, rdma, c, wc.status, wc.byte_len);
		} else if (c->wc_op == IB_WC_SEND) {
			handle_send(client, rdma, c, wc.status, wc.byte_len);
			up(&rdma->sq_sem);
		} else {
			printk(KERN_ERR "9prdma: unexpected completion type, "
			       "c->wc_op=%d, wc.opcode=%d, status=%d\n",
			       c->wc_op, wc.opcode, wc.status);
		}

		kfree(c);
	}
}
/*
 * cq_event_handler - completion queue asynchronous event callback
 * @e: event reported for the completion queue
 * @v: context value registered when the CQ was created
 *
 * Only logs the event at the error debug level.
 */
static void cq_event_handler(struct ib_event *e, void *v)
{
	P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n",
		   e->event, v);
}
/*
 * rdma_destroy_trans - release everything owned by an RDMA transport
 * @rdma: transport to destroy; NULL is accepted and ignored
 *
 * Each resource is released only if its pointer is neither NULL nor an
 * error pointer, so this may be called on a partially constructed
 * transport. The transport structure itself is freed last.
 */
static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
	if (!rdma)
		return;

	if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) {
		ib_dereg_mr(rdma->dma_mr);
	}

	if (rdma->qp && !IS_ERR(rdma->qp)) {
		ib_destroy_qp(rdma->qp);
	}

	if (rdma->pd && !IS_ERR(rdma->pd)) {
		ib_dealloc_pd(rdma->pd);
	}

	if (rdma->cq && !IS_ERR(rdma->cq)) {
		ib_destroy_cq(rdma->cq);
	}

	if (rdma->cm_id && !IS_ERR(rdma->cm_id)) {
		rdma_destroy_id(rdma->cm_id);
	}

	kfree(rdma);
}
/*
 * post_recv - post a receive buffer on the queue pair
 * @client: 9P client owning the transport
 * @c: receive context; c->rc->sdata is the buffer to receive into
 *
 * Maps msize bytes of the reply buffer for DMA and posts a single-SGE
 * receive work request whose wr_id is @c.
 *
 * Returns 0 on success, -EIO if the buffer cannot be DMA-mapped, or the
 * error from ib_post_recv(). On every failure the buffer is left unmapped,
 * so the caller only has to free its own allocations.
 */
static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
	struct p9_trans_rdma *rdma = client->trans;
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge sge;
	int err;

	c->busa = ib_dma_map_single(rdma->cm_id->device,
				    c->rc->sdata, client->msize,
				    DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
		goto error;

	sge.addr = c->busa;
	sge.length = client->msize;
	sge.lkey = rdma->lkey;

	wr.next = NULL;
	c->wc_op = IB_WC_RECV;
	wr.wr_id = (unsigned long) c;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	err = ib_post_recv(rdma->qp, &wr, &bad_wr);
	if (err) {
		/*
		 * Nothing was posted, so no completion will ever unmap the
		 * buffer for us; undo the mapping here.
		 */
		ib_dma_unmap_single(rdma->cm_id->device, c->busa,
				    client->msize, DMA_FROM_DEVICE);
	}
	return err;

error:
	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
	return -EIO;
}
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
struct p9_trans_rdma *rdma = client->trans;
struct ib_send_wr wr, *bad_wr;
struct ib_sge sge;
int err = 0;
unsigned long flags;
struct p9_rdma_context *c = NULL;
struct p9_rdma_context *rpl_context = NULL;
/* Allocate an fcall for the reply */
rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
if (!rpl_context)
goto err_close;
/*
* If the request has a buffer, steal it, otherwise
* allocate a new one. Typically, requests should already
* have receive buffers allocated and just swap them around
*/
if (!req->rc) {
req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
GFP_KERNEL);
if (req->rc) {
req->rc->sdata = (char *) req->rc +
sizeof(struct p9_fcall);
req->rc->capacity = client->msize;
}
}
rpl_context->rc = req->rc;
if (!rpl_context->rc) {
kfree(rpl_context);
goto err_close;
}
/*
* Post a receive buffer for this request. We need to ensure
* there is a reply buffer available for every outstanding
* request. A flushed request can result in no reply for an
* outstanding request, so we must keep a count to avoid
* overflowing the RQ.
*/
if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
err = post_recv(client, rpl_context);
if (err) {
kfree(rpl_context->rc);
kfree(rpl_context);
goto err_close;
}
} else
atomic_dec(&rdma->rq_count);
/* remove posted receive buffer from request structure */
req->rc = NULL;
/* Post the request */
c = kmalloc(sizeof *c, GFP_KERNEL);
if (!c)
goto err_close;
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
c->req->tc->sdata, c->req->tc->size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
sge.addr = c->busa;
sge.length = c->req->tc->size;
sge.lkey = rdma->lkey;
wr.next = NULL;
c->wc_op = IB_WC_SEND;
wr.wr_id = (unsigned long) c;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
wr.sg_list = &sge;
wr.num_sge = 1;
if (down_interruptible(&rdma->sq_sem))
goto error;
return ib_post_send(rdma->qp, &wr, &bad_wr);
error:
P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
return -EIO;
err_close:
spin_lock_irqsave(&rdma->req_lock, flags);
if (rdma->state < P9_RDMA_CLOSING) {
rdma->state = P9_RDMA_CLOSING;
spin_unlock_irqrestore(&rdma->req_lock, flags);
rdma_disconnect(rdma->cm_id);
} else
spin_unlock_irqrestore(&rdma->req_lock, flags);
return err;
}
static void rdma_close(struct p9_client *client)
{
struct p9_trans_rdma *rdma;
if (!client)
return;
rdma = client->trans;
if (!rdma)
return;
client->status = Disconnected;
rdma_disconnect(rdma->cm_id);
rdma_destroy_trans(rdma);
}
/**
 * alloc_rdma - Allocate and initialize the rdma transport structure
 * @opts: Mount options structure
 *
 * The structure is zero-allocated, then the queue depths and timeout are
 * copied from @opts. The send-queue semaphore starts with sq_depth slots
 * and the posted-receive count starts at zero.
 *
 * Returns the new transport, or NULL if the allocation fails.
 */
static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
{
	struct p9_trans_rdma *trans = kzalloc(sizeof(*trans), GFP_KERNEL);

	if (trans == NULL)
		return NULL;

	/* Values taken from the mount options */
	trans->timeout = opts->timeout;
	trans->rq_depth = opts->rq_depth;
	trans->sq_depth = opts->sq_depth;

	/* Synchronisation primitives and counters */
	atomic_set(&trans->rq_count, 0);
	sema_init(&trans->sq_sem, trans->sq_depth);
	init_completion(&trans->cm_done);
	spin_lock_init(&trans->req_lock);

	return trans;
}
/*
 * rdma_cancel - cancel hook of the RDMA transport
 * @client: client instance (unused)
 * @req: request to cancel (unused)
 *
 * It is not clear that anything can be done once the send has been
 * posted, so this always returns 1 without touching the request.
 */
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
	const int not_cancelled = 1;

	return not_cancelled;
}
/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 *
 * Parses the mount options, allocates the transport, then walks through
 * address resolution, route resolution, CQ/PD/QP creation and the connect
 * request, waiting on rdma->cm_done after each asynchronous step.
 *
 * Returns 0 once the connection is established, the error from
 * parse_opts(), -ENOMEM if the transport cannot be allocated, or -ENOTCONN
 * for any later failure. On failure the transport is destroyed and
 * client->trans no longer refers to it.
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				opts.sq_depth + opts.rq_depth + 1, 0);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	/*
	 * rdma has just been freed. rdma_close() dereferences client->trans
	 * whenever it is non-NULL, so do not leave the stale pointer behind.
	 */
	if (client->trans == rdma)
		client->trans = NULL;
	return -ENOTCONN;
}
/*
 * Transport description handed to the 9P core under the name "rdma".
 * It is not flagged as a default transport (.def is 0).
 */
static struct p9_trans_module p9_rdma_trans = {
	.owner		= THIS_MODULE,
	.name		= "rdma",
	.def		= 0,
	.maxsize	= P9_RDMA_MAXSIZE,

	/* transport operations */
	.create		= rdma_create_trans,
	.close		= rdma_close,
	.request	= rdma_request,
	.cancel		= rdma_cancel,
};
/**
 * p9_trans_rdma_init - Register the 9P RDMA transport driver
 *
 * Module entry point. Registers p9_rdma_trans with the 9P core and
 * always reports success.
 */
static int __init p9_trans_rdma_init(void)
{
	v9fs_register_trans(&p9_rdma_trans);

	return 0;
}
/*
 * p9_trans_rdma_exit - Unregister the 9P RDMA transport driver
 *
 * Module exit point; removes p9_rdma_trans from the 9P core.
 */
static void __exit p9_trans_rdma_exit(void)
{
	v9fs_unregister_trans(&p9_rdma_trans);
}
/* Module entry/exit hooks and metadata for the 9P RDMA transport */
module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");

View File

@@ -0,0 +1,386 @@
/*
* The Virtio 9p transport driver
*
* This is a block based transport driver based on the lguest block driver
* code.
*
* Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
*
* Based on virtio console driver
* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <linux/scatterlist.h>
#include <linux/virtio.h>
#include <linux/virtio_9p.h>
/* Number of entries in each channel's scatterlist array */
#define VIRTQUEUE_NUM 128
/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
/*
 * Index of the next channels[] slot that p9_virtio_probe() will hand out;
 * it is advanced on each probe and stepped back when a probe fails.
 */
static int chan_index;
/**
 * struct virtio_chan - per-instance transport information
 * @initialized: whether the channel is initialized; set at the end of a
 *               successful probe and cleared on device removal
 * @inuse: whether the channel is in use; set when a client attaches in
 *         p9_virtio_create() and cleared in p9_virtio_close()
 * @lock: protects multiple elements within this structure
 *        NOTE(review): it is initialized in the probe routine but not
 *        taken anywhere in this file - confirm intended usage
 * @client: client instance attached to this channel
 * @vdev: virtio dev associated with this channel
 * @vq: virtio queue associated with this channel
 * @sg: scatter gather list which is used to pack a request (protected?)
 *
 * We keep all per-channel information in a structure.
 * The structures live in the static channels[] array, one slot per probed
 * device. A pointer to the slot is stored both in the virtio device's
 * private pointer and in the client's transport private pointer.
 *
 */
static struct virtio_chan {
bool initialized;
bool inuse;
spinlock_t lock;
struct p9_client *client;
struct virtio_device *vdev;
struct virtqueue *vq;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
} channels[MAX_9P_CHAN];
/*
 * rest_of_page - bytes from @data up to the end of the page it lies in
 *
 * For a page-aligned pointer the result is a whole PAGE_SIZE.
 */
static unsigned int rest_of_page(void *data)
{
	unsigned long offset_in_page = (unsigned long)data % PAGE_SIZE;

	return PAGE_SIZE - offset_in_page;
}
/**
 * p9_virtio_close - release the channel held by a client
 * @client: client instance
 *
 * Clears the channel's inuse flag under the global virtio 9p mutex so
 * that a later p9_virtio_create() can hand the channel out again.
 *
 */
static void p9_virtio_close(struct p9_client *client)
{
	struct virtio_chan *channel = client->trans;

	mutex_lock(&virtio_9p_lock);
	channel->inuse = false;
	mutex_unlock(&virtio_9p_lock);
}
/**
 * req_done - callback which signals activity from the server
 * @vq: virtio queue activity was received on
 *
 * Drains every completed buffer from the channel's virtqueue. For each
 * one, the tag stored in the returned fcall is used to look up the
 * request, which is then marked received and handed to the client
 * callback so the waiting thread can proceed.
 *
 * Bugs: could do with some additional sanity checking, but appears to work.
 *
 */
static void req_done(struct virtqueue *vq)
{
	struct virtio_chan *chan = vq->vdev->priv;
	struct p9_req_t *req;
	struct p9_fcall *rc;
	unsigned int len;

	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");

	for (;;) {
		rc = chan->vq->vq_ops->get_buf(chan->vq, &len);
		if (rc == NULL)
			break;

		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);

		req = p9_tag_lookup(chan->client, rc->tag);
		req->status = REQ_STATUS_RCVD;
		p9_client_cb(chan->client, req);
	}
}
/**
 * pack_sg_list - pack a scatter gather list from a linear buffer
 * @sg: scatter/gather list to pack into
 * @start: which segment of the sg_list to start at
 * @limit: number of entries in @sg; segments are written at absolute
 *         indices @start .. @limit - 1 only
 * @data: data to pack into scatter/gather list
 * @count: amount of data to pack into the scatter/gather list
 *
 * sg_lists have multiple segments of various sizes.  This will pack
 * arbitrary data into an existing scatter gather list, segmenting the
 * data at page boundaries.
 *
 * Returns the number of segments used. BUG()s if the data would need a
 * segment at index @limit or beyond.
 *
 */
static int
pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
								int count)
{
	int s;
	int index = start;

	while (count) {
		s = rest_of_page(data);
		if (s > count)
			s = count;
		/*
		 * Check before writing: testing after the store would let
		 * sg[limit] be written before the overflow is noticed.
		 */
		BUG_ON(index >= limit);
		sg_set_buf(&sg[index++], data, s);
		count -= s;
		data += s;
	}

	return index-start;
}
/*
 * p9_virtio_cancel - cancel hook of the virtio transport
 * @client: client instance (unused)
 * @req: request to cancel (unused)
 *
 * Canceling of virtio requests is not currently supported, so this
 * always returns 1 without touching the request.
 */
static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
{
	const int not_cancelled = 1;

	return not_cancelled;
}
/**
 * p9_virtio_request - issue a request
 * @client: client instance issuing the request
 * @req: request to be issued
 *
 * Packs the outgoing message (req->tc) and then the reply area that
 * follows the req->rc header into the channel's scatterlist, queues both
 * on the virtqueue and kicks it.
 *
 * Returns 0 on success or -EIO if the buffers cannot be added to the
 * virtqueue.
 *
 */
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
	int in, out;
	struct virtio_chan *chan = client->trans;
	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);

	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");

	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
								req->tc->size);
	/*
	 * pack_sg_list() compares its limit against the absolute index in
	 * chan->sg, so the limit is the array size here as well, not the
	 * number of entries left after the "out" segments.
	 */
	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
								client->msize);

	req->status = REQ_STATUS_SENT;

	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
		P9_DPRINTK(P9_DEBUG_TRANS,
			"9p debug: virtio rpc add_buf returned failure\n");
		return -EIO;
	}

	chan->vq->vq_ops->kick(chan->vq);

	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
	return 0;
}
/**
 * p9_virtio_probe - probe for existence of 9P virtio channels
 * @vdev: virtio device to probe
 *
 * Claims the next free slot in channels[], looks up the device's single
 * "requests" virtqueue and marks the slot initialized and not in use.
 *
 * Returns 0 on success, -ENOMEM when all MAX_9P_CHAN slots are already
 * taken, or the error from the virtqueue lookup. A slot claimed by a
 * probe that then fails is given back.
 *
 */
static int p9_virtio_probe(struct virtio_device *vdev)
{
	int err;
	struct virtio_chan *chan;
	int index;

	mutex_lock(&virtio_9p_lock);
	/* Check before claiming so no out-of-range slot is ever handed out */
	if (chan_index >= MAX_9P_CHAN) {
		mutex_unlock(&virtio_9p_lock);
		printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n");
		return -ENOMEM;
	}
	index = chan_index++;
	chan = &channels[index];
	mutex_unlock(&virtio_9p_lock);

	chan->vdev = vdev;

	/* We expect one virtqueue, for requests. */
	chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
	if (IS_ERR(chan->vq)) {
		err = PTR_ERR(chan->vq);
		goto out_free_vq;
	}
	chan->vq->vdev->priv = chan;
	spin_lock_init(&chan->lock);

	sg_init_table(chan->sg, VIRTQUEUE_NUM);

	chan->inuse = false;
	chan->initialized = true;
	return 0;

out_free_vq:
	vdev->config->del_vqs(vdev);
	/* Give the claimed slot back */
	mutex_lock(&virtio_9p_lock);
	chan_index--;
	mutex_unlock(&virtio_9p_lock);
	return err;
}
/**
* p9_virtio_create - allocate a new virtio channel
* @client: client instance invoking this transport
* @devname: string identifying the channel to connect to (unused)
* @args: args passed from sys_mount() for per-transport options (unused)
*
* This sets up a transport channel for 9p communication. Right now
* we only match the first available channel, but eventually we couldlook up
* alternate channels by matching devname versus a virtio_config entry.
* We use a simple reference count mechanism to ensure that only a single
* mount has a channel open at a time.
*
* Bugs: doesn't allow identification of a specific channel
* to allocate, channels are allocated sequentially. This was
* a pragmatic decision to get things rolling, but ideally some
* way of identifying the channel to attach to would be nice
* if we are going to support multiple channels.
*
*/
static int
p9_virtio_create(struct p9_client *client, const char *devname, char *args)
{
struct virtio_chan *chan = channels;
int index = 0;
mutex_lock(&virtio_9p_lock);
while (index < MAX_9P_CHAN) {
if (chan->initialized && !chan->inuse) {
chan->inuse = true;
break;
} else {
index++;
chan = &channels[index];
}
}
mutex_unlock(&virtio_9p_lock);
if (index >= MAX_9P_CHAN) {
printk(KERN_ERR "9p: no channels available\n");
return -ENODEV;
}
client->trans = (void *)chan;
chan->client = client;
return 0;
}
/**
 * p9_virtio_remove - clean up resources associated with a virtio device
 * @vdev: virtio device to remove
 *
 * BUG()s if the device's channel is still in use. If the channel was
 * initialized, its virtqueues are deleted and the slot is marked
 * uninitialized.
 *
 */
static void p9_virtio_remove(struct virtio_device *vdev)
{
	struct virtio_chan *channel = vdev->priv;

	BUG_ON(channel->inuse);

	if (!channel->initialized)
		return;

	vdev->config->del_vqs(vdev);
	channel->initialized = false;
}
/* Devices this driver binds to: virtio 9P devices; the list ends with a zero entry */
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
{ 0 },
};
/* virtio driver description: probe/remove hooks for the devices in id_table */
static struct virtio_driver p9_virtio_drv = {
	.driver.owner	= THIS_MODULE,
	.driver.name	= KBUILD_MODNAME,
	.id_table	= id_table,
	.remove		= p9_virtio_remove,
	.probe		= p9_virtio_probe,
};
/*
 * Transport description handed to the 9P core under the name "virtio".
 * It is not flagged as a default transport (.def is 0).
 */
static struct p9_trans_module p9_virtio_trans = {
	.owner		= THIS_MODULE,
	.name		= "virtio",
	.def		= 0,
	.maxsize	= PAGE_SIZE*16,

	/* transport operations */
	.create		= p9_virtio_create,
	.close		= p9_virtio_close,
	.request	= p9_virtio_request,
	.cancel		= p9_virtio_cancel,
};
/*
 * p9_virtio_init - module entry point
 *
 * Marks every channel slot uninitialized, registers the "virtio"
 * transport with the 9P core and then registers the virtio driver.
 *
 * Returns the result of register_virtio_driver(). If that fails the
 * transport is unregistered again, so the 9P core is not left holding a
 * pointer into a module that failed to load.
 */
static int __init p9_virtio_init(void)
{
	int count;
	int rc;

	for (count = 0; count < MAX_9P_CHAN; count++)
		channels[count].initialized = false;

	v9fs_register_trans(&p9_virtio_trans);
	rc = register_virtio_driver(&p9_virtio_drv);
	if (rc)
		v9fs_unregister_trans(&p9_virtio_trans);

	return rc;
}
/*
 * p9_virtio_cleanup - module exit point
 *
 * Undoes p9_virtio_init() in reverse order: the virtio driver is
 * unregistered first, then the transport.
 */
static void __exit p9_virtio_cleanup(void)
{
	unregister_virtio_driver(&p9_virtio_drv);

	v9fs_unregister_trans(&p9_virtio_trans);
}
/* Module entry/exit hooks, device table export and metadata for the virtio 9P transport */
module_init(p9_virtio_init);
module_exit(p9_virtio_cleanup);
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Virtio 9p Transport");
MODULE_LICENSE("GPL");

145
kernel/net/9p/util.c Normal file
View File

@@ -0,0 +1,145 @@
/*
* net/9p/util.c
*
* This file contains some helper functions
*
* Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net>
* Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
* Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/parser.h>
#include <linux/idr.h>
#include <net/9p/9p.h>
/**
 * struct p9_idpool - per-connection accounting for tag idpool
 * @lock: protects the pool; taken with interrupts disabled around id
 *        allocation and removal
 * @pool: idr to allocate tag id from
 *
 */
struct p9_idpool {
spinlock_t lock;
struct idr pool;
};
/**
* p9_idpool_create - create a new per-connection id pool
*
*/
struct p9_idpool *p9_idpool_create(void)
{
struct p9_idpool *p;
p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
spin_lock_init(&p->lock);
idr_init(&p->pool);
return p;
}
EXPORT_SYMBOL(p9_idpool_create);
/**
 * p9_idpool_destroy - destroy a per-connection id pool
 * @p: idpool to destroy
 *
 * Tears down the idr and frees the pool structure.
 */
void p9_idpool_destroy(struct p9_idpool *p)
{
	struct idr *ids = &p->pool;

	idr_destroy(ids);
	kfree(p);
}
EXPORT_SYMBOL(p9_idpool_destroy);
/**
 * p9_idpool_get - allocate numeric id from pool
 * @p: pool to allocate from
 *
 * Returns the newly allocated id, or -1 if no id could be allocated
 * (including when the idr cannot preallocate memory).
 *
 * Bugs: This seems to be an awful generic function, should it be in idr.c with
 *            the lock included in struct idr?
 */
int p9_idpool_get(struct p9_idpool *p)
{
	int i = 0;
	int error;
	unsigned long flags;

retry:
	/*
	 * 0 is a valid id, so a preallocation failure must be reported
	 * with the same -1 used for every other failure.
	 */
	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
		return -1;

	spin_lock_irqsave(&p->lock, flags);

	/* no need to store exactly p, we just need something non-null */
	error = idr_get_new(&p->pool, p, &i);
	spin_unlock_irqrestore(&p->lock, flags);

	/* -EAGAIN means the preallocated memory was consumed; refill it */
	if (error == -EAGAIN)
		goto retry;
	else if (error)
		return -1;

	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p);
	return i;
}
EXPORT_SYMBOL(p9_idpool_get);
/**
 * p9_idpool_put - release numeric id from pool
 * @id: numeric id which is being released
 * @p: pool to release id into
 *
 * Removes @id from the idr while holding the pool lock with interrupts
 * disabled.
 *
 * Bugs: This seems to be an awful generic function, should it be in idr.c with
 *            the lock included in struct idr?
 */
void p9_idpool_put(int id, struct p9_idpool *p)
{
	unsigned long irqflags;

	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p);

	spin_lock_irqsave(&p->lock, irqflags);
	idr_remove(&p->pool, id);
	spin_unlock_irqrestore(&p->lock, irqflags);
}
EXPORT_SYMBOL(p9_idpool_put);
/**
 * p9_idpool_check - check whether the specified id is present in the pool
 * @id: id to check
 * @p: pool to check
 *
 * Returns non-zero when the idr has an entry for @id, zero otherwise.
 */
int p9_idpool_check(int id, struct p9_idpool *p)
{
	void *entry = idr_find(&p->pool, id);

	return entry != NULL;
}
EXPORT_SYMBOL(p9_idpool_check);