add idl4k kernel firmware version 1.13.0.105

Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

12
kernel/ipc/Makefile Normal file

@@ -0,0 +1,12 @@
#
# Makefile for the linux ipc.
#
obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o
obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
obj_mq-$(CONFIG_COMPAT) += compat_mq.o
obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
obj-$(CONFIG_IPC_NS) += namespace.o
obj-$(CONFIG_POSIX_MQUEUE_SYSCTL) += mq_sysctl.o

688
kernel/ipc/compat.c Normal file

@@ -0,0 +1,688 @@
/*
* 32 bit compatibility code for System V IPC
*
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2000 Gerhard Tonn (ton@de.ibm.com)
* Copyright (C) 2000-2002 Andi Kleen, SuSE Labs (x86-64 port)
* Copyright (C) 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 IBM
* Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Copyright (C) 2004 Arnd Bergmann (arnd@arndb.de)
*
* This code is collected from the versions for sparc64, mips64, s390x, ia64,
* ppc64 and x86_64, all of which are based on the original sparc64 version
* by Jakub Jelinek.
*
*/
#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/highuid.h>
#include <linux/init.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
#include "util.h"
struct compat_msgbuf {
compat_long_t mtype;
char mtext[1];
};
struct compat_ipc_perm {
key_t key;
__compat_uid_t uid;
__compat_gid_t gid;
__compat_uid_t cuid;
__compat_gid_t cgid;
compat_mode_t mode;
unsigned short seq;
};
struct compat_semid_ds {
struct compat_ipc_perm sem_perm;
compat_time_t sem_otime;
compat_time_t sem_ctime;
compat_uptr_t sem_base;
compat_uptr_t sem_pending;
compat_uptr_t sem_pending_last;
compat_uptr_t undo;
unsigned short sem_nsems;
};
struct compat_msqid_ds {
struct compat_ipc_perm msg_perm;
compat_uptr_t msg_first;
compat_uptr_t msg_last;
compat_time_t msg_stime;
compat_time_t msg_rtime;
compat_time_t msg_ctime;
compat_ulong_t msg_lcbytes;
compat_ulong_t msg_lqbytes;
unsigned short msg_cbytes;
unsigned short msg_qnum;
unsigned short msg_qbytes;
compat_ipc_pid_t msg_lspid;
compat_ipc_pid_t msg_lrpid;
};
struct compat_shmid_ds {
struct compat_ipc_perm shm_perm;
int shm_segsz;
compat_time_t shm_atime;
compat_time_t shm_dtime;
compat_time_t shm_ctime;
compat_ipc_pid_t shm_cpid;
compat_ipc_pid_t shm_lpid;
unsigned short shm_nattch;
unsigned short shm_unused;
compat_uptr_t shm_unused2;
compat_uptr_t shm_unused3;
};
struct compat_ipc_kludge {
compat_uptr_t msgp;
compat_long_t msgtyp;
};
struct compat_shminfo64 {
compat_ulong_t shmmax;
compat_ulong_t shmmin;
compat_ulong_t shmmni;
compat_ulong_t shmseg;
compat_ulong_t shmall;
compat_ulong_t __unused1;
compat_ulong_t __unused2;
compat_ulong_t __unused3;
compat_ulong_t __unused4;
};
struct compat_shm_info {
compat_int_t used_ids;
compat_ulong_t shm_tot, shm_rss, shm_swp;
compat_ulong_t swap_attempts, swap_successes;
};
extern int sem_ctls[];
#define sc_semopm (sem_ctls[2])
static inline int compat_ipc_parse_version(int *cmd)
{
int version = *cmd & IPC_64;
/* this is tricky: architectures that have support for the old
* ipc structures in 64 bit binaries need to have IPC_64 set
* in cmd, the others need to have it cleared */
#ifndef ipc_parse_version
*cmd |= IPC_64;
#else
*cmd &= ~IPC_64;
#endif
return version;
}
static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64,
struct compat_ipc64_perm __user *up64)
{
int err;
err = __get_user(p64->uid, &up64->uid);
err |= __get_user(p64->gid, &up64->gid);
err |= __get_user(p64->mode, &up64->mode);
return err;
}
static inline int __get_compat_ipc_perm(struct ipc64_perm *p,
struct compat_ipc_perm __user *up)
{
int err;
err = __get_user(p->uid, &up->uid);
err |= __get_user(p->gid, &up->gid);
err |= __get_user(p->mode, &up->mode);
return err;
}
static inline int __put_compat_ipc64_perm(struct ipc64_perm *p64,
struct compat_ipc64_perm __user *up64)
{
int err;
err = __put_user(p64->key, &up64->key);
err |= __put_user(p64->uid, &up64->uid);
err |= __put_user(p64->gid, &up64->gid);
err |= __put_user(p64->cuid, &up64->cuid);
err |= __put_user(p64->cgid, &up64->cgid);
err |= __put_user(p64->mode, &up64->mode);
err |= __put_user(p64->seq, &up64->seq);
return err;
}
static inline int __put_compat_ipc_perm(struct ipc64_perm *p,
struct compat_ipc_perm __user *up)
{
int err;
__compat_uid_t u;
__compat_gid_t g;
err = __put_user(p->key, &up->key);
SET_UID(u, p->uid);
err |= __put_user(u, &up->uid);
SET_GID(g, p->gid);
err |= __put_user(g, &up->gid);
SET_UID(u, p->cuid);
err |= __put_user(u, &up->cuid);
SET_GID(g, p->cgid);
err |= __put_user(g, &up->cgid);
err |= __put_user(p->mode, &up->mode);
err |= __put_user(p->seq, &up->seq);
return err;
}
static inline int get_compat_semid64_ds(struct semid64_ds *s64,
struct compat_semid64_ds __user *up64)
{
if (!access_ok (VERIFY_READ, up64, sizeof(*up64)))
return -EFAULT;
return __get_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
}
static inline int get_compat_semid_ds(struct semid64_ds *s,
struct compat_semid_ds __user *up)
{
if (!access_ok (VERIFY_READ, up, sizeof(*up)))
return -EFAULT;
return __get_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
}
static inline int put_compat_semid64_ds(struct semid64_ds *s64,
struct compat_semid64_ds __user *up64)
{
int err;
if (!access_ok (VERIFY_WRITE, up64, sizeof(*up64)))
return -EFAULT;
err = __put_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm);
err |= __put_user(s64->sem_otime, &up64->sem_otime);
err |= __put_user(s64->sem_ctime, &up64->sem_ctime);
err |= __put_user(s64->sem_nsems, &up64->sem_nsems);
return err;
}
static inline int put_compat_semid_ds(struct semid64_ds *s,
struct compat_semid_ds __user *up)
{
int err;
if (!access_ok (VERIFY_WRITE, up, sizeof(*up)))
return -EFAULT;
err = __put_compat_ipc_perm(&s->sem_perm, &up->sem_perm);
err |= __put_user(s->sem_otime, &up->sem_otime);
err |= __put_user(s->sem_ctime, &up->sem_ctime);
err |= __put_user(s->sem_nsems, &up->sem_nsems);
return err;
}
long compat_sys_semctl(int first, int second, int third, void __user *uptr)
{
union semun fourth;
u32 pad;
int err, err2;
struct semid64_ds s64;
struct semid64_ds __user *up64;
int version = compat_ipc_parse_version(&third);
memset(&s64, 0, sizeof(s64));
if (!uptr)
return -EINVAL;
if (get_user(pad, (u32 __user *) uptr))
return -EFAULT;
if ((third & (~IPC_64)) == SETVAL)
fourth.val = (int) pad;
else
fourth.__pad = compat_ptr(pad);
switch (third & (~IPC_64)) {
case IPC_INFO:
case IPC_RMID:
case SEM_INFO:
case GETVAL:
case GETPID:
case GETNCNT:
case GETZCNT:
case GETALL:
case SETVAL:
case SETALL:
err = sys_semctl(first, second, third, fourth);
break;
case IPC_STAT:
case SEM_STAT:
up64 = compat_alloc_user_space(sizeof(s64));
fourth.__pad = up64;
err = sys_semctl(first, second, third, fourth);
if (err < 0)
break;
if (copy_from_user(&s64, up64, sizeof(s64)))
err2 = -EFAULT;
else if (version == IPC_64)
err2 = put_compat_semid64_ds(&s64, compat_ptr(pad));
else
err2 = put_compat_semid_ds(&s64, compat_ptr(pad));
if (err2)
err = -EFAULT;
break;
case IPC_SET:
if (version == IPC_64) {
err = get_compat_semid64_ds(&s64, compat_ptr(pad));
} else {
err = get_compat_semid_ds(&s64, compat_ptr(pad));
}
up64 = compat_alloc_user_space(sizeof(s64));
if (copy_to_user(up64, &s64, sizeof(s64)))
err = -EFAULT;
if (err)
break;
fourth.__pad = up64;
err = sys_semctl(first, second, third, fourth);
break;
default:
err = -EINVAL;
break;
}
return err;
}
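As a point of reference, a minimal userspace caller exercising the paths above might look like the following sketch (the union semun definition is the caller's responsibility, per semctl(2); error handling is abbreviated):

#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

union semun {                        /* callers must define this themselves */
	int val;
	struct semid_ds *buf;
	unsigned short *array;
};

int main(void)
{
	union semun arg;
	struct semid_ds ds;
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

	if (id < 0)
		return 1;
	arg.val = 1;
	semctl(id, 0, SETVAL, arg);      /* travels through fourth.val above */
	arg.buf = &ds;
	semctl(id, 0, IPC_STAT, arg);    /* travels through fourth.__pad above */
	printf("nsems=%lu\n", (unsigned long)ds.sem_nsems);
	semctl(id, 0, IPC_RMID, arg);
	return 0;
}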
long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
{
struct compat_msgbuf __user *up = uptr;
long type;
if (first < 0)
return -EINVAL;
if (second < 0)
return -EINVAL;
if (get_user(type, &up->mtype))
return -EFAULT;
return do_msgsnd(first, type, up->mtext, second, third);
}
long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
int version, void __user *uptr)
{
struct compat_msgbuf __user *up;
long type;
int err;
if (first < 0)
return -EINVAL;
if (second < 0)
return -EINVAL;
if (!version) {
struct compat_ipc_kludge ipck;
err = -EINVAL;
if (!uptr)
goto out;
err = -EFAULT;
if (copy_from_user (&ipck, uptr, sizeof(ipck)))
goto out;
uptr = compat_ptr(ipck.msgp);
msgtyp = ipck.msgtyp;
}
up = uptr;
err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third);
if (err < 0)
goto out;
if (put_user(type, &up->mtype))
err = -EFAULT;
out:
return err;
}
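The version == 0 branch services architectures that still route through the old ipc(2) multiplexer, where uptr points not at the msgbuf itself but at a two-word block that is unpacked into pointer and type above. A hypothetical 32-bit caller's view of that layout (illustrative only; the struct name is invented):

struct old_msgrcv_block {       /* mirrors compat_ipc_kludge */
	void *msgp;             /* pointer to the real msgbuf */
	long msgtyp;            /* requested message type */
};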
static inline int get_compat_msqid64(struct msqid64_ds *m64,
struct compat_msqid64_ds __user *up64)
{
int err;
if (!access_ok(VERIFY_READ, up64, sizeof(*up64)))
return -EFAULT;
err = __get_compat_ipc64_perm(&m64->msg_perm, &up64->msg_perm);
err |= __get_user(m64->msg_qbytes, &up64->msg_qbytes);
return err;
}
static inline int get_compat_msqid(struct msqid64_ds *m,
struct compat_msqid_ds __user *up)
{
int err;
if (!access_ok(VERIFY_READ, up, sizeof(*up)))
return -EFAULT;
err = __get_compat_ipc_perm(&m->msg_perm, &up->msg_perm);
err |= __get_user(m->msg_qbytes, &up->msg_qbytes);
return err;
}
static inline int put_compat_msqid64_ds(struct msqid64_ds *m64,
struct compat_msqid64_ds __user *up64)
{
int err;
if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
return -EFAULT;
err = __put_compat_ipc64_perm(&m64->msg_perm, &up64->msg_perm);
err |= __put_user(m64->msg_stime, &up64->msg_stime);
err |= __put_user(m64->msg_rtime, &up64->msg_rtime);
err |= __put_user(m64->msg_ctime, &up64->msg_ctime);
err |= __put_user(m64->msg_cbytes, &up64->msg_cbytes);
err |= __put_user(m64->msg_qnum, &up64->msg_qnum);
err |= __put_user(m64->msg_qbytes, &up64->msg_qbytes);
err |= __put_user(m64->msg_lspid, &up64->msg_lspid);
err |= __put_user(m64->msg_lrpid, &up64->msg_lrpid);
return err;
}
static inline int put_compat_msqid_ds(struct msqid64_ds *m,
struct compat_msqid_ds __user *up)
{
int err;
if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
return -EFAULT;
err = __put_compat_ipc_perm(&m->msg_perm, &up->msg_perm);
err |= __put_user(m->msg_stime, &up->msg_stime);
err |= __put_user(m->msg_rtime, &up->msg_rtime);
err |= __put_user(m->msg_ctime, &up->msg_ctime);
err |= __put_user(m->msg_cbytes, &up->msg_cbytes);
err |= __put_user(m->msg_qnum, &up->msg_qnum);
err |= __put_user(m->msg_qbytes, &up->msg_qbytes);
err |= __put_user(m->msg_lspid, &up->msg_lspid);
err |= __put_user(m->msg_lrpid, &up->msg_lrpid);
return err;
}
long compat_sys_msgctl(int first, int second, void __user *uptr)
{
int err, err2;
struct msqid64_ds m64;
int version = compat_ipc_parse_version(&second);
void __user *p;
memset(&m64, 0, sizeof(m64));
switch (second & (~IPC_64)) {
case IPC_INFO:
case IPC_RMID:
case MSG_INFO:
err = sys_msgctl(first, second, uptr);
break;
case IPC_SET:
if (version == IPC_64) {
err = get_compat_msqid64(&m64, uptr);
} else {
err = get_compat_msqid(&m64, uptr);
}
if (err)
break;
p = compat_alloc_user_space(sizeof(m64));
if (copy_to_user(p, &m64, sizeof(m64)))
err = -EFAULT;
else
err = sys_msgctl(first, second, p);
break;
case IPC_STAT:
case MSG_STAT:
p = compat_alloc_user_space(sizeof(m64));
err = sys_msgctl(first, second, p);
if (err < 0)
break;
if (copy_from_user(&m64, p, sizeof(m64)))
err2 = -EFAULT;
else if (version == IPC_64)
err2 = put_compat_msqid64_ds(&m64, uptr);
else
err2 = put_compat_msqid_ds(&m64, uptr);
if (err2)
err = -EFAULT;
break;
default:
err = -EINVAL;
break;
}
return err;
}
long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
void __user *uptr)
{
int err;
unsigned long raddr;
compat_ulong_t __user *uaddr;
if (version == 1)
return -EINVAL;
err = do_shmat(first, uptr, second, &raddr);
if (err < 0)
return err;
uaddr = compat_ptr(third);
return put_user(raddr, uaddr);
}
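For context, the native calls this wrapper translates are plain shmget/shmat; a minimal sketch (error handling abbreviated):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p;

	if (id < 0)
		return 1;
	p = shmat(id, NULL, 0);         /* raddr is what put_user() stores above */
	if (p == (char *)-1)
		return 1;
	strcpy(p, "hello");
	printf("%s\n", p);
	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}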
static inline int get_compat_shmid64_ds(struct shmid64_ds *s64,
struct compat_shmid64_ds __user *up64)
{
if (!access_ok(VERIFY_READ, up64, sizeof(*up64)))
return -EFAULT;
return __get_compat_ipc64_perm(&s64->shm_perm, &up64->shm_perm);
}
static inline int get_compat_shmid_ds(struct shmid64_ds *s,
struct compat_shmid_ds __user *up)
{
if (!access_ok(VERIFY_READ, up, sizeof(*up)))
return -EFAULT;
return __get_compat_ipc_perm(&s->shm_perm, &up->shm_perm);
}
static inline int put_compat_shmid64_ds(struct shmid64_ds *s64,
struct compat_shmid64_ds __user *up64)
{
int err;
if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
return -EFAULT;
err = __put_compat_ipc64_perm(&s64->shm_perm, &up64->shm_perm);
err |= __put_user(s64->shm_atime, &up64->shm_atime);
err |= __put_user(s64->shm_dtime, &up64->shm_dtime);
err |= __put_user(s64->shm_ctime, &up64->shm_ctime);
err |= __put_user(s64->shm_segsz, &up64->shm_segsz);
err |= __put_user(s64->shm_nattch, &up64->shm_nattch);
err |= __put_user(s64->shm_cpid, &up64->shm_cpid);
err |= __put_user(s64->shm_lpid, &up64->shm_lpid);
return err;
}
static inline int put_compat_shmid_ds(struct shmid64_ds *s,
struct compat_shmid_ds __user *up)
{
int err;
if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
return -EFAULT;
err = __put_compat_ipc_perm(&s->shm_perm, &up->shm_perm);
err |= __put_user(s->shm_atime, &up->shm_atime);
err |= __put_user(s->shm_dtime, &up->shm_dtime);
err |= __put_user(s->shm_ctime, &up->shm_ctime);
err |= __put_user(s->shm_segsz, &up->shm_segsz);
err |= __put_user(s->shm_nattch, &up->shm_nattch);
err |= __put_user(s->shm_cpid, &up->shm_cpid);
err |= __put_user(s->shm_lpid, &up->shm_lpid);
return err;
}
static inline int put_compat_shminfo64(struct shminfo64 *smi,
struct compat_shminfo64 __user *up64)
{
int err;
if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64)))
return -EFAULT;
if (smi->shmmax > INT_MAX)
smi->shmmax = INT_MAX;
err = __put_user(smi->shmmax, &up64->shmmax);
err |= __put_user(smi->shmmin, &up64->shmmin);
err |= __put_user(smi->shmmni, &up64->shmmni);
err |= __put_user(smi->shmseg, &up64->shmseg);
err |= __put_user(smi->shmall, &up64->shmall);
return err;
}
static inline int put_compat_shminfo(struct shminfo64 *smi,
struct shminfo __user *up)
{
int err;
if (!access_ok(VERIFY_WRITE, up, sizeof(*up)))
return -EFAULT;
if (smi->shmmax > INT_MAX)
smi->shmmax = INT_MAX;
err = __put_user(smi->shmmax, &up->shmmax);
err |= __put_user(smi->shmmin, &up->shmmin);
err |= __put_user(smi->shmmni, &up->shmmni);
err |= __put_user(smi->shmseg, &up->shmseg);
err |= __put_user(smi->shmall, &up->shmall);
return err;
}
static inline int put_compat_shm_info(struct shm_info __user *ip,
struct compat_shm_info __user *uip)
{
int err;
struct shm_info si;
if (!access_ok(VERIFY_WRITE, uip, sizeof(*uip)) ||
copy_from_user(&si, ip, sizeof(si)))
return -EFAULT;
err = __put_user(si.used_ids, &uip->used_ids);
err |= __put_user(si.shm_tot, &uip->shm_tot);
err |= __put_user(si.shm_rss, &uip->shm_rss);
err |= __put_user(si.shm_swp, &uip->shm_swp);
err |= __put_user(si.swap_attempts, &uip->swap_attempts);
err |= __put_user(si.swap_successes, &uip->swap_successes);
return err;
}
long compat_sys_shmctl(int first, int second, void __user *uptr)
{
void __user *p;
struct shmid64_ds s64;
struct shminfo64 smi;
int err, err2;
int version = compat_ipc_parse_version(&second);
memset(&s64, 0, sizeof(s64));
switch (second & (~IPC_64)) {
case IPC_RMID:
case SHM_LOCK:
case SHM_UNLOCK:
err = sys_shmctl(first, second, uptr);
break;
case IPC_INFO:
p = compat_alloc_user_space(sizeof(smi));
err = sys_shmctl(first, second, p);
if (err < 0)
break;
if (copy_from_user(&smi, p, sizeof(smi)))
err2 = -EFAULT;
else if (version == IPC_64)
err2 = put_compat_shminfo64(&smi, uptr);
else
err2 = put_compat_shminfo(&smi, uptr);
if (err2)
err = -EFAULT;
break;
case IPC_SET:
if (version == IPC_64) {
err = get_compat_shmid64_ds(&s64, uptr);
} else {
err = get_compat_shmid_ds(&s64, uptr);
}
if (err)
break;
p = compat_alloc_user_space(sizeof(s64));
if (copy_to_user(p, &s64, sizeof(s64)))
err = -EFAULT;
else
err = sys_shmctl(first, second, p);
break;
case IPC_STAT:
case SHM_STAT:
p = compat_alloc_user_space(sizeof(s64));
err = sys_shmctl(first, second, p);
if (err < 0)
break;
if (copy_from_user(&s64, p, sizeof(s64)))
err2 = -EFAULT;
else if (version == IPC_64)
err2 = put_compat_shmid64_ds(&s64, uptr);
else
err2 = put_compat_shmid_ds(&s64, uptr);
if (err2)
err = -EFAULT;
break;
case SHM_INFO:
p = compat_alloc_user_space(sizeof(struct shm_info));
err = sys_shmctl(first, second, p);
if (err < 0)
break;
err2 = put_compat_shm_info(p, uptr);
if (err2)
err = -EFAULT;
break;
default:
err = -EINVAL;
break;
}
return err;
}
long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
unsigned nsops, const struct compat_timespec __user *timeout)
{
struct timespec __user *ts64 = NULL;
if (timeout) {
struct timespec ts;
ts64 = compat_alloc_user_space(sizeof(*ts64));
if (get_compat_timespec(&ts, timeout))
return -EFAULT;
if (copy_to_user(ts64, &ts, sizeof(ts)))
return -EFAULT;
}
return sys_semtimedop(semid, tsems, nsops, ts64);
}
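A minimal native caller of semtimedop, for reference (sketch only; glibc exposes semtimedop under _GNU_SOURCE):

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <time.h>

/* Decrement semaphore 0, giving up after two seconds. */
static int timed_down(int semid)
{
	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
	struct timespec to = { .tv_sec = 2, .tv_nsec = 0 };

	return semtimedop(semid, &op, 1, &to);  /* fails with EAGAIN on timeout */
}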

151
kernel/ipc/compat_mq.c Normal file

@@ -0,0 +1,151 @@
/*
* ipc/compat_mq.c
* 32 bit emulation for POSIX message queue system calls
*
* Copyright (C) 2004 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author: Arnd Bergmann <arnd@arndb.de>
*/
#include <linux/compat.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mqueue.h>
#include <linux/syscalls.h>
#include <asm/uaccess.h>
struct compat_mq_attr {
compat_long_t mq_flags; /* message queue flags */
compat_long_t mq_maxmsg; /* maximum number of messages */
compat_long_t mq_msgsize; /* maximum message size */
compat_long_t mq_curmsgs; /* number of messages currently queued */
compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
};
static inline int get_compat_mq_attr(struct mq_attr *attr,
const struct compat_mq_attr __user *uattr)
{
if (!access_ok(VERIFY_READ, uattr, sizeof *uattr))
return -EFAULT;
return __get_user(attr->mq_flags, &uattr->mq_flags)
| __get_user(attr->mq_maxmsg, &uattr->mq_maxmsg)
| __get_user(attr->mq_msgsize, &uattr->mq_msgsize)
| __get_user(attr->mq_curmsgs, &uattr->mq_curmsgs);
}
static inline int put_compat_mq_attr(const struct mq_attr *attr,
struct compat_mq_attr __user *uattr)
{
if (clear_user(uattr, sizeof *uattr))
return -EFAULT;
return __put_user(attr->mq_flags, &uattr->mq_flags)
| __put_user(attr->mq_maxmsg, &uattr->mq_maxmsg)
| __put_user(attr->mq_msgsize, &uattr->mq_msgsize)
| __put_user(attr->mq_curmsgs, &uattr->mq_curmsgs);
}
asmlinkage long compat_sys_mq_open(const char __user *u_name,
int oflag, compat_mode_t mode,
struct compat_mq_attr __user *u_attr)
{
void __user *p = NULL;
if (u_attr && oflag & O_CREAT) {
struct mq_attr attr;
memset(&attr, 0, sizeof(attr));
p = compat_alloc_user_space(sizeof(attr));
if (get_compat_mq_attr(&attr, u_attr) ||
copy_to_user(p, &attr, sizeof(attr)))
return -EFAULT;
}
return sys_mq_open(u_name, oflag, mode, p);
}
static int compat_prepare_timeout(struct timespec __user * *p,
const struct compat_timespec __user *u)
{
struct timespec ts;
if (!u) {
*p = NULL;
return 0;
}
*p = compat_alloc_user_space(sizeof(ts));
if (get_compat_timespec(&ts, u) || copy_to_user(*p, &ts, sizeof(ts)))
return -EFAULT;
return 0;
}
asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
const char __user *u_msg_ptr,
size_t msg_len, unsigned int msg_prio,
const struct compat_timespec __user *u_abs_timeout)
{
struct timespec __user *u_ts;
if (compat_prepare_timeout(&u_ts, u_abs_timeout))
return -EFAULT;
return sys_mq_timedsend(mqdes, u_msg_ptr, msg_len,
msg_prio, u_ts);
}
asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
char __user *u_msg_ptr,
size_t msg_len, unsigned int __user *u_msg_prio,
const struct compat_timespec __user *u_abs_timeout)
{
struct timespec __user *u_ts;
if (compat_prepare_timeout(&u_ts, u_abs_timeout))
return -EFAULT;
return sys_mq_timedreceive(mqdes, u_msg_ptr, msg_len,
u_msg_prio, u_ts);
}
asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
const struct compat_sigevent __user *u_notification)
{
struct sigevent __user *p = NULL;
if (u_notification) {
struct sigevent n;
p = compat_alloc_user_space(sizeof(*p));
if (get_compat_sigevent(&n, u_notification))
return -EFAULT;
if (n.sigev_notify == SIGEV_THREAD)
n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
if (copy_to_user(p, &n, sizeof(*p)))
return -EFAULT;
}
return sys_mq_notify(mqdes, p);
}
asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
const struct compat_mq_attr __user *u_mqstat,
struct compat_mq_attr __user *u_omqstat)
{
struct mq_attr mqstat;
struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p));
long ret;
memset(&mqstat, 0, sizeof(mqstat));
if (u_mqstat) {
if (get_compat_mq_attr(&mqstat, u_mqstat) ||
copy_to_user(p, &mqstat, sizeof(mqstat)))
return -EFAULT;
}
ret = sys_mq_getsetattr(mqdes,
u_mqstat ? p : NULL,
u_omqstat ? p + 1 : NULL);
if (ret)
return ret;
if (u_omqstat) {
if (copy_from_user(&mqstat, p + 1, sizeof(mqstat)) ||
put_compat_mq_attr(&mqstat, u_omqstat))
return -EFAULT;
}
return 0;
}
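A minimal native user of the queue attributes these wrappers shuttle back and forth (sketch only; link with -lrt, and note that mq_getattr is itself a thin wrapper over mq_getsetattr):

#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>

int main(void)
{
	struct mq_attr attr = { .mq_maxmsg = 4, .mq_msgsize = 64 };
	mqd_t q = mq_open("/demo", O_CREAT | O_RDWR, 0600, &attr);

	if (q == (mqd_t)-1)
		return 1;
	mq_send(q, "ping", 5, 0);
	mq_getattr(q, &attr);           /* round-trips through the code above */
	printf("curmsgs=%ld\n", attr.mq_curmsgs);
	mq_close(q);
	mq_unlink("/demo");
	return 0;
}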

289
kernel/ipc/ipc_sysctl.c Normal file

@@ -0,0 +1,289 @@
/*
* Copyright (C) 2007
*
* Author: Eric Biederman <ebiederm@xmision.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
*/
#include <linux/module.h>
#include <linux/ipc.h>
#include <linux/nsproxy.h>
#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <linux/ipc_namespace.h>
#include <linux/msg.h>
#include "util.h"
static void *get_ipc(ctl_table *table)
{
char *which = table->data;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
return which;
}
#ifdef CONFIG_PROC_SYSCTL
static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
int rc;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
/*
* Tunable has successfully been changed by hand. Disable its
* automatic adjustment. This simply requires unregistering
* the notifiers that trigger recalculation.
*/
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_doulongvec_minmax(&ipc_table, write, buffer,
lenp, ppos);
}
/*
* Routine that is called when the file "auto_msgmni" has successfully been
* written.
* Two values are allowed:
* 0: unregister msgmni's callback routine from the ipc namespace notifier
* chain. This means that msgmni won't be recomputed anymore upon memory
* add/remove or ipc namespace creation/removal.
* 1: register back the callback routine.
*/
static void ipc_auto_callback(int val)
{
if (!val)
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
else {
/*
* Re-enable automatic recomputing only if not already
* enabled.
*/
recompute_msgmni(current->nsproxy->ipc_ns);
cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
}
}
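From userland the toggle is a one-line write; a sketch (assuming CONFIG_PROC_SYSCTL and this kernel's sysctl layout):

#include <stdio.h>

/* Write 0 to pin msgmni at its current value, 1 to re-enable automatic
 * recomputation (which also triggers an immediate recompute, as above). */
static int set_auto_msgmni(int enable)
{
	FILE *f = fopen("/proc/sys/kernel/auto_msgmni", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", enable ? 1 : 0);
	return fclose(f);
}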
static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
int oldval;
int rc;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
oldval = *((int *)(ipc_table.data));
rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp) {
int newval = *((int *)(ipc_table.data));
/*
* The file "auto_msgmni" has correctly been set.
* React by (un)registering the corresponding tunable, if the
* value has changed.
*/
if (newval != oldval)
ipc_auto_callback(newval);
}
return rc;
}
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
#define proc_ipc_callback_dointvec NULL
#define proc_ipcauto_dointvec_minmax NULL
#endif
#ifdef CONFIG_SYSCTL_SYSCALL
/* The generic sysctl ipc data routine. */
static int sysctl_ipc_data(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
size_t len;
void *data;
/* Bail out if there is no variable to act on */
if (!table->data || !table->maxlen)
return -ENOTDIR;
data = get_ipc(table);
if (!data)
return -ENOTDIR;
if (oldval && oldlenp) {
if (get_user(len, oldlenp))
return -EFAULT;
if (len) {
if (len > table->maxlen)
len = table->maxlen;
if (copy_to_user(oldval, data, len))
return -EFAULT;
if (put_user(len, oldlenp))
return -EFAULT;
}
}
if (newval && newlen) {
if (newlen > table->maxlen)
newlen = table->maxlen;
if (copy_from_user(data, newval, newlen))
return -EFAULT;
}
return 1;
}
static int sysctl_ipc_registered_data(ctl_table *table,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen)
{
int rc;
rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen);
if (newval && newlen && rc > 0)
/*
* Tunable has successfully been changed from userland
*/
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
#else
#define sysctl_ipc_data NULL
#define sysctl_ipc_registered_data NULL
#endif
static int zero;
static int one = 1;
static struct ctl_table ipc_kern_table[] = {
{
.ctl_name = KERN_SHMMAX,
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof (init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = KERN_SHMALL,
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof (init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = KERN_SHMMNI,
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof (init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = KERN_MSGMAX,
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = KERN_MSGMNI,
.procname = "msgmni",
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof (init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_callback_dointvec,
.strategy = sysctl_ipc_registered_data,
},
{
.ctl_name = KERN_MSGMNB,
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof (init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = KERN_SEM,
.procname = "sem",
.data = &init_ipc_ns.sem_ctls,
.maxlen = 4*sizeof (int),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "auto_msgmni",
.data = &init_ipc_ns.auto_msgmni,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_ipcauto_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{}
};
static struct ctl_table ipc_root_table[] = {
{
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
},
{}
};
static int __init ipc_sysctl_init(void)
{
register_sysctl_table(ipc_root_table);
return 0;
}
__initcall(ipc_sysctl_init);

92
kernel/ipc/ipcns_notifier.c Normal file

@@ -0,0 +1,92 @@
/*
* linux/ipc/ipcns_notifier.c
* Copyright (C) 2007 BULL SA. Nadia Derbey
*
* Notification mechanism for ipc namespaces:
* The callback routine registered in the memory chain invokes the ipcns
* notifier chain with the IPCNS_MEMCHANGED event.
* Each callback routine registered in the ipcns namespace recomputes msgmni
* for the owning namespace.
*/
#include <linux/msg.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include "util.h"
static BLOCKING_NOTIFIER_HEAD(ipcns_chain);
static int ipcns_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct ipc_namespace *ns;
switch (action) {
case IPCNS_MEMCHANGED: /* amount of lowmem has changed */
case IPCNS_CREATED:
case IPCNS_REMOVED:
/*
* It's time to recompute msgmni
*/
ns = container_of(self, struct ipc_namespace, ipcns_nb);
/*
* No need to get a reference on the ns: the 1st job of
* free_ipc_ns() is to unregister the callback routine.
* blocking_notifier_chain_unregister takes the wr lock to do
* it.
* When this callback routine is called the rd lock is held by
* blocking_notifier_call_chain.
* So the ipc ns cannot be freed while we are here.
*/
recompute_msgmni(ns);
break;
default:
break;
}
return NOTIFY_OK;
}
int register_ipcns_notifier(struct ipc_namespace *ns)
{
int rc;
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
if (!rc)
ns->auto_msgmni = 1;
return rc;
}
int cond_register_ipcns_notifier(struct ipc_namespace *ns)
{
int rc;
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
rc = blocking_notifier_chain_cond_register(&ipcns_chain,
&ns->ipcns_nb);
if (!rc)
ns->auto_msgmni = 1;
return rc;
}
void unregister_ipcns_notifier(struct ipc_namespace *ns)
{
blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
ns->auto_msgmni = 0;
}
int ipcns_notify(unsigned long val)
{
return blocking_notifier_call_chain(&ipcns_chain, val, NULL);
}

116
kernel/ipc/mq_sysctl.c Normal file

@@ -0,0 +1,116 @@
/*
* Copyright (C) 2007 IBM Corporation
*
* Author: Cedric Le Goater <clg@fr.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
*/
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sysctl.h>
/*
* Define the ranges various user-specified maximum values can
* be set to.
*/
#define MIN_MSGMAX 1 /* min value for msg_max */
#define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */
#define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */
#define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */
#ifdef CONFIG_PROC_SYSCTL
static void *get_mq(ctl_table *table)
{
char *which = table->data;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
return which;
}
static int proc_mq_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
}
static int proc_mq_dointvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table mq_table;
memcpy(&mq_table, table, sizeof(mq_table));
mq_table.data = get_mq(table);
return proc_dointvec_minmax(&mq_table, write, buffer,
lenp, ppos);
}
#else
#define proc_mq_dointvec NULL
#define proc_mq_dointvec_minmax NULL
#endif
static int msg_max_limit_min = MIN_MSGMAX;
static int msg_max_limit_max = MAX_MSGMAX;
static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
static ctl_table mq_sysctls[] = {
{
.procname = "queues_max",
.data = &init_ipc_ns.mq_queues_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec,
},
{
.procname = "msg_max",
.data = &init_ipc_ns.mq_msg_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.extra1 = &msg_max_limit_min,
.extra2 = &msg_max_limit_max,
},
{
.procname = "msgsize_max",
.data = &init_ipc_ns.mq_msgsize_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
{ .ctl_name = 0 }
};
static ctl_table mq_sysctl_dir[] = {
{
.procname = "mqueue",
.mode = 0555,
.child = mq_sysctls,
},
{ .ctl_name = 0 }
};
static ctl_table mq_sysctl_root[] = {
{
.ctl_name = CTL_FS,
.procname = "fs",
.mode = 0555,
.child = mq_sysctl_dir,
},
{ .ctl_name = 0 }
};
struct ctl_table_header *mq_register_sysctl_table(void)
{
return register_sysctl_table(mq_sysctl_root);
}

1302
kernel/ipc/mqueue.c Normal file

File diff suppressed because it is too large

945
kernel/ipc/msg.c Normal file

@@ -0,0 +1,945 @@
/*
* linux/ipc/msg.c
* Copyright (C) 1992 Krishna Balasubramanian
*
* Removed all the remaining kerneld mess
* Catch the -EFAULT stuff properly
* Use GFP_KERNEL for messages as in 1.2
* Fixed up the unchecked user space derefs
* Copyright (C) 1998 Alan Cox & Andi Kleen
*
* /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
*
* mostly rewritten, threaded and wake-one semantics added
* MSGMAX limit removed, sysctl's added
* (c) 1999 Manfred Spraul <manfred@colorfullife.com>
*
* support for audit of ipc object properties and permission changes
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
*
* namespaces support
* OpenVZ, SWsoft Inc.
* Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/capability.h>
#include <linux/slab.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/security.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include "util.h"
/*
* one msg_receiver structure for each sleeping receiver:
*/
struct msg_receiver {
struct list_head r_list;
struct task_struct *r_tsk;
int r_mode;
long r_msgtype;
long r_maxsize;
struct msg_msg *volatile r_msg;
};
/* one msg_sender for each sleeping sender */
struct msg_sender {
struct list_head list;
struct task_struct *tsk;
};
#define SEARCH_ANY 1
#define SEARCH_EQUAL 2
#define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
static int newque(struct ipc_namespace *, struct ipc_params *);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
#endif
/*
* Scale msgmni with the available lowmem size: the memory dedicated to msg
* queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
* Also take into account the number of nsproxies created so far.
* This should be done staying within the (MSGMNI, IPCMNI/nr_ipc_ns) range.
*/
void recompute_msgmni(struct ipc_namespace *ns)
{
struct sysinfo i;
unsigned long allowed;
int nb_ns;
si_meminfo(&i);
allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
/ MSGMNB;
nb_ns = atomic_read(&nr_ipc_ns);
allowed /= nb_ns;
if (allowed < MSGMNI) {
ns->msg_ctlmni = MSGMNI;
return;
}
if (allowed > IPCMNI / nb_ns) {
ns->msg_ctlmni = IPCMNI / nb_ns;
return;
}
ns->msg_ctlmni = allowed;
}
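To make the scaling concrete, rough numbers assuming the usual constants of this era (MSG_MEM_SCALE = 32, MSGMNB = 16384, MSGMNI = 16, IPCMNI = 32768): with 2 GiB of lowmem and a single ipc namespace, allowed = (2^31 / 32) / 16384 = 4096, which falls inside the (MSGMNI, IPCMNI) range, so msg_ctlmni becomes 4096. A 128 MiB box yields 256 by the same formula, and below roughly 8 MiB of lowmem the result would be clamped up to MSGMNI.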
void msg_init_ns(struct ipc_namespace *ns)
{
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
recompute_msgmni(ns);
atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}
#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
}
#endif
void __init msg_init(void)
{
msg_init_ns(&init_ipc_ns);
printk(KERN_INFO "msgmni has been set to %d\n",
init_ipc_ns.msg_ctlmni);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
}
/*
* msg_lock_(check_) routines are called in the paths where the rw_mutex
* is not held.
*/
static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
{
struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
if (IS_ERR(ipcp))
return (struct msg_queue *)ipcp;
return container_of(ipcp, struct msg_queue, q_perm);
}
static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
int id)
{
struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
if (IS_ERR(ipcp))
return (struct msg_queue *)ipcp;
return container_of(ipcp, struct msg_queue, q_perm);
}
static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
{
ipc_rmid(&msg_ids(ns), &s->q_perm);
}
/**
* newque - Create a new msg queue
* @ns: namespace
* @params: ptr to the structure that contains the key and msgflg
*
* Called with msg_ids.rw_mutex held (writer)
*/
static int newque(struct ipc_namespace *ns, struct ipc_params *params)
{
struct msg_queue *msq;
int id, retval;
key_t key = params->key;
int msgflg = params->flg;
msq = ipc_rcu_alloc(sizeof(*msq));
if (!msq)
return -ENOMEM;
msq->q_perm.mode = msgflg & S_IRWXUGO;
msq->q_perm.key = key;
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
if (retval) {
ipc_rcu_putref(msq);
return retval;
}
/*
* ipc_addid() locks msq
*/
id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (id < 0) {
security_msg_queue_free(msq);
ipc_rcu_putref(msq);
return id;
}
msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = get_seconds();
msq->q_cbytes = msq->q_qnum = 0;
msq->q_qbytes = ns->msg_ctlmnb;
msq->q_lspid = msq->q_lrpid = 0;
INIT_LIST_HEAD(&msq->q_messages);
INIT_LIST_HEAD(&msq->q_receivers);
INIT_LIST_HEAD(&msq->q_senders);
msg_unlock(msq);
return msq->q_perm.id;
}
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
mss->tsk = current;
current->state = TASK_INTERRUPTIBLE;
list_add_tail(&mss->list, &msq->q_senders);
}
static inline void ss_del(struct msg_sender *mss)
{
if (mss->list.next != NULL)
list_del(&mss->list);
}
static void ss_wakeup(struct list_head *h, int kill)
{
struct list_head *tmp;
tmp = h->next;
while (tmp != h) {
struct msg_sender *mss;
mss = list_entry(tmp, struct msg_sender, list);
tmp = tmp->next;
if (kill)
mss->list.next = NULL;
wake_up_process(mss->tsk);
}
}
static void expunge_all(struct msg_queue *msq, int res)
{
struct list_head *tmp;
tmp = msq->q_receivers.next;
while (tmp != &msq->q_receivers) {
struct msg_receiver *msr;
msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = ERR_PTR(res);
}
}
/*
* freeque() wakes up waiters on the sender and receiver waiting queue,
* removes the message queue from message queue ID IDR, and cleans up all the
* messages associated with this queue.
*
* msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
* before freeque() is called. msg_ids.rw_mutex remains locked on exit.
*/
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
struct list_head *tmp;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
expunge_all(msq, -EIDRM);
ss_wakeup(&msq->q_senders, 1);
msg_rmid(ns, msq);
msg_unlock(msq);
tmp = msq->q_messages.next;
while (tmp != &msq->q_messages) {
struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
tmp = tmp->next;
atomic_dec(&ns->msg_hdrs);
free_msg(msg);
}
atomic_sub(msq->q_cbytes, &ns->msg_bytes);
security_msg_queue_free(msq);
ipc_rcu_putref(msq);
}
/*
* Called with msg_ids.rw_mutex and ipcp locked.
*/
static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
{
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
return security_msg_queue_associate(msq, msgflg);
}
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
struct ipc_namespace *ns;
struct ipc_ops msg_ops;
struct ipc_params msg_params;
ns = current->nsproxy->ipc_ns;
msg_ops.getnew = newque;
msg_ops.associate = msg_security;
msg_ops.more_checks = NULL;
msg_params.key = key;
msg_params.flg = msgflg;
return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
switch(version) {
case IPC_64:
return copy_to_user(buf, in, sizeof(*in));
case IPC_OLD:
{
struct msqid_ds out;
memset(&out, 0, sizeof(out));
ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
out.msg_stime = in->msg_stime;
out.msg_rtime = in->msg_rtime;
out.msg_ctime = in->msg_ctime;
if (in->msg_cbytes > USHORT_MAX)
out.msg_cbytes = USHORT_MAX;
else
out.msg_cbytes = in->msg_cbytes;
out.msg_lcbytes = in->msg_cbytes;
if (in->msg_qnum > USHORT_MAX)
out.msg_qnum = USHORT_MAX;
else
out.msg_qnum = in->msg_qnum;
if (in->msg_qbytes > USHORT_MAX)
out.msg_qbytes = USHORT_MAX;
else
out.msg_qbytes = in->msg_qbytes;
out.msg_lqbytes = in->msg_qbytes;
out.msg_lspid = in->msg_lspid;
out.msg_lrpid = in->msg_lrpid;
return copy_to_user(buf, &out, sizeof(out));
}
default:
return -EINVAL;
}
}
static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
switch(version) {
case IPC_64:
if (copy_from_user(out, buf, sizeof(*out)))
return -EFAULT;
return 0;
case IPC_OLD:
{
struct msqid_ds tbuf_old;
if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
return -EFAULT;
out->msg_perm.uid = tbuf_old.msg_perm.uid;
out->msg_perm.gid = tbuf_old.msg_perm.gid;
out->msg_perm.mode = tbuf_old.msg_perm.mode;
if (tbuf_old.msg_qbytes == 0)
out->msg_qbytes = tbuf_old.msg_lqbytes;
else
out->msg_qbytes = tbuf_old.msg_qbytes;
return 0;
}
default:
return -EINVAL;
}
}
/*
* This function handles some msgctl commands which require the rw_mutex
* to be held in write mode.
* NOTE: no locks must be held, the rw_mutex is taken inside this function.
*/
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
struct msqid_ds __user *buf, int version)
{
struct kern_ipc_perm *ipcp;
struct msqid64_ds msqid64;
struct msg_queue *msq;
int err;
if (cmd == IPC_SET) {
if (copy_msqid_from_user(&msqid64, buf, version))
return -EFAULT;
}
ipcp = ipcctl_pre_down(&msg_ids(ns), msqid, cmd,
&msqid64.msg_perm, msqid64.msg_qbytes);
if (IS_ERR(ipcp))
return PTR_ERR(ipcp);
msq = container_of(ipcp, struct msg_queue, q_perm);
err = security_msg_queue_msgctl(msq, cmd);
if (err)
goto out_unlock;
switch (cmd) {
case IPC_RMID:
freeque(ns, ipcp);
goto out_up;
case IPC_SET:
if (msqid64.msg_qbytes > ns->msg_ctlmnb &&
!capable(CAP_SYS_RESOURCE)) {
err = -EPERM;
goto out_unlock;
}
msq->q_qbytes = msqid64.msg_qbytes;
ipc_update_perm(&msqid64.msg_perm, ipcp);
msq->q_ctime = get_seconds();
/* sleeping receivers might be excluded by
* stricter permissions.
*/
expunge_all(msq, -EAGAIN);
/* sleeping senders might be able to send
* due to a larger queue size.
*/
ss_wakeup(&msq->q_senders, 0);
break;
default:
err = -EINVAL;
}
out_unlock:
msg_unlock(msq);
out_up:
up_write(&msg_ids(ns).rw_mutex);
return err;
}
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
struct msg_queue *msq;
int err, version;
struct ipc_namespace *ns;
if (msqid < 0 || cmd < 0)
return -EINVAL;
version = ipc_parse_version(&cmd);
ns = current->nsproxy->ipc_ns;
switch (cmd) {
case IPC_INFO:
case MSG_INFO:
{
struct msginfo msginfo;
int max_id;
if (!buf)
return -EFAULT;
/*
* We must not return kernel stack data.
* Due to padding, it is not enough
* to set all member fields.
*/
err = security_msg_queue_msgctl(NULL, cmd);
if (err)
return err;
memset(&msginfo, 0, sizeof(msginfo));
msginfo.msgmni = ns->msg_ctlmni;
msginfo.msgmax = ns->msg_ctlmax;
msginfo.msgmnb = ns->msg_ctlmnb;
msginfo.msgssz = MSGSSZ;
msginfo.msgseg = MSGSEG;
down_read(&msg_ids(ns).rw_mutex);
if (cmd == MSG_INFO) {
msginfo.msgpool = msg_ids(ns).in_use;
msginfo.msgmap = atomic_read(&ns->msg_hdrs);
msginfo.msgtql = atomic_read(&ns->msg_bytes);
} else {
msginfo.msgmap = MSGMAP;
msginfo.msgpool = MSGPOOL;
msginfo.msgtql = MSGTQL;
}
max_id = ipc_get_maxid(&msg_ids(ns));
up_read(&msg_ids(ns).rw_mutex);
if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
return -EFAULT;
return (max_id < 0) ? 0 : max_id;
}
case MSG_STAT: /* msqid is an index rather than a msg queue id */
case IPC_STAT:
{
struct msqid64_ds tbuf;
int success_return;
if (!buf)
return -EFAULT;
if (cmd == MSG_STAT) {
msq = msg_lock(ns, msqid);
if (IS_ERR(msq))
return PTR_ERR(msq);
success_return = msq->q_perm.id;
} else {
msq = msg_lock_check(ns, msqid);
if (IS_ERR(msq))
return PTR_ERR(msq);
success_return = 0;
}
err = -EACCES;
if (ipcperms(&msq->q_perm, S_IRUGO))
goto out_unlock;
err = security_msg_queue_msgctl(msq, cmd);
if (err)
goto out_unlock;
memset(&tbuf, 0, sizeof(tbuf));
kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
tbuf.msg_stime = msq->q_stime;
tbuf.msg_rtime = msq->q_rtime;
tbuf.msg_ctime = msq->q_ctime;
tbuf.msg_cbytes = msq->q_cbytes;
tbuf.msg_qnum = msq->q_qnum;
tbuf.msg_qbytes = msq->q_qbytes;
tbuf.msg_lspid = msq->q_lspid;
tbuf.msg_lrpid = msq->q_lrpid;
msg_unlock(msq);
if (copy_msqid_to_user(buf, &tbuf, version))
return -EFAULT;
return success_return;
}
case IPC_SET:
case IPC_RMID:
err = msgctl_down(ns, msqid, cmd, buf, version);
return err;
default:
return -EINVAL;
}
out_unlock:
msg_unlock(msq);
return err;
}
static int testmsg(struct msg_msg *msg, long type, int mode)
{
switch(mode)
{
case SEARCH_ANY:
return 1;
case SEARCH_LESSEQUAL:
if (msg->m_type <=type)
return 1;
break;
case SEARCH_EQUAL:
if (msg->m_type == type)
return 1;
break;
case SEARCH_NOTEQUAL:
if (msg->m_type != type)
return 1;
break;
}
return 0;
}
static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
struct list_head *tmp;
tmp = msq->q_receivers.next;
while (tmp != &msq->q_receivers) {
struct msg_receiver *msr;
msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) {
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = ERR_PTR(-E2BIG);
} else {
msr->r_msg = NULL;
msq->q_lrpid = task_pid_vnr(msr->r_tsk);
msq->q_rtime = get_seconds();
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = msg;
return 1;
}
}
}
return 0;
}
long do_msgsnd(int msqid, long mtype, void __user *mtext,
size_t msgsz, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
int err;
struct ipc_namespace *ns;
ns = current->nsproxy->ipc_ns;
if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
return -EINVAL;
if (mtype < 1)
return -EINVAL;
msg = load_msg(mtext, msgsz);
if (IS_ERR(msg))
return PTR_ERR(msg);
msg->m_type = mtype;
msg->m_ts = msgsz;
msq = msg_lock_check(ns, msqid);
if (IS_ERR(msq)) {
err = PTR_ERR(msq);
goto out_free;
}
for (;;) {
struct msg_sender s;
err = -EACCES;
if (ipcperms(&msq->q_perm, S_IWUGO))
goto out_unlock_free;
err = security_msg_queue_msgsnd(msq, msg, msgflg);
if (err)
goto out_unlock_free;
if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
1 + msq->q_qnum <= msq->q_qbytes) {
break;
}
/* queue full, wait: */
if (msgflg & IPC_NOWAIT) {
err = -EAGAIN;
goto out_unlock_free;
}
ss_add(msq, &s);
ipc_rcu_getref(msq);
msg_unlock(msq);
schedule();
ipc_lock_by_ptr(&msq->q_perm);
ipc_rcu_putref(msq);
if (msq->q_perm.deleted) {
err = -EIDRM;
goto out_unlock_free;
}
ss_del(&s);
if (signal_pending(current)) {
err = -ERESTARTNOHAND;
goto out_unlock_free;
}
}
msq->q_lspid = task_tgid_vnr(current);
msq->q_stime = get_seconds();
if (!pipelined_send(msq, msg)) {
/* no one is waiting for this message, enqueue it */
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
atomic_add(msgsz, &ns->msg_bytes);
atomic_inc(&ns->msg_hdrs);
}
err = 0;
msg = NULL;
out_unlock_free:
msg_unlock(msq);
out_free:
if (msg != NULL)
free_msg(msg);
return err;
}
SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
int, msgflg)
{
long mtype;
if (get_user(mtype, &msgp->mtype))
return -EFAULT;
return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
}
static inline int convert_mode(long *msgtyp, int msgflg)
{
/*
* find message of correct type.
* msgtyp = 0 => get first.
* msgtyp > 0 => get first message of matching type.
* msgtyp < 0 => get message with least type must be < abs(msgtype).
*/
if (*msgtyp == 0)
return SEARCH_ANY;
if (*msgtyp < 0) {
*msgtyp = -*msgtyp;
return SEARCH_LESSEQUAL;
}
if (msgflg & MSG_EXCEPT)
return SEARCH_NOTEQUAL;
return SEARCH_EQUAL;
}
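The three search modes correspond directly to the msgtyp argument of msgrcv(2); a minimal sketch (error handling abbreviated):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct demo_msg { long mtype; char mtext[32]; };

int main(void)
{
	int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
	struct demo_msg m = { .mtype = 7 };

	if (id < 0)
		return 1;
	strcpy(m.mtext, "hi");
	msgsnd(id, &m, sizeof(m.mtext), 0);
	msgrcv(id, &m, sizeof(m.mtext), 0, IPC_NOWAIT);   /* SEARCH_ANY: first message */
	msgsnd(id, &m, sizeof(m.mtext), 0);
	msgrcv(id, &m, sizeof(m.mtext), -10, IPC_NOWAIT); /* SEARCH_LESSEQUAL: lowest type <= 10 */
	printf("got type %ld\n", m.mtype);
	msgctl(id, IPC_RMID, NULL);
	return 0;
}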
long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
size_t msgsz, long msgtyp, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
int mode;
struct ipc_namespace *ns;
if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
mode = convert_mode(&msgtyp, msgflg);
ns = current->nsproxy->ipc_ns;
msq = msg_lock_check(ns, msqid);
if (IS_ERR(msq))
return PTR_ERR(msq);
for (;;) {
struct msg_receiver msr_d;
struct list_head *tmp;
msg = ERR_PTR(-EACCES);
if (ipcperms(&msq->q_perm, S_IRUGO))
goto out_unlock;
msg = ERR_PTR(-EAGAIN);
tmp = msq->q_messages.next;
while (tmp != &msq->q_messages) {
struct msg_msg *walk_msg;
walk_msg = list_entry(tmp, struct msg_msg, m_list);
if (testmsg(walk_msg, msgtyp, mode) &&
!security_msg_queue_msgrcv(msq, walk_msg, current,
msgtyp, mode)) {
msg = walk_msg;
if (mode == SEARCH_LESSEQUAL &&
walk_msg->m_type != 1) {
msg = walk_msg;
msgtyp = walk_msg->m_type - 1;
} else {
msg = walk_msg;
break;
}
}
tmp = tmp->next;
}
if (!IS_ERR(msg)) {
/*
* Found a suitable message.
* Unlink it from the queue.
*/
if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
msg = ERR_PTR(-E2BIG);
goto out_unlock;
}
list_del(&msg->m_list);
msq->q_qnum--;
msq->q_rtime = get_seconds();
msq->q_lrpid = task_tgid_vnr(current);
msq->q_cbytes -= msg->m_ts;
atomic_sub(msg->m_ts, &ns->msg_bytes);
atomic_dec(&ns->msg_hdrs);
ss_wakeup(&msq->q_senders, 0);
msg_unlock(msq);
break;
}
/* No message waiting. Wait for a message */
if (msgflg & IPC_NOWAIT) {
msg = ERR_PTR(-ENOMSG);
goto out_unlock;
}
list_add_tail(&msr_d.r_list, &msq->q_receivers);
msr_d.r_tsk = current;
msr_d.r_msgtype = msgtyp;
msr_d.r_mode = mode;
if (msgflg & MSG_NOERROR)
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = msgsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
current->state = TASK_INTERRUPTIBLE;
msg_unlock(msq);
schedule();
/* Lockless receive, part 1:
* Disable preemption. We don't hold a reference to the queue
* and getting a reference would defeat the idea of a lockless
* operation, thus the code relies on rcu to guarantee the
* existence of msq:
* Prior to destruction, expunge_all(-EIDRM) changes r_msg.
* Thus if r_msg is -EAGAIN, then the queue is not yet destroyed.
* rcu_read_lock() prevents preemption between reading r_msg
* and the spin_lock() inside ipc_lock_by_ptr().
*/
rcu_read_lock();
/* Lockless receive, part 2:
* Wait until pipelined_send or expunge_all are outside of
* wake_up_process(). There is a race with exit(), see
* ipc/mqueue.c for the details.
*/
msg = (struct msg_msg*)msr_d.r_msg;
while (msg == NULL) {
cpu_relax();
msg = (struct msg_msg *)msr_d.r_msg;
}
/* Lockless receive, part 3:
* If there is a message or an error then accept it without
* locking.
*/
if (msg != ERR_PTR(-EAGAIN)) {
rcu_read_unlock();
break;
}
/* Lockless receive, part 3:
* Acquire the queue spinlock.
*/
ipc_lock_by_ptr(&msq->q_perm);
rcu_read_unlock();
/* Lockless receive, part 4:
* Repeat test after acquiring the spinlock.
*/
msg = (struct msg_msg*)msr_d.r_msg;
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock;
list_del(&msr_d.r_list);
if (signal_pending(current)) {
msg = ERR_PTR(-ERESTARTNOHAND);
out_unlock:
msg_unlock(msq);
break;
}
}
if (IS_ERR(msg))
return PTR_ERR(msg);
msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
*pmtype = msg->m_type;
if (store_msg(mtext, msg, msgsz))
msgsz = -EFAULT;
free_msg(msg);
return msgsz;
}
SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
long, msgtyp, int, msgflg)
{
long err, mtype;
err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
if (err < 0)
goto out;
if (put_user(mtype, &msgp->mtype))
err = -EFAULT;
out:
return err;
}
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
struct msg_queue *msq = it;
return seq_printf(s,
"%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
msq->q_perm.key,
msq->q_perm.id,
msq->q_perm.mode,
msq->q_cbytes,
msq->q_qnum,
msq->q_lspid,
msq->q_lrpid,
msq->q_perm.uid,
msq->q_perm.gid,
msq->q_perm.cuid,
msq->q_perm.cgid,
msq->q_stime,
msq->q_rtime,
msq->q_ctime);
}
#endif

146
kernel/ipc/msgutil.c Normal file

@@ -0,0 +1,146 @@
/*
* linux/ipc/msgutil.c
* Copyright (C) 1999, 2004 Manfred Spraul
*
* This file is released under GNU General Public Licence version 2 or
* (at your option) any later version.
*
* See the file COPYING for more details.
*/
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/ipc.h>
#include <linux/ipc_namespace.h>
#include <asm/uaccess.h>
#include "util.h"
DEFINE_SPINLOCK(mq_lock);
/*
* The next two defines are here because this is the only file
* compiled when either CONFIG_SYSVIPC or CONFIG_POSIX_MQUEUE
* is enabled while CONFIG_IPC_NS is not.
*/
struct ipc_namespace init_ipc_ns = {
.count = ATOMIC_INIT(1),
#ifdef CONFIG_POSIX_MQUEUE
.mq_queues_max = DFLT_QUEUESMAX,
.mq_msg_max = DFLT_MSGMAX,
.mq_msgsize_max = DFLT_MSGSIZEMAX,
#endif
};
atomic_t nr_ipc_ns = ATOMIC_INIT(1);
struct msg_msgseg {
struct msg_msgseg* next;
/* the next part of the message follows immediately */
};
#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
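Rough numbers, assuming 4 KiB pages and a struct msg_msg of about 48 bytes (the exact size is config-dependent): the first chunk carries roughly 4048 bytes of payload in-line, and each msg_msgseg after it carries PAGE_SIZE minus one pointer, so an 8 KiB message needs the leading chunk plus two segments.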
struct msg_msg *load_msg(const void __user *src, int len)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
int err;
int alen;
alen = len;
if (alen > DATALEN_MSG)
alen = DATALEN_MSG;
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
return ERR_PTR(-ENOMEM);
msg->next = NULL;
msg->security = NULL;
if (copy_from_user(msg + 1, src, alen)) {
err = -EFAULT;
goto out_err;
}
len -= alen;
src = ((char __user *)src) + alen;
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;
alen = len;
if (alen > DATALEN_SEG)
alen = DATALEN_SEG;
seg = kmalloc(sizeof(*seg) + alen,
GFP_KERNEL);
if (seg == NULL) {
err = -ENOMEM;
goto out_err;
}
*pseg = seg;
seg->next = NULL;
if (copy_from_user(seg + 1, src, alen)) {
err = -EFAULT;
goto out_err;
}
pseg = &seg->next;
len -= alen;
src = ((char __user *)src) + alen;
}
err = security_msg_msg_alloc(msg);
if (err)
goto out_err;
return msg;
out_err:
free_msg(msg);
return ERR_PTR(err);
}
int store_msg(void __user *dest, struct msg_msg *msg, int len)
{
int alen;
struct msg_msgseg *seg;
alen = len;
if (alen > DATALEN_MSG)
alen = DATALEN_MSG;
if (copy_to_user(dest, msg + 1, alen))
return -1;
len -= alen;
dest = ((char __user *)dest) + alen;
seg = msg->next;
while (len > 0) {
alen = len;
if (alen > DATALEN_SEG)
alen = DATALEN_SEG;
if (copy_to_user(dest, seg + 1, alen))
return -1;
len -= alen;
dest = ((char __user *)dest) + alen;
seg = seg->next;
}
return 0;
}
void free_msg(struct msg_msg *msg)
{
struct msg_msgseg *seg;
security_msg_msg_free(msg);
seg = msg->next;
kfree(msg);
while (seg != NULL) {
struct msg_msgseg *tmp = seg->next;
kfree(seg);
seg = tmp;
}
}

134
kernel/ipc/namespace.c Normal file

@@ -0,0 +1,134 @@
/*
* linux/ipc/namespace.c
* Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
*/
#include <linux/ipc.h>
#include <linux/msg.h>
#include <linux/ipc_namespace.h>
#include <linux/rcupdate.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include "util.h"
static struct ipc_namespace *create_ipc_ns(void)
{
struct ipc_namespace *ns;
int err;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL)
return ERR_PTR(-ENOMEM);
atomic_set(&ns->count, 1);
err = mq_init_ns(ns);
if (err) {
kfree(ns);
return ERR_PTR(err);
}
atomic_inc(&nr_ipc_ns);
sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);
/*
* msgmni has already been computed for the new ipc ns.
* Thus, do the ipcns creation notification before registering that
* new ipcns in the chain.
*/
ipcns_notify(IPCNS_CREATED);
register_ipcns_notifier(ns);
return ns;
}
struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
{
if (!(flags & CLONE_NEWIPC))
return get_ipc_ns(ns);
return create_ipc_ns();
}
/*
* free_ipcs - free all ipcs of one type
* @ns: the namespace to remove the ipcs from
* @ids: the table of ipcs to free
* @free: the function called to free each individual ipc
*
* Called for each kind of ipc when an ipc_namespace exits.
*/
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
void (*free)(struct ipc_namespace *, struct kern_ipc_perm *))
{
struct kern_ipc_perm *perm;
int next_id;
int total, in_use;
down_write(&ids->rw_mutex);
in_use = ids->in_use;
for (total = 0, next_id = 0; total < in_use; next_id++) {
perm = idr_find(&ids->ipcs_idr, next_id);
if (perm == NULL)
continue;
ipc_lock_by_ptr(perm);
free(ns, perm);
total++;
}
up_write(&ids->rw_mutex);
}
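/*
 * Illustrative sketch, not part of the original file: the expected
 * shape of a free() callback passed to free_ipcs().  It runs with the
 * object locked by ipc_lock_by_ptr() and is expected to remove the id
 * and drop the lock itself.  The function name is hypothetical.
 */
#if 0	/* example only */
static void example_free(struct ipc_namespace *ns, struct kern_ipc_perm *perm)
{
	ipc_rmid(&ns->ids[IPC_MSG_IDS], perm);	/* drop from the idr */
	ipc_unlock(perm);			/* undo ipc_lock_by_ptr() */
	/* ...release the per-object memory here... */
}
#endif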
static void free_ipc_ns(struct ipc_namespace *ns)
{
/*
* Unregistering the hotplug notifier at the beginning guarantees
* that the ipc namespace won't be freed while we are inside the
* callback routine. Since the blocking_notifier_chain_XXX routines
	 * hold a rw lock on the notifier list, unregister_ipcns_notifier()
	 * won't take the write lock before blocking_notifier_call_chain()
	 * has released the read lock.
*/
unregister_ipcns_notifier(ns);
sem_exit_ns(ns);
msg_exit_ns(ns);
shm_exit_ns(ns);
kfree(ns);
atomic_dec(&nr_ipc_ns);
/*
* Do the ipcns removal notification after decrementing nr_ipc_ns in
* order to have a correct value when recomputing msgmni.
*/
ipcns_notify(IPCNS_REMOVED);
}
/*
* put_ipc_ns - drop a reference to an ipc namespace.
* @ns: the namespace to put
*
* If this is the last task in the namespace exiting, and
* it is dropping the refcount to 0, then it can race with
 * a task in another ipc namespace but in a mount namespace
* which has this ipcns's mqueuefs mounted, doing some action
* with one of the mqueuefs files. That can raise the refcount.
* So dropping the refcount, and raising the refcount when
* accessing it through the VFS, are protected with mq_lock.
*
* (Clearly, a task raising the refcount on its own ipc_ns
* needn't take mq_lock since it can't race with the last task
* in the ipcns exiting).
*/
void put_ipc_ns(struct ipc_namespace *ns)
{
if (atomic_dec_and_lock(&ns->count, &mq_lock)) {
mq_clear_sbinfo(ns);
spin_unlock(&mq_lock);
mq_put_mnt(ns);
free_ipc_ns(ns);
}
}
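/*
 * Illustrative sketch, not part of the original file: the usual
 * pairing of get_ipc_ns() and put_ipc_ns() around a borrowed
 * reference, as the sysvipc proc iterator in util.c does.
 */
#if 0	/* example only */
	struct ipc_namespace *ns = get_ipc_ns(current->nsproxy->ipc_ns);
	/* ...work with ns->ids[...]... */
	put_ipc_ns(ns);		/* may free ns if this was the last reference */
#endif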

1383
kernel/ipc/sem.c Normal file

File diff suppressed because it is too large Load Diff

1102
kernel/ipc/shm.c Normal file

File diff suppressed because it is too large Load Diff

993
kernel/ipc/util.c Normal file
View File

@@ -0,0 +1,993 @@
/*
* linux/ipc/util.c
* Copyright (C) 1992 Krishna Balasubramanian
*
* Sep 1997 - Call suser() last after "normal" permission checks so we
* get BSD style process accounting right.
* Occurs in several places in the IPC code.
* Chris Evans, <chris@ferret.lmh.ox.ac.uk>
* Nov 1999 - ipc helper functions, unified SMP locking
* Manfred Spraul <manfred@colorfullife.com>
* Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
* Mingming Cao <cmm@us.ibm.com>
* Mar 2006 - support for audit of ipc object properties
* Dustin Kirkland <dustin.kirkland@us.ibm.com>
 * Jun 2006 - namespaces support
* OpenVZ, SWsoft Inc.
* Pavel Emelianov <xemul@openvz.org>
*/
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/init.h>
#include <linux/msg.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/highuid.h>
#include <linux/security.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/audit.h>
#include <linux/nsproxy.h>
#include <linux/rwsem.h>
#include <linux/memory.h>
#include <linux/ipc_namespace.h>
#include <asm/unistd.h>
#include "util.h"
struct ipc_proc_iface {
const char *path;
const char *header;
int ids;
int (*show)(struct seq_file *, void *);
};
#ifdef CONFIG_MEMORY_HOTPLUG
static void ipc_memory_notifier(struct work_struct *work)
{
ipcns_notify(IPCNS_MEMCHANGED);
}
static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
static int ipc_memory_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
switch (action) {
case MEM_ONLINE: /* memory successfully brought online */
case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */
/*
* This is done by invoking the ipcns notifier chain with the
* IPC_MEMCHANGED event.
* In order not to keep the lock on the hotplug memory chain
 * for too long, queue a work item that will, when woken up,
* activate the ipcns notification chain.
* No need to keep several ipc work items on the queue.
*/
if (!work_pending(&ipc_memory_wq))
schedule_work(&ipc_memory_wq);
break;
case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_ONLINE:
case MEM_CANCEL_OFFLINE:
default:
break;
}
return NOTIFY_OK;
}
#endif /* CONFIG_MEMORY_HOTPLUG */
/**
* ipc_init - initialise IPC subsystem
*
 * The various System V IPC resources (semaphores, messages and shared
 * memory) are initialised.
 * A callback routine is registered into the memory hotplug notifier
 * chain: since msgmni scales with lowmem, this callback routine will be
 * called upon successful memory add / remove to recompute msgmni.
*/
static int __init ipc_init(void)
{
sem_init();
msg_init();
shm_init();
hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI);
register_ipcns_notifier(&init_ipc_ns);
return 0;
}
__initcall(ipc_init);
/**
* ipc_init_ids - initialise IPC identifiers
* @ids: Identifier set
*
* Set up the sequence range to use for the ipc identifier range (limited
* below IPCMNI) then initialise the ids idr.
*/
void ipc_init_ids(struct ipc_ids *ids)
{
init_rwsem(&ids->rw_mutex);
ids->in_use = 0;
ids->seq = 0;
{
int seq_limit = INT_MAX/SEQ_MULTIPLIER;
if (seq_limit > USHORT_MAX)
ids->seq_max = USHORT_MAX;
else
ids->seq_max = seq_limit;
}
idr_init(&ids->ipcs_idr);
}
#ifdef CONFIG_PROC_FS
static const struct file_operations sysvipc_proc_fops;
/**
 * ipc_init_proc_interface - Create a proc interface for sysvipc types using a seq_file interface.
* @path: Path in procfs
* @header: Banner to be printed at the beginning of the file.
* @ids: ipc id table to iterate.
* @show: show routine.
*/
void __init ipc_init_proc_interface(const char *path, const char *header,
int ids, int (*show)(struct seq_file *, void *))
{
struct proc_dir_entry *pde;
struct ipc_proc_iface *iface;
iface = kmalloc(sizeof(*iface), GFP_KERNEL);
if (!iface)
return;
iface->path = path;
iface->header = header;
iface->ids = ids;
iface->show = show;
pde = proc_create_data(path,
S_IRUGO, /* world readable */
NULL, /* parent dir */
&sysvipc_proc_fops,
iface);
	if (!pde)
		kfree(iface);
}
#endif
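/*
 * Illustrative sketch, not part of this file: a typical caller, along
 * the lines of what ipc/msg.c does at init time (header string
 * abbreviated here).
 */
#if 0	/* example only */
	ipc_init_proc_interface("sysvipc/msg",
				"       key      msqid perms ...\n",
				IPC_MSG_IDS, sysvipc_msg_proc_show);
#endif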
/**
* ipc_findkey - find a key in an ipc identifier set
* @ids: Identifier set
* @key: The key to find
*
* Requires ipc_ids.rw_mutex locked.
 * Returns the LOCKED pointer to the ipc structure if found, or NULL
 * if not.
*/
static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
{
struct kern_ipc_perm *ipc;
int next_id;
int total;
for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
ipc = idr_find(&ids->ipcs_idr, next_id);
if (ipc == NULL)
continue;
if (ipc->key != key) {
total++;
continue;
}
ipc_lock_by_ptr(ipc);
return ipc;
}
return NULL;
}
/**
* ipc_get_maxid - get the last assigned id
* @ids: IPC identifier set
*
* Called with ipc_ids.rw_mutex held.
*/
int ipc_get_maxid(struct ipc_ids *ids)
{
struct kern_ipc_perm *ipc;
int max_id = -1;
int total, id;
if (ids->in_use == 0)
return -1;
if (ids->in_use == IPCMNI)
return IPCMNI - 1;
/* Look for the last assigned id */
total = 0;
for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
ipc = idr_find(&ids->ipcs_idr, id);
if (ipc != NULL) {
max_id = id;
total++;
}
}
return max_id;
}
/**
* ipc_addid - add an IPC identifier
* @ids: IPC identifier set
* @new: new IPC permission set
* @size: limit for the number of used ids
*
 * Add an entry 'new' to the IPC ids idr. The permissions object is
 * initialised, the first free entry is set up, and the assigned id is
 * returned. The 'new' entry is returned in a locked state on success.
 * On failure the entry is not locked and a negative err-code is returned.
*
* Called with ipc_ids.rw_mutex held as a writer.
*/
int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
{
uid_t euid;
gid_t egid;
int id, err;
if (size > IPCMNI)
size = IPCMNI;
if (ids->in_use >= size)
return -ENOSPC;
spin_lock_init(&new->lock);
new->deleted = 0;
rcu_read_lock();
spin_lock(&new->lock);
err = idr_get_new(&ids->ipcs_idr, new, &id);
if (err) {
spin_unlock(&new->lock);
rcu_read_unlock();
return err;
}
ids->in_use++;
current_euid_egid(&euid, &egid);
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;
new->seq = ids->seq++;
	if (ids->seq > ids->seq_max)
ids->seq = 0;
new->id = ipc_buildid(id, new->seq);
return id;
}
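/*
 * Illustrative sketch, not part of the original file: the id returned
 * to userspace encodes both the idr slot and the sequence number set
 * above (SEQ_MULTIPLIER == IPCMNI, normally 32768):
 *
 *	id   = ipc_buildid(slot, seq) = SEQ_MULTIPLIER * seq + slot
 *	slot = ipcid_to_idx(id)       = id % SEQ_MULTIPLIER
 *	seq  = id / SEQ_MULTIPLIER    (verified by ipc_checkid())
 *
 * e.g. slot 5 with seq 2 yields the user-visible id 65541.
 */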
/**
* ipcget_new - create a new ipc object
* @ns: namespace
 * @ids: IPC identifier set
* @ops: the actual creation routine to call
* @params: its parameters
*
* This routine is called by sys_msgget, sys_semget() and sys_shmget()
* when the key is IPC_PRIVATE.
*/
static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params)
{
int err;
retry:
err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
if (!err)
return -ENOMEM;
down_write(&ids->rw_mutex);
err = ops->getnew(ns, params);
up_write(&ids->rw_mutex);
if (err == -EAGAIN)
goto retry;
return err;
}
/**
* ipc_check_perms - check security and permissions for an IPC
* @ipcp: ipc permission set
* @ops: the actual security routine to call
* @params: its parameters
*
* This routine is called by sys_msgget(), sys_semget() and sys_shmget()
* when the key is not IPC_PRIVATE and that key already exists in the
* ids IDR.
*
* On success, the IPC id is returned.
*
* It is called with ipc_ids.rw_mutex and ipcp->lock held.
*/
static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
struct ipc_params *params)
{
int err;
if (ipcperms(ipcp, params->flg))
err = -EACCES;
else {
err = ops->associate(ipcp, params->flg);
if (!err)
err = ipcp->id;
}
return err;
}
/**
* ipcget_public - get an ipc object or create a new one
* @ns: namespace
 * @ids: IPC identifier set
* @ops: the actual creation routine to call
* @params: its parameters
*
* This routine is called by sys_msgget, sys_semget() and sys_shmget()
* when the key is not IPC_PRIVATE.
 * It adds a new entry if the key is not found and does some permission
 * / security checks if the key is found.
*
* On success, the ipc id is returned.
*/
static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params)
{
struct kern_ipc_perm *ipcp;
int flg = params->flg;
int err;
retry:
err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
/*
	 * Take the lock as a writer since we are potentially going to add
	 * a new entry; read locks are not "upgradable".
*/
down_write(&ids->rw_mutex);
ipcp = ipc_findkey(ids, params->key);
if (ipcp == NULL) {
/* key not used */
if (!(flg & IPC_CREAT))
err = -ENOENT;
else if (!err)
err = -ENOMEM;
else
err = ops->getnew(ns, params);
} else {
/* ipc object has been locked by ipc_findkey() */
if (flg & IPC_CREAT && flg & IPC_EXCL)
err = -EEXIST;
else {
err = 0;
if (ops->more_checks)
err = ops->more_checks(ipcp, params);
if (!err)
/*
* ipc_check_perms returns the IPC id on
* success
*/
err = ipc_check_perms(ipcp, ops, params);
}
ipc_unlock(ipcp);
}
up_write(&ids->rw_mutex);
if (err == -EAGAIN)
goto retry;
return err;
}
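/*
 * Illustrative sketch, not part of the original file: the userspace
 * view of the logic above, using msgget(2) as the example.
 */
#if 0	/* example only (userspace) */
	int id = msgget(key, IPC_CREAT | IPC_EXCL | 0600);
	/* key unused                  -> a new queue is created         */
	/* key in use                  -> -1, errno == EEXIST (IPC_EXCL) */
	/* no IPC_CREAT and key unused -> -1, errno == ENOENT            */
#endif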
/**
* ipc_rmid - remove an IPC identifier
* @ids: IPC identifier set
* @ipcp: ipc perm structure containing the identifier to remove
*
 * ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
 * before this function is called, and remain locked on exit.
*/
void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
{
int lid = ipcid_to_idx(ipcp->id);
idr_remove(&ids->ipcs_idr, lid);
ids->in_use--;
ipcp->deleted = 1;
}
/**
* ipc_alloc - allocate ipc space
* @size: size desired
*
* Allocate memory from the appropriate pools and return a pointer to it.
* NULL is returned if the allocation fails
*/
void *ipc_alloc(int size)
{
	void *out;
	if (size > PAGE_SIZE)
out = vmalloc(size);
else
out = kmalloc(size, GFP_KERNEL);
return out;
}
/**
* ipc_free - free ipc space
* @ptr: pointer returned by ipc_alloc
* @size: size of block
*
* Free a block created with ipc_alloc(). The caller must know the size
* used in the allocation call.
*/
void ipc_free(void *ptr, int size)
{
	if (size > PAGE_SIZE)
vfree(ptr);
else
kfree(ptr);
}
/*
* rcu allocations:
* There are three headers that are prepended to the actual allocation:
* - during use: ipc_rcu_hdr.
* - during the rcu grace period: ipc_rcu_grace.
* - [only if vmalloc]: ipc_rcu_sched.
* Their lifetime doesn't overlap, thus the headers share the same memory.
* Unlike a normal union, they are right-aligned, thus some container_of
* forward/backward casting is necessary:
*/
struct ipc_rcu_hdr
{
int refcount;
int is_vmalloc;
void *data[0];
};
struct ipc_rcu_grace
{
struct rcu_head rcu;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
struct ipc_rcu_sched
{
struct work_struct work;
/* "void *" makes sure alignment of following data is sane. */
void *data[0];
};
#define HDRLEN_KMALLOC (sizeof(struct ipc_rcu_grace) > sizeof(struct ipc_rcu_hdr) ? \
sizeof(struct ipc_rcu_grace) : sizeof(struct ipc_rcu_hdr))
#define HDRLEN_VMALLOC (sizeof(struct ipc_rcu_sched) > HDRLEN_KMALLOC ? \
sizeof(struct ipc_rcu_sched) : HDRLEN_KMALLOC)
static inline int rcu_use_vmalloc(int size)
{
/* Too big for a single page? */
if (HDRLEN_KMALLOC + size > PAGE_SIZE)
return 1;
return 0;
}
/**
* ipc_rcu_alloc - allocate ipc and rcu space
* @size: size desired
*
* Allocate memory for the rcu header structure + the object.
* Returns the pointer to the object.
* NULL is returned if the allocation fails.
*/
void *ipc_rcu_alloc(int size)
{
	void *out;
/*
* We prepend the allocation with the rcu struct, and
* workqueue if necessary (for vmalloc).
*/
if (rcu_use_vmalloc(size)) {
out = vmalloc(HDRLEN_VMALLOC + size);
if (out) {
out += HDRLEN_VMALLOC;
container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
}
} else {
out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
if (out) {
out += HDRLEN_KMALLOC;
container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
}
}
return out;
}
void ipc_rcu_getref(void *ptr)
{
container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
}
static void ipc_do_vfree(struct work_struct *work)
{
vfree(container_of(work, struct ipc_rcu_sched, work));
}
/**
* ipc_schedule_free - free ipc + rcu space
* @head: RCU callback structure for queued work
*
 * Since the RCU callback function is called in bottom-half (softirq)
 * context, where vfree() may not be used, we defer the vfree to
 * schedule_work().
*/
static void ipc_schedule_free(struct rcu_head *head)
{
struct ipc_rcu_grace *grace;
struct ipc_rcu_sched *sched;
grace = container_of(head, struct ipc_rcu_grace, rcu);
sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
data[0]);
INIT_WORK(&sched->work, ipc_do_vfree);
schedule_work(&sched->work);
}
/**
* ipc_immediate_free - free ipc + rcu space
* @head: RCU callback structure that contains pointer to be freed
*
* Free from the RCU callback context.
*/
static void ipc_immediate_free(struct rcu_head *head)
{
struct ipc_rcu_grace *free =
container_of(head, struct ipc_rcu_grace, rcu);
kfree(free);
}
void ipc_rcu_putref(void *ptr)
{
if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
return;
if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {
call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
ipc_schedule_free);
} else {
call_rcu(&container_of(ptr, struct ipc_rcu_grace, data)->rcu,
ipc_immediate_free);
}
}
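/*
 * Illustrative sketch, not part of the original file: the intended
 * call pattern.  "struct my_obj" is hypothetical; every
 * ipc_rcu_getref() must be balanced by an ipc_rcu_putref().
 */
#if 0	/* example only */
	struct my_obj *obj = ipc_rcu_alloc(sizeof(*obj));	/* refcount == 1 */
	if (!obj)
		return -ENOMEM;
	ipc_rcu_getref(obj);	/* take a temporary extra reference */
	/* ... */
	ipc_rcu_putref(obj);	/* drop the extra reference */
	ipc_rcu_putref(obj);	/* last put: freed after an RCU grace period */
#endif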
/**
* ipcperms - check IPC permissions
* @ipcp: IPC permission set
* @flag: desired permission set.
*
 * Check user, group and other permissions for access
 * to ipc resources. Return 0 if allowed.
*/
int ipcperms (struct kern_ipc_perm *ipcp, short flag)
{ /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
uid_t euid = current_euid();
int requested_mode, granted_mode;
audit_ipc_obj(ipcp);
requested_mode = (flag >> 6) | (flag >> 3) | flag;
granted_mode = ipcp->mode;
if (euid == ipcp->cuid ||
euid == ipcp->uid)
granted_mode >>= 6;
else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
granted_mode >>= 3;
/* is there some bit set in requested_mode but not in granted_mode? */
if ((requested_mode & ~granted_mode & 0007) &&
!capable(CAP_IPC_OWNER))
return -1;
return security_ipc_permission(ipcp, flag);
}
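/*
 * Illustrative sketch, not part of the original file: a worked example
 * of the bit arithmetic above.  A read request passes flag == 0444, so
 *
 *	requested_mode = (0444 >> 6) | (0444 >> 3) | 0444  ->  low bits 04
 *
 * For the owner of a 0640 object, granted_mode >>= 6 leaves 06 in the
 * low bits, 04 & ~06 & 0007 == 0, and access is granted.  For "other"
 * the low bits of granted_mode are 0, 04 & ~0 & 0007 != 0, and -1 is
 * returned unless the caller has CAP_IPC_OWNER.
 */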
/*
* Functions to convert between the kern_ipc_perm structure and the
* old/new ipc_perm structures
*/
/**
* kernel_to_ipc64_perm - convert kernel ipc permissions to user
* @in: kernel permissions
* @out: new style IPC permissions
*
* Turn the kernel object @in into a set of permissions descriptions
* for returning to userspace (@out).
*/
void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
{
out->key = in->key;
out->uid = in->uid;
out->gid = in->gid;
out->cuid = in->cuid;
out->cgid = in->cgid;
out->mode = in->mode;
out->seq = in->seq;
}
/**
* ipc64_perm_to_ipc_perm - convert new ipc permissions to old
* @in: new style IPC permissions
* @out: old style IPC permissions
*
* Turn the new style permissions object @in into a compatibility
* object and store it into the @out pointer.
*/
void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
{
out->key = in->key;
SET_UID(out->uid, in->uid);
SET_GID(out->gid, in->gid);
SET_UID(out->cuid, in->cuid);
SET_GID(out->cgid, in->cgid);
out->mode = in->mode;
out->seq = in->seq;
}
/**
* ipc_lock - Lock an ipc structure without rw_mutex held
* @ids: IPC identifier set
* @id: ipc id to look for
*
* Look for an id in the ipc ids idr and lock the associated ipc object.
*
* The ipc object is locked on exit.
*/
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
rcu_read_lock();
out = idr_find(&ids->ipcs_idr, lid);
if (out == NULL) {
rcu_read_unlock();
return ERR_PTR(-EINVAL);
}
spin_lock(&out->lock);
/* ipc_rmid() may have already freed the ID while ipc_lock
* was spinning: here verify that the structure is still valid
*/
if (out->deleted) {
spin_unlock(&out->lock);
rcu_read_unlock();
return ERR_PTR(-EINVAL);
}
return out;
}
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
out = ipc_lock(ids, id);
if (IS_ERR(out))
return out;
if (ipc_checkid(out, id)) {
ipc_unlock(out);
return ERR_PTR(-EIDRM);
}
return out;
}
/**
* ipcget - Common sys_*get() code
 * @ns : namespace
* @ids : IPC identifier set
* @ops : operations to be called on ipc object creation, permission checks
* and further checks
* @params : the parameters needed by the previous operations.
*
* Common routine called by sys_msgget(), sys_semget() and sys_shmget().
*/
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params)
{
if (params->key == IPC_PRIVATE)
return ipcget_new(ns, ids, ops, params);
else
return ipcget_public(ns, ids, ops, params);
}
/**
* ipc_update_perm - update the permissions of an IPC.
* @in: the permission given as input.
* @out: the permission of the ipc to set.
*/
void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
{
out->uid = in->uid;
out->gid = in->gid;
out->mode = (out->mode & ~S_IRWXUGO)
| (in->mode & S_IRWXUGO);
}
/**
* ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
* @ids: the table of ids where to look for the ipc
* @id: the id of the ipc to retrieve
* @cmd: the cmd to check
* @perm: the permission to set
* @extra_perm: one extra permission parameter used by msq
*
 * This function does some common audit and permission checks for some IPC_XXX
 * cmd and is called from semctl_down, shmctl_down and msgctl_down.
* It must be called without any lock held and
* - retrieves the ipc with the given id in the given table.
* - performs some audit and permission check, depending on the given cmd
* - returns the ipc with both ipc and rw_mutex locks held in case of success
* or an err-code without any lock held otherwise.
*/
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm)
{
struct kern_ipc_perm *ipcp;
uid_t euid;
int err;
down_write(&ids->rw_mutex);
ipcp = ipc_lock_check(ids, id);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_up;
}
audit_ipc_obj(ipcp);
if (cmd == IPC_SET)
audit_ipc_set_perm(extra_perm, perm->uid,
perm->gid, perm->mode);
euid = current_euid();
if (euid == ipcp->cuid ||
euid == ipcp->uid || capable(CAP_SYS_ADMIN))
return ipcp;
err = -EPERM;
ipc_unlock(ipcp);
out_up:
up_write(&ids->rw_mutex);
return ERR_PTR(err);
}
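/*
 * Illustrative sketch, not part of the original file: the calling
 * convention, modelled on the *ctl_down() helpers.  "perm64" is a
 * hypothetical struct ipc64_perm filled in by the caller.
 */
#if 0	/* example only */
	struct kern_ipc_perm *ipcp;

	ipcp = ipcctl_pre_down(&ns->ids[IPC_MSG_IDS], id, cmd, &perm64, 0);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);	/* no locks held on failure */
	/* ...IPC_RMID / IPC_SET work with both locks held... */
	ipc_unlock(ipcp);
	up_write(&ns->ids[IPC_MSG_IDS].rw_mutex);
#endif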
#ifdef __ARCH_WANT_IPC_PARSE_VERSION
/**
* ipc_parse_version - IPC call version
* @cmd: pointer to command
*
* Return IPC_64 for new style IPC and IPC_OLD for old style IPC.
 * The @cmd value is turned from an encoded command and version into
 * just the command code.
*/
int ipc_parse_version (int *cmd)
{
if (*cmd & IPC_64) {
*cmd ^= IPC_64;
return IPC_64;
} else {
return IPC_OLD;
}
}
#endif /* __ARCH_WANT_IPC_PARSE_VERSION */
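/*
 * Illustrative sketch, not part of the original file: on architectures
 * that define __ARCH_WANT_IPC_PARSE_VERSION the version flag is
 * stripped from the command in place.
 */
#if 0	/* example only */
	int cmd = IPC_STAT | IPC_64;
	int version = ipc_parse_version(&cmd);
	/* now version == IPC_64 and cmd == IPC_STAT */
#endif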
#ifdef CONFIG_PROC_FS
struct ipc_proc_iter {
struct ipc_namespace *ns;
struct ipc_proc_iface *iface;
};
/*
 * This routine locks the first ipc structure found at or after position pos.
*/
static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
loff_t *new_pos)
{
struct kern_ipc_perm *ipc;
int total, id;
total = 0;
for (id = 0; id < pos && total < ids->in_use; id++) {
ipc = idr_find(&ids->ipcs_idr, id);
if (ipc != NULL)
total++;
}
if (total >= ids->in_use)
return NULL;
for ( ; pos < IPCMNI; pos++) {
ipc = idr_find(&ids->ipcs_idr, pos);
if (ipc != NULL) {
*new_pos = pos + 1;
ipc_lock_by_ptr(ipc);
return ipc;
}
}
/* Out of range - return NULL to terminate iteration */
return NULL;
}
static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
{
struct ipc_proc_iter *iter = s->private;
struct ipc_proc_iface *iface = iter->iface;
struct kern_ipc_perm *ipc = it;
/* If we had an ipc id locked before, unlock it */
if (ipc && ipc != SEQ_START_TOKEN)
ipc_unlock(ipc);
return sysvipc_find_ipc(&iter->ns->ids[iface->ids], *pos, pos);
}
/*
* File positions: pos 0 -> header, pos n -> ipc id = n - 1.
 * SeqFile iterator: the iterator value is a locked ipc pointer or SEQ_START_TOKEN.
*/
static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
{
struct ipc_proc_iter *iter = s->private;
struct ipc_proc_iface *iface = iter->iface;
struct ipc_ids *ids;
ids = &iter->ns->ids[iface->ids];
/*
* Take the lock - this will be released by the corresponding
* call to stop().
*/
down_read(&ids->rw_mutex);
/* pos < 0 is invalid */
if (*pos < 0)
return NULL;
/* pos == 0 means header */
if (*pos == 0)
return SEQ_START_TOKEN;
/* Find the (pos-1)th ipc */
return sysvipc_find_ipc(ids, *pos - 1, pos);
}
static void sysvipc_proc_stop(struct seq_file *s, void *it)
{
struct kern_ipc_perm *ipc = it;
struct ipc_proc_iter *iter = s->private;
struct ipc_proc_iface *iface = iter->iface;
struct ipc_ids *ids;
/* If we had a locked structure, release it */
if (ipc && ipc != SEQ_START_TOKEN)
ipc_unlock(ipc);
ids = &iter->ns->ids[iface->ids];
/* Release the lock we took in start() */
up_read(&ids->rw_mutex);
}
static int sysvipc_proc_show(struct seq_file *s, void *it)
{
struct ipc_proc_iter *iter = s->private;
struct ipc_proc_iface *iface = iter->iface;
if (it == SEQ_START_TOKEN)
return seq_puts(s, iface->header);
return iface->show(s, it);
}
static const struct seq_operations sysvipc_proc_seqops = {
.start = sysvipc_proc_start,
.stop = sysvipc_proc_stop,
.next = sysvipc_proc_next,
.show = sysvipc_proc_show,
};
static int sysvipc_proc_open(struct inode *inode, struct file *file)
{
int ret;
struct seq_file *seq;
struct ipc_proc_iter *iter;
ret = -ENOMEM;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
goto out;
ret = seq_open(file, &sysvipc_proc_seqops);
if (ret)
goto out_kfree;
seq = file->private_data;
seq->private = iter;
iter->iface = PDE(inode)->data;
iter->ns = get_ipc_ns(current->nsproxy->ipc_ns);
out:
return ret;
out_kfree:
kfree(iter);
goto out;
}
static int sysvipc_proc_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
struct ipc_proc_iter *iter = seq->private;
put_ipc_ns(iter->ns);
return seq_release_private(inode, file);
}
static const struct file_operations sysvipc_proc_fops = {
.open = sysvipc_proc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = sysvipc_proc_release,
};
#endif /* CONFIG_PROC_FS */

177
kernel/ipc/util.h Normal file
View File

@@ -0,0 +1,177 @@
/*
* linux/ipc/util.h
* Copyright (C) 1999 Christoph Rohland
*
* ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com>
* namespaces support. 2006 OpenVZ, SWsoft Inc.
* Pavel Emelianov <xemul@openvz.org>
*/
#ifndef _IPC_UTIL_H
#define _IPC_UTIL_H
#include <linux/unistd.h>
#include <linux/err.h>
#define SEQ_MULTIPLIER (IPCMNI)
void sem_init (void);
void msg_init (void);
void shm_init (void);
struct ipc_namespace;
#ifdef CONFIG_POSIX_MQUEUE
extern void mq_clear_sbinfo(struct ipc_namespace *ns);
extern void mq_put_mnt(struct ipc_namespace *ns);
#else
static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { }
static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#endif
#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
void msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
static inline void msg_init_ns(struct ipc_namespace *ns) { }
static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }
static inline void msg_exit_ns(struct ipc_namespace *ns) { }
static inline void shm_exit_ns(struct ipc_namespace *ns) { }
#endif
/*
* Structure that holds the parameters needed by the ipc operations
 * (see below)
*/
struct ipc_params {
key_t key;
int flg;
union {
size_t size; /* for shared memories */
int nsems; /* for semaphores */
} u; /* holds the getnew() specific param */
};
/*
* Structure that holds some ipc operations. This structure is used to unify
* the calls to sys_msgget(), sys_semget(), sys_shmget()
* . routine to call to create a new ipc object. Can be one of newque,
* newary, newseg
* . routine to call to check permissions for a new ipc object.
* Can be one of security_msg_associate, security_sem_associate,
* security_shm_associate
* . routine to call for an extra check if needed
*/
struct ipc_ops {
int (*getnew) (struct ipc_namespace *, struct ipc_params *);
int (*associate) (struct kern_ipc_perm *, int);
int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
};
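/*
 * Illustrative sketch, not part of the original file: how a resource
 * wires up struct ipc_ops, modelled on sys_msgget() in ipc/msg.c.
 */
#if 0	/* example only */
	struct ipc_ops msg_ops = {
		.getnew      = newque,		/* creation routine */
		.associate   = msg_security,	/* permission hook */
		.more_checks = NULL,		/* msg queues need none */
	};
	struct ipc_params msg_params = {
		.key = key,
		.flg = msgflg,
	};
	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
#endif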
struct seq_file;
struct ipc_ids;
void ipc_init_ids(struct ipc_ids *);
#ifdef CONFIG_PROC_FS
void __init ipc_init_proc_interface(const char *path, const char *header,
int ids, int (*show)(struct seq_file *, void *));
#else
#define ipc_init_proc_interface(path, header, ids, show) do {} while (0)
#endif
#define IPC_SEM_IDS 0
#define IPC_MSG_IDS 1
#define IPC_SHM_IDS 2
#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
/* must be called with ids->rw_mutex acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
/* must be called with ids->rw_mutex acquired for reading */
int ipc_get_maxid(struct ipc_ids *);
/* must be called with both locks acquired. */
void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
/* must be called with ipcp locked */
int ipcperms(struct kern_ipc_perm *ipcp, short flg);
/*
 * For rare, potentially huge allocations; both functions can sleep.
 */
void *ipc_alloc(int size);
void ipc_free(void *ptr, int size);
/*
 * For allocations that need to be freed by RCU.
 * Objects are reference counted; they start with a reference count of 1.
 * getref increases the refcount, and the putref call that drops the
 * refcount to 0 schedules the RCU destruction. Caller must guarantee locking.
*/
void *ipc_rcu_alloc(int size);
void ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
#ifndef __ARCH_WANT_IPC_PARSE_VERSION
/* On IA-64, we always use the "64-bit version" of the IPC structures. */
# define ipc_parse_version(cmd) IPC_64
#else
int ipc_parse_version (int *cmd);
#endif
extern void free_msg(struct msg_msg *msg);
extern struct msg_msg *load_msg(const void __user *src, int len);
extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
extern void recompute_msgmni(struct ipc_namespace *);
static inline int ipc_buildid(int id, int seq)
{
return SEQ_MULTIPLIER * seq + id;
}
/*
* Must be called with ipcp locked
*/
static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
{
	if (id / SEQ_MULTIPLIER != ipcp->seq)
return 1;
return 0;
}
static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
{
rcu_read_lock();
spin_lock(&perm->lock);
}
static inline void ipc_unlock(struct kern_ipc_perm *perm)
{
spin_unlock(&perm->lock);
rcu_read_unlock();
}
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
void (*free)(struct ipc_namespace *, struct kern_ipc_perm *));
#endif