add idl4k kernel firmware version 1.13.0.105

This commit is contained in:
Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

5
kernel/fs/notify/Kconfig Normal file
View File

@@ -0,0 +1,5 @@
config FSNOTIFY
def_bool n
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"

View File

@@ -0,0 +1,4 @@
obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o
obj-y += dnotify/
obj-y += inotify/

View File

@@ -0,0 +1,11 @@
config DNOTIFY
bool "Dnotify support"
select FSNOTIFY
default y
help
Dnotify is a directory-based per-fd file change notification system
that uses signals to communicate events to user-space. There exist
superior alternatives, but some applications may still rely on
dnotify.
If unsure, say Y.

View File

@@ -0,0 +1 @@
obj-$(CONFIG_DNOTIFY) += dnotify.o

View File

@@ -0,0 +1,442 @@
/*
* Directory notifications for Linux.
*
* Copyright (C) 2000,2001,2002 Stephen Rothwell
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* dnotify was largly rewritten to use the new fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/dnotify.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
int dir_notify_enable __read_mostly = 1;
static struct kmem_cache *dnotify_struct_cache __read_mostly;
static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
static struct fsnotify_group *dnotify_group __read_mostly;
static DEFINE_MUTEX(dnotify_mark_mutex);
/*
* dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
* is being watched by dnotify. If multiple userspace applications are watching
* the same directory with dnotify their information is chained in dn
*/
struct dnotify_mark_entry {
struct fsnotify_mark_entry fsn_entry;
struct dnotify_struct *dn;
};
/*
* When a process starts or stops watching an inode the set of events which
* dnotify cares about for that inode may change. This function runs the
* list of everything receiving dnotify events about this directory and calculates
* the set of all those events. After it updates what dnotify is interested in
* it calls the fsnotify function so it can update the set of all events relevant
* to this inode.
*/
static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
{
__u32 new_mask, old_mask;
struct dnotify_struct *dn;
struct dnotify_mark_entry *dnentry = container_of(entry,
struct dnotify_mark_entry,
fsn_entry);
assert_spin_locked(&entry->lock);
old_mask = entry->mask;
new_mask = 0;
for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
entry->mask = new_mask;
if (old_mask == new_mask)
return;
if (entry->inode)
fsnotify_recalc_inode_mask(entry->inode);
}
/*
* Mains fsnotify call where events are delivered to dnotify.
* Find the dnotify mark on the relevant inode, run the list of dnotify structs
* on that mark and determine which of them has expressed interest in receiving
* events of this type. When found send the correct process and signal and
* destroy the dnotify struct if it was not registered to receive multiple
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
struct fsnotify_event *event)
{
struct fsnotify_mark_entry *entry = NULL;
struct dnotify_mark_entry *dnentry;
struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
to_tell = event->to_tell;
spin_lock(&to_tell->i_lock);
entry = fsnotify_find_mark_entry(group, to_tell);
spin_unlock(&to_tell->i_lock);
/* unlikely since we alreay passed dnotify_should_send_event() */
if (unlikely(!entry))
return 0;
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
spin_lock(&entry->lock);
prev = &dnentry->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_mask & test_mask) == 0) {
prev = &dn->dn_next;
continue;
}
fown = &dn->dn_filp->f_owner;
send_sigio(fown, dn->dn_fd, POLL_MSG);
if (dn->dn_mask & FS_DN_MULTISHOT)
prev = &dn->dn_next;
else {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(entry);
}
}
spin_unlock(&entry->lock);
fsnotify_put_mark(entry);
return 0;
}
/*
* Given an inode and mask determine if dnotify would be interested in sending
* userspace notification for that pair.
*/
static bool dnotify_should_send_event(struct fsnotify_group *group,
struct inode *inode, __u32 mask)
{
struct fsnotify_mark_entry *entry;
bool send;
/* !dir_notify_enable should never get here, don't waste time checking
if (!dir_notify_enable)
return 0; */
/* not a dir, dnotify doesn't care */
if (!S_ISDIR(inode->i_mode))
return false;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
/* no mark means no dnotify watch */
if (!entry)
return false;
mask = (mask & ~FS_EVENT_ON_CHILD);
send = (mask & entry->mask);
fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
return send;
}
static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
{
struct dnotify_mark_entry *dnentry = container_of(entry,
struct dnotify_mark_entry,
fsn_entry);
BUG_ON(dnentry->dn);
kmem_cache_free(dnotify_mark_entry_cache, dnentry);
}
static struct fsnotify_ops dnotify_fsnotify_ops = {
.handle_event = dnotify_handle_event,
.should_send_event = dnotify_should_send_event,
.free_group_priv = NULL,
.freeing_mark = NULL,
.free_event_priv = NULL,
};
/*
* Called every time a file is closed. Looks first for a dnotify mark on the
* inode. If one is found run all of the ->dn entries attached to that
* mark for one relevant to this process closing the file and remove that
* dnotify_struct. If that was the last dnotify_struct also remove the
* fsnotify_mark_entry.
*/
void dnotify_flush(struct file *filp, fl_owner_t id)
{
struct fsnotify_mark_entry *entry;
struct dnotify_mark_entry *dnentry;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct inode *inode;
inode = filp->f_path.dentry->d_inode;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(dnotify_group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
return;
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
mutex_lock(&dnotify_mark_mutex);
spin_lock(&entry->lock);
prev = &dnentry->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
dnotify_recalc_inode_mask(entry);
break;
}
prev = &dn->dn_next;
}
spin_unlock(&entry->lock);
/* nothing else could have found us thanks to the dnotify_mark_mutex */
if (dnentry->dn == NULL)
fsnotify_destroy_mark_by_entry(entry);
fsnotify_recalc_group_mask(dnotify_group);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(entry);
}
/* this conversion is done only at watch creation */
static __u32 convert_arg(unsigned long arg)
{
__u32 new_mask = FS_EVENT_ON_CHILD;
if (arg & DN_MULTISHOT)
new_mask |= FS_DN_MULTISHOT;
if (arg & DN_DELETE)
new_mask |= (FS_DELETE | FS_MOVED_FROM);
if (arg & DN_MODIFY)
new_mask |= FS_MODIFY;
if (arg & DN_ACCESS)
new_mask |= FS_ACCESS;
if (arg & DN_ATTRIB)
new_mask |= FS_ATTRIB;
if (arg & DN_RENAME)
new_mask |= FS_DN_RENAME;
if (arg & DN_CREATE)
new_mask |= (FS_CREATE | FS_MOVED_TO);
return new_mask;
}
/*
* If multiple processes watch the same inode with dnotify there is only one
* dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
* onto that mark. This function either attaches the new dnotify_struct onto
* that list, or it |= the mask onto an existing dnofiy_struct.
*/
static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
struct dnotify_struct *odn;
odn = dnentry->dn;
while (odn != NULL) {
/* adding more events to existing dnofiy_struct? */
if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
odn->dn_fd = fd;
odn->dn_mask |= mask;
return -EEXIST;
}
odn = odn->dn_next;
}
dn->dn_mask = mask;
dn->dn_fd = fd;
dn->dn_filp = filp;
dn->dn_owner = id;
dn->dn_next = dnentry->dn;
dnentry->dn = dn;
return 0;
}
/*
* When a process calls fcntl to attach a dnotify watch to a directory it ends
* up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be
* attached to the fsnotify_mark.
*/
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
struct dnotify_mark_entry *new_dnentry, *dnentry;
struct fsnotify_mark_entry *new_entry, *entry;
struct dnotify_struct *dn;
struct inode *inode;
fl_owner_t id = current->files;
struct file *f;
int destroy = 0, error = 0;
__u32 mask;
/* we use these to tell if we need to kfree */
new_entry = NULL;
dn = NULL;
if (!dir_notify_enable) {
error = -EINVAL;
goto out_err;
}
/* a 0 mask means we are explicitly removing the watch */
if ((arg & ~DN_MULTISHOT) == 0) {
dnotify_flush(filp, id);
error = 0;
goto out_err;
}
/* dnotify only works on directories */
inode = filp->f_path.dentry->d_inode;
if (!S_ISDIR(inode->i_mode)) {
error = -ENOTDIR;
goto out_err;
}
/* expect most fcntl to add new rather than augment old */
dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
if (!dn) {
error = -ENOMEM;
goto out_err;
}
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
if (!new_dnentry) {
error = -ENOMEM;
goto out_err;
}
/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
mask = convert_arg(arg);
/* set up the new_entry and new_dnentry */
new_entry = &new_dnentry->fsn_entry;
fsnotify_init_mark(new_entry, dnotify_free_mark);
new_entry->mask = mask;
new_dnentry->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */
mutex_lock(&dnotify_mark_mutex);
/* add the new_entry or find an old one. */
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(dnotify_group, inode);
spin_unlock(&inode->i_lock);
if (entry) {
dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
spin_lock(&entry->lock);
} else {
fsnotify_add_mark(new_entry, dnotify_group, inode);
spin_lock(&new_entry->lock);
entry = new_entry;
dnentry = new_dnentry;
/* we used new_entry, so don't free it */
new_entry = NULL;
}
rcu_read_lock();
f = fcheck(fd);
rcu_read_unlock();
/* if (f != filp) means that we lost a race and another task/thread
* actually closed the fd we are still playing with before we grabbed
* the dnotify_mark_mutex and entry->lock. Since closing the fd is the
* only time we clean up the mark entries we need to get our mark off
* the list. */
if (f != filp) {
/* if we added ourselves, shoot ourselves, it's possible that
* the flush actually did shoot this entry. That's fine too
* since multiple calls to destroy_mark is perfectly safe, if
* we found a dnentry already attached to the inode, just sod
* off silently as the flush at close time dealt with it.
*/
if (dnentry == new_dnentry)
destroy = 1;
goto out;
}
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error) {
/* if we added, we must shoot */
if (dnentry == new_dnentry)
destroy = 1;
goto out;
}
error = attach_dn(dn, dnentry, id, fd, filp, mask);
/* !error means that we attached the dn to the dnentry, so don't free it */
if (!error)
dn = NULL;
/* -EEXIST means that we didn't add this new dn and used an old one.
* that isn't an error (and the unused dn should be freed) */
else if (error == -EEXIST)
error = 0;
dnotify_recalc_inode_mask(entry);
out:
spin_unlock(&entry->lock);
if (destroy)
fsnotify_destroy_mark_by_entry(entry);
fsnotify_recalc_group_mask(dnotify_group);
mutex_unlock(&dnotify_mark_mutex);
fsnotify_put_mark(entry);
out_err:
if (new_entry)
fsnotify_put_mark(new_entry);
if (dn)
kmem_cache_free(dnotify_struct_cache, dn);
return error;
}
static int __init dnotify_init(void)
{
dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
0, &dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
return 0;
}
module_init(dnotify_init)

188
kernel/fs/notify/fsnotify.c Normal file
View File

@@ -0,0 +1,188 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/srcu.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
/*
* Clear all of the marks on an inode when it is being evicted from core
*/
void __fsnotify_inode_delete(struct inode *inode)
{
fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
* parent cares. Thus when an event happens on a child it can quickly tell if
* if there is a need to find a parent and send the event to the parent.
*/
void __fsnotify_update_child_dentry_flags(struct inode *inode)
{
struct dentry *alias;
int watched;
if (!S_ISDIR(inode->i_mode))
return;
/* determine if the children should tell inode about their events */
watched = fsnotify_inode_watches_children(inode);
spin_lock(&dcache_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
spin_lock(&child->d_lock);
if (watched)
child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
}
spin_unlock(&dcache_lock);
}
/* Notify this dentry's parent about a child's events. */
void __fsnotify_parent(struct dentry *dentry, __u32 mask)
{
struct dentry *parent;
struct inode *p_inode;
bool send = false;
bool should_update_children = false;
if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
return;
spin_lock(&dentry->d_lock);
parent = dentry->d_parent;
p_inode = parent->d_inode;
if (fsnotify_inode_watches_children(p_inode)) {
if (p_inode->i_fsnotify_mask & mask) {
dget(parent);
send = true;
}
} else {
/*
* The parent doesn't care about events on it's children but
* at least one child thought it did. We need to run all the
* children and update their d_flags to let them know p_inode
* doesn't care about them any more.
*/
dget(parent);
should_update_children = true;
}
spin_unlock(&dentry->d_lock);
if (send) {
/* we are notifying a parent so come up with the new mask which
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
dentry->d_name.name, 0);
dput(parent);
}
if (unlikely(should_update_children)) {
__fsnotify_update_child_dentry_flags(p_inode);
dput(parent);
}
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
/*
* This is the main call to fsnotify. The VFS calls into hook specific functions
* in linux/fsnotify.h. Those functions then in turn call here. Here will call
* out to all of the registered fsnotify_group. Those groups can then use the
* notification event in whatever means they feel necessary.
*/
void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
{
struct fsnotify_group *group;
struct fsnotify_event *event = NULL;
int idx;
/* global tests shouldn't care about events on child only the specific event */
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
if (list_empty(&fsnotify_groups))
return;
if (!(test_mask & fsnotify_mask))
return;
if (!(test_mask & to_tell->i_fsnotify_mask))
return;
/*
* SRCU!! the groups list is very very much read only and the path is
* very hot. The VAST majority of events are not going to need to do
* anything other than walk the list so it's crazy to pre-allocate.
*/
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
if (test_mask & group->mask) {
if (!group->ops->should_send_event(group, to_tell, mask))
continue;
if (!event) {
event = fsnotify_create_event(to_tell, mask, data,
data_is, file_name, cookie,
GFP_KERNEL);
/* shit, we OOM'd and now we can't tell, maybe
* someday someone else will want to do something
* here */
if (!event)
break;
}
group->ops->handle_event(group, event);
}
}
srcu_read_unlock(&fsnotify_grp_srcu, idx);
/*
* fsnotify_create_event() took a reference so the event can't be cleaned
* up while we are still trying to add it to lists, drop that one.
*/
if (event)
fsnotify_put_event(event);
}
EXPORT_SYMBOL_GPL(fsnotify);
static __init int fsnotify_init(void)
{
return init_srcu_struct(&fsnotify_grp_srcu);
}
subsys_initcall(fsnotify_init);

View File

@@ -0,0 +1,34 @@
#ifndef __FS_NOTIFY_FSNOTIFY_H_
#define __FS_NOTIFY_FSNOTIFY_H_
#include <linux/list.h>
#include <linux/fsnotify.h>
#include <linux/srcu.h>
#include <linux/types.h>
/* protects reads of fsnotify_groups */
extern struct srcu_struct fsnotify_grp_srcu;
/* all groups which receive fsnotify events */
extern struct list_head fsnotify_groups;
/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
extern __u32 fsnotify_mask;
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* final kfree of a group */
extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
/* run the list of all marks associated with inode and flag them to be freed */
extern void fsnotify_clear_marks_by_inode(struct inode *inode);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
* about events that happen to its children.
*/
extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
/* allocate and destroy and event holder to attach events to notification/access queues */
extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
#endif /* __FS_NOTIFY_FSNOTIFY_H_ */

254
kernel/fs/notify/group.c Normal file
View File

@@ -0,0 +1,254 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include <asm/atomic.h>
/* protects writes to fsnotify_groups and fsnotify_mask */
static DEFINE_MUTEX(fsnotify_grp_mutex);
/* protects reads while running the fsnotify_groups list */
struct srcu_struct fsnotify_grp_srcu;
/* all groups registered to receive filesystem notifications */
LIST_HEAD(fsnotify_groups);
/* bitwise OR of all events (FS_*) interesting to some group on this system */
__u32 fsnotify_mask;
/*
* When a new group registers or changes it's set of interesting events
* this function updates the fsnotify_mask to contain all interesting events
*/
void fsnotify_recalc_global_mask(void)
{
struct fsnotify_group *group;
__u32 mask = 0;
int idx;
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
mask |= group->mask;
srcu_read_unlock(&fsnotify_grp_srcu, idx);
fsnotify_mask = mask;
}
/*
* Update the group->mask by running all of the marks associated with this
* group and finding the bitwise | of all of the mark->mask. If we change
* the group->mask we need to update the global mask of events interesting
* to the system.
*/
void fsnotify_recalc_group_mask(struct fsnotify_group *group)
{
__u32 mask = 0;
__u32 old_mask = group->mask;
struct fsnotify_mark_entry *entry;
spin_lock(&group->mark_lock);
list_for_each_entry(entry, &group->mark_entries, g_list)
mask |= entry->mask;
spin_unlock(&group->mark_lock);
group->mask = mask;
if (old_mask != mask)
fsnotify_recalc_global_mask();
}
/*
* Take a reference to a group so things found under the fsnotify_grp_mutex
* can't get freed under us
*/
static void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
/*
* Final freeing of a group
*/
void fsnotify_final_destroy_group(struct fsnotify_group *group)
{
/* clear the notification queue of all events */
fsnotify_flush_notify(group);
if (group->ops->free_group_priv)
group->ops->free_group_priv(group);
kfree(group);
}
/*
* Trying to get rid of a group. We need to first get rid of any outstanding
* allocations and then free the group. Remember that fsnotify_clear_marks_by_group
* could miss marks that are being freed by inode and those marks could still
* hold a reference to this group (via group->num_marks) If we get into that
* situtation, the fsnotify_final_destroy_group will get called when that final
* mark is freed.
*/
static void fsnotify_destroy_group(struct fsnotify_group *group)
{
/* clear all inode mark entries for this group */
fsnotify_clear_marks_by_group(group);
/* past the point of no return, matches the initial value of 1 */
if (atomic_dec_and_test(&group->num_marks))
fsnotify_final_destroy_group(group);
}
/*
* Remove this group from the global list of groups that will get events
* this can be done even if there are still references and things still using
* this group. This just stops the group from getting new events.
*/
static void __fsnotify_evict_group(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
if (group->on_group_list)
list_del_rcu(&group->group_list);
group->on_group_list = 0;
}
/*
* Called when a group is no longer interested in getting events. This can be
* used if a group is misbehaving or if for some reason a group should no longer
* get any filesystem events.
*/
void fsnotify_evict_group(struct fsnotify_group *group)
{
mutex_lock(&fsnotify_grp_mutex);
__fsnotify_evict_group(group);
mutex_unlock(&fsnotify_grp_mutex);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
return;
/*
* OK, now we know that there's no other users *and* we hold mutex,
* so no new references will appear
*/
__fsnotify_evict_group(group);
/*
* now it's off the list, so the only thing we might care about is
* srcu access....
*/
mutex_unlock(&fsnotify_grp_mutex);
synchronize_srcu(&fsnotify_grp_srcu);
/* and now it is really dead. _Nothing_ could be seeing it */
fsnotify_recalc_global_mask();
fsnotify_destroy_group(group);
}
/*
* Simply run the fsnotify_groups list and find a group which matches
* the given parameters. If a group is found we take a reference to that
* group.
*/
static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group_iter;
struct fsnotify_group *group = NULL;
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
if (group_iter->group_num == group_num) {
if ((group_iter->mask == mask) &&
(group_iter->ops == ops)) {
fsnotify_get_group(group_iter);
group = group_iter;
} else
group = ERR_PTR(-EEXIST);
}
}
return group;
}
/*
* Either finds an existing group which matches the group_num, mask, and ops or
* creates a new group and adds it to the global group list. In either case we
* take a reference for the group returned.
*/
struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group, *tgroup;
/* very low use, simpler locking if we just always alloc */
group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
atomic_set(&group->refcnt, 1);
group->on_group_list = 0;
group->group_num = group_num;
group->mask = mask;
mutex_init(&group->notification_mutex);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->q_len = 0;
group->max_events = UINT_MAX;
spin_lock_init(&group->mark_lock);
atomic_set(&group->num_marks, 0);
INIT_LIST_HEAD(&group->mark_entries);
group->ops = ops;
mutex_lock(&fsnotify_grp_mutex);
tgroup = fsnotify_find_group(group_num, mask, ops);
if (tgroup) {
/* group already exists */
mutex_unlock(&fsnotify_grp_mutex);
/* destroy the new one we made */
fsnotify_put_group(group);
return tgroup;
}
/* group not found, add a new one */
list_add_rcu(&group->group_list, &fsnotify_groups);
group->on_group_list = 1;
/* being on the fsnotify_groups list holds one num_marks */
atomic_inc(&group->num_marks);
mutex_unlock(&fsnotify_grp_mutex);
if (mask)
fsnotify_recalc_global_mask();
return group;
}

View File

@@ -0,0 +1,426 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* fsnotify inode mark locking/lifetime/and refcnting
*
* REFCNT:
* The mark->refcnt tells how many "things" in the kernel currently are
* referencing this object. The object typically will live inside the kernel
* with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
* which can find this object holding the appropriete locks, can take a reference
* and the object itself is guarenteed to survive until the reference is dropped.
*
* LOCKING:
* There are 3 spinlocks involved with fsnotify inode marks and they MUST
* be taken in order as follows:
*
* entry->lock
* group->mark_lock
* inode->i_lock
*
* entry->lock protects 2 things, entry->group and entry->inode. You must hold
* that lock to dereference either of these things (they could be NULL even with
* the lock)
*
* group->mark_lock protects the mark_entries list anchored inside a given group
* and each entry is hooked via the g_list. It also sorta protects the
* free_g_list, which when used is anchored by a private list on the stack of the
* task which held the group->mark_lock.
*
* inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
* given inode and each entry is hooked via the i_list. (and sorta the
* free_i_list)
*
*
* LIFETIME:
* Inode marks survive between when they are added to an inode and when their
* refcnt==0.
*
* The inode mark can be cleared for a number of different reasons including:
* - The inode is unlinked for the last time. (fsnotify_inode_remove)
* - The inode is being evicted from cache. (fsnotify_inode_delete)
* - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
* - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry)
* - The fsnotify_group associated with the mark is going away and all such marks
* need to be cleaned up. (fsnotify_clear_marks_by_group)
*
* Worst case we are given an inode and need to clean up all the marks on that
* inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each
* mark on the list we take a reference (so the mark can't disappear under us).
* We remove that mark form the inode's list of marks and we add this mark to a
* private list anchored on the stack using i_free_list; At this point we no
* longer fear anything finding the mark using the inode's list of marks.
*
* We can safely and locklessly run the private list on the stack of everything
* we just unattached from the original inode. For each mark on the private list
* we grab the mark-> and can thus dereference mark->group and mark->inode. If
* we see the group and inode are not NULL we take those locks. Now holding all
* 3 locks we can completely remove the mark from other tasks finding it in the
* future. Remember, 10 things might already be referencing this mark, but they
* better be holding a ref. We drop our reference we took before we unhooked it
* from the inode. When the ref hits 0 we can free the mark.
*
* Very similarly for freeing by group, except we use free_g_list.
*
* This has the very interesting property of being able to run concurrently with
* any (or all) other directions.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/writeback.h> /* for inode_lock */
#include <asm/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
{
atomic_inc(&entry->refcnt);
}
void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
{
if (atomic_dec_and_test(&entry->refcnt))
entry->free_mark(entry);
}
/*
* Recalculate the mask of events relevant to a given inode locked.
*/
static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
{
struct fsnotify_mark_entry *entry;
struct hlist_node *pos;
__u32 new_mask = 0;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
new_mask |= entry->mask;
inode->i_fsnotify_mask = new_mask;
}
/*
* Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
* any notifier is interested in hearing for this inode.
*/
void fsnotify_recalc_inode_mask(struct inode *inode)
{
spin_lock(&inode->i_lock);
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
__fsnotify_update_child_dentry_flags(inode);
}
/*
* Any time a mark is getting freed we end up here.
* The caller had better be holding a reference to this mark so we don't actually
* do the final put under the entry->lock
*/
void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
{
struct fsnotify_group *group;
struct inode *inode;
spin_lock(&entry->lock);
group = entry->group;
inode = entry->inode;
BUG_ON(group && !inode);
BUG_ON(!group && inode);
/* if !group something else already marked this to die */
if (!group) {
spin_unlock(&entry->lock);
return;
}
/* 1 from caller and 1 for being on i_list/g_list */
BUG_ON(atomic_read(&entry->refcnt) < 2);
spin_lock(&group->mark_lock);
spin_lock(&inode->i_lock);
hlist_del_init(&entry->i_list);
entry->inode = NULL;
list_del_init(&entry->g_list);
entry->group = NULL;
fsnotify_put_mark(entry); /* for i_list and g_list */
/*
* this mark is now off the inode->i_fsnotify_mark_entries list and we
* hold the inode->i_lock, so this is the perfect time to update the
* inode->i_fsnotify_mask
*/
fsnotify_recalc_inode_mask_locked(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&group->mark_lock);
spin_unlock(&entry->lock);
/*
* Some groups like to know that marks are being freed. This is a
* callback to the group function to let it know that this entry
* is being freed.
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(entry, group);
/*
* __fsnotify_update_child_dentry_flags(inode);
*
* I really want to call that, but we can't, we have no idea if the inode
* still exists the second we drop the entry->lock.
*
* The next time an event arrive to this inode from one of it's children
* __fsnotify_parent will see that the inode doesn't care about it's
* children and will update all of these flags then. So really this
* is just a lazy update (and could be a perf win...)
*/
iput(inode);
/*
* it's possible that this group tried to destroy itself, but this
* this mark was simultaneously being freed by inode. If that's the
* case, we finish freeing the group here.
*/
if (unlikely(atomic_dec_and_test(&group->num_marks)))
fsnotify_final_destroy_group(group);
}
/*
* Given a group, destroy all of the marks associated with that group.
*/
void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
{
struct fsnotify_mark_entry *lentry, *entry;
LIST_HEAD(free_list);
spin_lock(&group->mark_lock);
list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
list_add(&entry->free_g_list, &free_list);
list_del_init(&entry->g_list);
fsnotify_get_mark(entry);
}
spin_unlock(&group->mark_lock);
list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
}
}
/*
* Given an inode, destroy all of the marks associated with that inode.
*/
void fsnotify_clear_marks_by_inode(struct inode *inode)
{
struct fsnotify_mark_entry *entry, *lentry;
struct hlist_node *pos, *n;
LIST_HEAD(free_list);
spin_lock(&inode->i_lock);
hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
list_add(&entry->free_i_list, &free_list);
hlist_del_init(&entry->i_list);
fsnotify_get_mark(entry);
}
spin_unlock(&inode->i_lock);
list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
}
}
/*
* given a group and inode, find the mark associated with that combination.
* if found take a reference to that mark and return it, else return NULL
*/
struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
struct inode *inode)
{
struct fsnotify_mark_entry *entry;
struct hlist_node *pos;
assert_spin_locked(&inode->i_lock);
hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
if (entry->group == group) {
fsnotify_get_mark(entry);
return entry;
}
}
return NULL;
}
/*
* Nothing fancy, just initialize lists and locks and counters.
*/
void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
void (*free_mark)(struct fsnotify_mark_entry *entry))
{
spin_lock_init(&entry->lock);
atomic_set(&entry->refcnt, 1);
INIT_HLIST_NODE(&entry->i_list);
entry->group = NULL;
entry->mask = 0;
entry->inode = NULL;
entry->free_mark = free_mark;
}
/*
* Attach an initialized mark entry to a given group and inode.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group and for which inodes.
*/
int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group, struct inode *inode)
{
struct fsnotify_mark_entry *lentry;
int ret = 0;
inode = igrab(inode);
if (unlikely(!inode))
return -EINVAL;
/*
* LOCKING ORDER!!!!
* entry->lock
* group->mark_lock
* inode->i_lock
*/
spin_lock(&entry->lock);
spin_lock(&group->mark_lock);
spin_lock(&inode->i_lock);
lentry = fsnotify_find_mark_entry(group, inode);
if (!lentry) {
entry->group = group;
entry->inode = inode;
hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
list_add(&entry->g_list, &group->mark_entries);
fsnotify_get_mark(entry); /* for i_list and g_list */
atomic_inc(&group->num_marks);
fsnotify_recalc_inode_mask_locked(inode);
}
spin_unlock(&inode->i_lock);
spin_unlock(&group->mark_lock);
spin_unlock(&entry->lock);
if (lentry) {
ret = -EEXIST;
iput(inode);
fsnotify_put_mark(lentry);
} else {
__fsnotify_update_child_dentry_flags(inode);
}
return ret;
}
/**
* fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
* Called with inode_lock held, protecting the unmounting super block's list
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
void fsnotify_unmount_inodes(struct list_head *list)
{
struct inode *inode, *next_i, *need_iput = NULL;
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inode *need_iput_tmp;
/*
* We cannot __iget() an inode in state I_CLEAR, I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
continue;
/*
* If i_count is zero, the inode cannot have any watches and
* doing an __iget/iput with MS_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
if (!atomic_read(&inode->i_count))
continue;
need_iput_tmp = need_iput;
need_iput = NULL;
/* In case fsnotify_inode_delete() drops a reference. */
if (inode != need_iput_tmp)
__iget(inode);
else
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
atomic_read(&next_i->i_count) &&
!(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
__iget(next_i);
need_iput = next_i;
}
/*
* We can safely drop inode_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
spin_unlock(&inode_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
/* for each watch, send FS_UNMOUNT and then remove it */
fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
fsnotify_inode_delete(inode);
iput(inode);
spin_lock(&inode_lock);
}
}

View File

@@ -0,0 +1,31 @@
config INOTIFY
bool "Inotify file change notification support"
default n
---help---
Say Y here to enable legacy in kernel inotify support. Inotify is a
file change notification system. It is a replacement for dnotify.
This option only provides the legacy inotify in kernel API. There
are no in tree kernel users of this interface since it is deprecated.
You only need this if you are loading an out of tree kernel module
that uses inotify.
For more information, see <file:Documentation/filesystems/inotify.txt>
If unsure, say N.
config INOTIFY_USER
bool "Inotify support for userspace"
select FSNOTIFY
default y
---help---
Say Y here to enable inotify support for userspace, including the
associated system calls. Inotify allows monitoring of both files and
directories via a single open fd. Events are read from the file
descriptor, which is also select()- and poll()-able.
Inotify fixes numerous shortcomings in dnotify and introduces several
new features including multiple file events, one-shot support, and
unmount notification.
For more information, see <file:Documentation/filesystems/inotify.txt>
If unsure, say Y.

View File

@@ -0,0 +1,2 @@
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o

View File

@@ -0,0 +1,933 @@
/*
* fs/inotify.c - inode-based file event notifications
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Kernel API added by: Amy Griffis <amy.griffis@hp.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/writeback.h>
#include <linux/inotify.h>
#include <linux/fsnotify_backend.h>
static atomic_t inotify_cookie;
/*
* Lock ordering:
*
* dentry->d_lock (used to keep d_move() away from dentry->d_parent)
* iprune_mutex (synchronize shrink_icache_memory())
* inode_lock (protects the super_block->s_inodes list)
* inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
* inotify_handle->mutex (protects inotify_handle and watches->h_list)
*
* The inode->inotify_mutex and inotify_handle->mutex and held during execution
* of a caller's event handler. Thus, the caller must not hold any locks
* taken in their event handler while calling any of the published inotify
* interfaces.
*/
/*
* Lifetimes of the three main data structures--inotify_handle, inode, and
* inotify_watch--are managed by reference count.
*
* inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
* Additional references can bump the count via get_inotify_handle() and drop
* the count via put_inotify_handle().
*
* inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
* to remove_watch_no_event(). Additional references can bump the count via
* get_inotify_watch() and drop the count via put_inotify_watch(). The caller
* is reponsible for the final put after receiving IN_IGNORED, or when using
* IN_ONESHOT after receiving the first event. Inotify does the final put if
* inotify_destroy() is called.
*
* inode: Pinned so long as the inode is associated with a watch, from
* inotify_add_watch() to the final put_inotify_watch().
*/
/*
* struct inotify_handle - represents an inotify instance
*
* This structure is protected by the mutex 'mutex'.
*/
struct inotify_handle {
struct idr idr; /* idr mapping wd -> watch */
struct mutex mutex; /* protects this bad boy */
struct list_head watches; /* list of watches */
atomic_t count; /* reference count */
u32 last_wd; /* the last wd allocated */
const struct inotify_operations *in_ops; /* inotify caller operations */
};
static inline void get_inotify_handle(struct inotify_handle *ih)
{
atomic_inc(&ih->count);
}
static inline void put_inotify_handle(struct inotify_handle *ih)
{
if (atomic_dec_and_test(&ih->count)) {
idr_destroy(&ih->idr);
kfree(ih);
}
}
/**
* get_inotify_watch - grab a reference to an inotify_watch
* @watch: watch to grab
*/
void get_inotify_watch(struct inotify_watch *watch)
{
atomic_inc(&watch->count);
}
EXPORT_SYMBOL_GPL(get_inotify_watch);
int pin_inotify_watch(struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
spin_lock(&sb_lock);
if (sb->s_count >= S_BIAS) {
atomic_inc(&sb->s_active);
spin_unlock(&sb_lock);
atomic_inc(&watch->count);
return 1;
}
spin_unlock(&sb_lock);
return 0;
}
/**
* put_inotify_watch - decrements the ref count on a given watch. cleans up
* watch references if the count reaches zero. inotify_watch is freed by
* inotify callers via the destroy_watch() op.
* @watch: watch to release
*/
void put_inotify_watch(struct inotify_watch *watch)
{
if (atomic_dec_and_test(&watch->count)) {
struct inotify_handle *ih = watch->ih;
iput(watch->inode);
ih->in_ops->destroy_watch(watch);
put_inotify_handle(ih);
}
}
EXPORT_SYMBOL_GPL(put_inotify_watch);
void unpin_inotify_watch(struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
put_inotify_watch(watch);
deactivate_super(sb);
}
/*
* inotify_handle_get_wd - returns the next WD for use by the given handle
*
* Callers must hold ih->mutex. This function can sleep.
*/
static int inotify_handle_get_wd(struct inotify_handle *ih,
struct inotify_watch *watch)
{
int ret;
do {
if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
return -ENOSPC;
ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
} while (ret == -EAGAIN);
if (likely(!ret))
ih->last_wd = watch->wd;
return ret;
}
/*
* inotify_inode_watched - returns nonzero if there are watches on this inode
* and zero otherwise. We call this lockless, we do not care if we race.
*/
static inline int inotify_inode_watched(struct inode *inode)
{
return !list_empty(&inode->inotify_watches);
}
/*
* Get child dentry flag into synch with parent inode.
* Flag should always be clear for negative dentrys.
*/
static void set_dentry_child_flags(struct inode *inode, int watched)
{
struct dentry *alias;
spin_lock(&dcache_lock);
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
spin_lock(&child->d_lock);
if (watched)
child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
}
spin_unlock(&dcache_lock);
}
/*
* inotify_find_handle - find the watch associated with the given inode and
* handle
*
* Callers must hold inode->inotify_mutex.
*/
static struct inotify_watch *inode_find_handle(struct inode *inode,
struct inotify_handle *ih)
{
struct inotify_watch *watch;
list_for_each_entry(watch, &inode->inotify_watches, i_list) {
if (watch->ih == ih)
return watch;
}
return NULL;
}
/*
* remove_watch_no_event - remove watch without the IN_IGNORED event.
*
* Callers must hold both inode->inotify_mutex and ih->mutex.
*/
static void remove_watch_no_event(struct inotify_watch *watch,
struct inotify_handle *ih)
{
list_del(&watch->i_list);
list_del(&watch->h_list);
if (!inotify_inode_watched(watch->inode))
set_dentry_child_flags(watch->inode, 0);
idr_remove(&ih->idr, watch->wd);
}
/**
* inotify_remove_watch_locked - Remove a watch from both the handle and the
* inode. Sends the IN_IGNORED event signifying that the inode is no longer
* watched. May be invoked from a caller's event handler.
* @ih: inotify handle associated with watch
* @watch: watch to remove
*
* Callers must hold both inode->inotify_mutex and ih->mutex.
*/
void inotify_remove_watch_locked(struct inotify_handle *ih,
struct inotify_watch *watch)
{
remove_watch_no_event(watch, ih);
ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
/* Kernel API for producing events */
/*
* inotify_d_instantiate - instantiate dcache entry for inode
*/
void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
{
struct dentry *parent;
if (!inode)
return;
spin_lock(&entry->d_lock);
parent = entry->d_parent;
if (parent->d_inode && inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&entry->d_lock);
}
/*
* inotify_d_move - dcache entry has been moved
*/
void inotify_d_move(struct dentry *entry)
{
struct dentry *parent;
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
}
/**
* inotify_inode_queue_event - queue an event to all watches on this inode
* @inode: inode event is originating from
* @mask: event mask describing this event
* @cookie: cookie for synchronization, or zero
* @name: filename, if any
* @n_inode: inode associated with name
*/
void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
const char *name, struct inode *n_inode)
{
struct inotify_watch *watch, *next;
if (!inotify_inode_watched(inode))
return;
mutex_lock(&inode->inotify_mutex);
list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
u32 watch_mask = watch->mask;
if (watch_mask & mask) {
struct inotify_handle *ih= watch->ih;
mutex_lock(&ih->mutex);
if (watch_mask & IN_ONESHOT)
remove_watch_no_event(watch, ih);
ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
name, n_inode);
mutex_unlock(&ih->mutex);
}
}
mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
/**
* inotify_dentry_parent_queue_event - queue an event to a dentry's parent
* @dentry: the dentry in question, we queue against this dentry's parent
* @mask: event mask describing this event
* @cookie: cookie for synchronization, or zero
* @name: filename, if any
*/
void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
u32 cookie, const char *name)
{
struct dentry *parent;
struct inode *inode;
if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
return;
spin_lock(&dentry->d_lock);
parent = dentry->d_parent;
inode = parent->d_inode;
if (inotify_inode_watched(inode)) {
dget(parent);
spin_unlock(&dentry->d_lock);
inotify_inode_queue_event(inode, mask, cookie, name,
dentry->d_inode);
dput(parent);
} else
spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
/**
* inotify_get_cookie - return a unique cookie for use in synchronizing events.
*/
u32 inotify_get_cookie(void)
{
return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);
/**
* inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
* @list: list of inodes being unmounted (sb->s_inodes)
*
* Called with inode_lock held, protecting the unmounting super block's list
* of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
* We temporarily drop inode_lock, however, and CAN block.
*/
void inotify_unmount_inodes(struct list_head *list)
{
struct inode *inode, *next_i, *need_iput = NULL;
list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
struct inotify_watch *watch, *next_w;
struct inode *need_iput_tmp;
struct list_head *watches;
/*
* We cannot __iget() an inode in state I_CLEAR, I_FREEING,
* I_WILL_FREE, or I_NEW which is fine because by that point
* the inode cannot have any associated watches.
*/
if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
continue;
/*
* If i_count is zero, the inode cannot have any watches and
* doing an __iget/iput with MS_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
if (!atomic_read(&inode->i_count))
continue;
need_iput_tmp = need_iput;
need_iput = NULL;
/* In case inotify_remove_watch_locked() drops a reference. */
if (inode != need_iput_tmp)
__iget(inode);
else
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
atomic_read(&next_i->i_count) &&
!(next_i->i_state & (I_CLEAR | I_FREEING |
I_WILL_FREE))) {
__iget(next_i);
need_iput = next_i;
}
/*
* We can safely drop inode_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun. Finally,
* iprune_mutex keeps shrink_icache_memory() away.
*/
spin_unlock(&inode_lock);
if (need_iput_tmp)
iput(need_iput_tmp);
/* for each watch, send IN_UNMOUNT and then remove it */
mutex_lock(&inode->inotify_mutex);
watches = &inode->inotify_watches;
list_for_each_entry_safe(watch, next_w, watches, i_list) {
struct inotify_handle *ih= watch->ih;
get_inotify_watch(watch);
mutex_lock(&ih->mutex);
ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
NULL, NULL);
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
put_inotify_watch(watch);
}
mutex_unlock(&inode->inotify_mutex);
iput(inode);
spin_lock(&inode_lock);
}
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
/**
* inotify_inode_is_dead - an inode has been deleted, cleanup any watches
* @inode: inode that is about to be removed
*/
void inotify_inode_is_dead(struct inode *inode)
{
struct inotify_watch *watch, *next;
mutex_lock(&inode->inotify_mutex);
list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
struct inotify_handle *ih = watch->ih;
mutex_lock(&ih->mutex);
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
}
mutex_unlock(&inode->inotify_mutex);
}
EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
/* Kernel Consumer API */
/**
* inotify_init - allocate and initialize an inotify instance
* @ops: caller's inotify operations
*/
struct inotify_handle *inotify_init(const struct inotify_operations *ops)
{
struct inotify_handle *ih;
ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
if (unlikely(!ih))
return ERR_PTR(-ENOMEM);
idr_init(&ih->idr);
INIT_LIST_HEAD(&ih->watches);
mutex_init(&ih->mutex);
ih->last_wd = 0;
ih->in_ops = ops;
atomic_set(&ih->count, 0);
get_inotify_handle(ih);
return ih;
}
EXPORT_SYMBOL_GPL(inotify_init);
/**
* inotify_init_watch - initialize an inotify watch
* @watch: watch to initialize
*/
void inotify_init_watch(struct inotify_watch *watch)
{
INIT_LIST_HEAD(&watch->h_list);
INIT_LIST_HEAD(&watch->i_list);
atomic_set(&watch->count, 0);
get_inotify_watch(watch); /* initial get */
}
EXPORT_SYMBOL_GPL(inotify_init_watch);
/*
* Watch removals suck violently. To kick the watch out we need (in this
* order) inode->inotify_mutex and ih->mutex. That's fine if we have
* a hold on inode; however, for all other cases we need to make damn sure
* we don't race with umount. We can *NOT* just grab a reference to a
* watch - inotify_unmount_inodes() will happily sail past it and we'll end
* with reference to inode potentially outliving its superblock. Ideally
* we just want to grab an active reference to superblock if we can; that
* will make sure we won't go into inotify_umount_inodes() until we are
* done. Cleanup is just deactivate_super(). However, that leaves a messy
* case - what if we *are* racing with umount() and active references to
* superblock can't be acquired anymore? We can bump ->s_count, grab
* ->s_umount, which will almost certainly wait until the superblock is shut
* down and the watch in question is pining for fjords. That's fine, but
* there is a problem - we might have hit the window between ->s_active
* getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
* is past the point of no return and is heading for shutdown) and the
* moment when deactivate_super() acquires ->s_umount. We could just do
* drop_super() yield() and retry, but that's rather antisocial and this
* stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
* found that we'd got there first (i.e. that ->s_root is non-NULL) we know
* that we won't race with inotify_umount_inodes(). So we could grab a
* reference to watch and do the rest as above, just with drop_super() instead
* of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
* could grab ->s_umount. So the watch could've been gone already.
*
* That still can be dealt with - we need to save watch->wd, do idr_find()
* and compare its result with our pointer. If they match, we either have
* the damn thing still alive or we'd lost not one but two races at once,
* the watch had been killed and a new one got created with the same ->wd
* at the same address. That couldn't have happened in inotify_destroy(),
* but inotify_rm_wd() could run into that. Still, "new one got created"
* is not a problem - we have every right to kill it or leave it alone,
* whatever's more convenient.
*
* So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
* "grab it and kill it" check. If it's been our original watch, we are
* fine, if it's a newcomer - nevermind, just pretend that we'd won the
* race and kill the fscker anyway; we are safe since we know that its
* superblock won't be going away.
*
* And yes, this is far beyond mere "not very pretty"; so's the entire
* concept of inotify to start with.
*/
/**
* pin_to_kill - pin the watch down for removal
* @ih: inotify handle
* @watch: watch to kill
*
* Called with ih->mutex held, drops it. Possible return values:
* 0 - nothing to do, it has died
* 1 - remove it, drop the reference and deactivate_super()
* 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
* that variant, since it involved a lot of PITA, but that's the best that
* could've been done.
*/
static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
{
struct super_block *sb = watch->inode->i_sb;
s32 wd = watch->wd;
spin_lock(&sb_lock);
if (sb->s_count >= S_BIAS) {
atomic_inc(&sb->s_active);
spin_unlock(&sb_lock);
get_inotify_watch(watch);
mutex_unlock(&ih->mutex);
return 1; /* the best outcome */
}
sb->s_count++;
spin_unlock(&sb_lock);
mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
down_read(&sb->s_umount);
if (likely(!sb->s_root)) {
/* fs is already shut down; the watch is dead */
drop_super(sb);
return 0;
}
/* raced with the final deactivate_super() */
mutex_lock(&ih->mutex);
if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
/* the watch is dead */
mutex_unlock(&ih->mutex);
drop_super(sb);
return 0;
}
/* still alive or freed and reused with the same sb and wd; kill */
get_inotify_watch(watch);
mutex_unlock(&ih->mutex);
return 2;
}
static void unpin_and_kill(struct inotify_watch *watch, int how)
{
struct super_block *sb = watch->inode->i_sb;
put_inotify_watch(watch);
switch (how) {
case 1:
deactivate_super(sb);
break;
case 2:
drop_super(sb);
}
}
/**
* inotify_destroy - clean up and destroy an inotify instance
* @ih: inotify handle
*/
void inotify_destroy(struct inotify_handle *ih)
{
/*
* Destroy all of the watches for this handle. Unfortunately, not very
* pretty. We cannot do a simple iteration over the list, because we
* do not know the inode until we iterate to the watch. But we need to
* hold inode->inotify_mutex before ih->mutex. The following works.
*
* AV: it had to become even uglier to start working ;-/
*/
while (1) {
struct inotify_watch *watch;
struct list_head *watches;
struct super_block *sb;
struct inode *inode;
int how;
mutex_lock(&ih->mutex);
watches = &ih->watches;
if (list_empty(watches)) {
mutex_unlock(&ih->mutex);
break;
}
watch = list_first_entry(watches, struct inotify_watch, h_list);
sb = watch->inode->i_sb;
how = pin_to_kill(ih, watch);
if (!how)
continue;
inode = watch->inode;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* make sure we didn't race with another list removal */
if (likely(idr_find(&ih->idr, watch->wd))) {
remove_watch_no_event(watch, ih);
put_inotify_watch(watch);
}
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
unpin_and_kill(watch, how);
}
/* free this handle: the put matching the get in inotify_init() */
put_inotify_handle(ih);
}
EXPORT_SYMBOL_GPL(inotify_destroy);
/**
* inotify_find_watch - find an existing watch for an (ih,inode) pair
* @ih: inotify handle
* @inode: inode to watch
* @watchp: pointer to existing inotify_watch
*
* Caller must pin given inode (via nameidata).
*/
s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
struct inotify_watch **watchp)
{
struct inotify_watch *old;
int ret = -ENOENT;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
old = inode_find_handle(inode, ih);
if (unlikely(old)) {
get_inotify_watch(old); /* caller must put watch */
*watchp = old;
ret = old->wd;
}
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_watch);
/**
* inotify_find_update_watch - find and update the mask of an existing watch
* @ih: inotify handle
* @inode: inode's watch to update
* @mask: mask of events to watch
*
* Caller must pin given inode (via nameidata).
*/
s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
u32 mask)
{
struct inotify_watch *old;
int mask_add = 0;
int ret;
if (mask & IN_MASK_ADD)
mask_add = 1;
/* don't allow invalid bits: we don't want flags set */
mask &= IN_ALL_EVENTS | IN_ONESHOT;
if (unlikely(!mask))
return -EINVAL;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/*
* Handle the case of re-adding a watch on an (inode,ih) pair that we
* are already watching. We just update the mask and return its wd.
*/
old = inode_find_handle(inode, ih);
if (unlikely(!old)) {
ret = -ENOENT;
goto out;
}
if (mask_add)
old->mask |= mask;
else
old->mask = mask;
ret = old->wd;
out:
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_find_update_watch);
/**
* inotify_add_watch - add a watch to an inotify instance
* @ih: inotify handle
* @watch: caller allocated watch structure
* @inode: inode to watch
* @mask: mask of events to watch
*
* Caller must pin given inode (via nameidata).
* Caller must ensure it only calls inotify_add_watch() once per watch.
* Calls inotify_handle_get_wd() so may sleep.
*/
s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
struct inode *inode, u32 mask)
{
int ret = 0;
int newly_watched;
/* don't allow invalid bits: we don't want flags set */
mask &= IN_ALL_EVENTS | IN_ONESHOT;
if (unlikely(!mask))
return -EINVAL;
watch->mask = mask;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* Initialize a new watch */
ret = inotify_handle_get_wd(ih, watch);
if (unlikely(ret))
goto out;
ret = watch->wd;
/* save a reference to handle and bump the count to make it official */
get_inotify_handle(ih);
watch->ih = ih;
/*
* Save a reference to the inode and bump the ref count to make it
* official. We hold a reference to nameidata, which makes this safe.
*/
watch->inode = igrab(inode);
/* Add the watch to the handle's and the inode's list */
newly_watched = !inotify_inode_watched(inode);
list_add(&watch->h_list, &ih->watches);
list_add(&watch->i_list, &inode->inotify_watches);
/*
* Set child flags _after_ adding the watch, so there is no race
* windows where newly instantiated children could miss their parent's
* watched flag.
*/
if (newly_watched)
set_dentry_child_flags(inode, 1);
out:
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(inotify_add_watch);
/**
* inotify_clone_watch - put the watch next to existing one
* @old: already installed watch
* @new: new watch
*
* Caller must hold the inotify_mutex of inode we are dealing with;
* it is expected to remove the old watch before unlocking the inode.
*/
s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
{
struct inotify_handle *ih = old->ih;
int ret = 0;
new->mask = old->mask;
new->ih = ih;
mutex_lock(&ih->mutex);
/* Initialize a new watch */
ret = inotify_handle_get_wd(ih, new);
if (unlikely(ret))
goto out;
ret = new->wd;
get_inotify_handle(ih);
new->inode = igrab(old->inode);
list_add(&new->h_list, &ih->watches);
list_add(&new->i_list, &old->inode->inotify_watches);
out:
mutex_unlock(&ih->mutex);
return ret;
}
void inotify_evict_watch(struct inotify_watch *watch)
{
get_inotify_watch(watch);
mutex_lock(&watch->ih->mutex);
inotify_remove_watch_locked(watch->ih, watch);
mutex_unlock(&watch->ih->mutex);
}
/**
* inotify_rm_wd - remove a watch from an inotify instance
* @ih: inotify handle
* @wd: watch descriptor to remove
*
* Can sleep.
*/
int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
{
struct inotify_watch *watch;
struct super_block *sb;
struct inode *inode;
int how;
mutex_lock(&ih->mutex);
watch = idr_find(&ih->idr, wd);
if (unlikely(!watch)) {
mutex_unlock(&ih->mutex);
return -EINVAL;
}
sb = watch->inode->i_sb;
how = pin_to_kill(ih, watch);
if (!how)
return 0;
inode = watch->inode;
mutex_lock(&inode->inotify_mutex);
mutex_lock(&ih->mutex);
/* make sure that we did not race */
if (likely(idr_find(&ih->idr, wd) == watch))
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
mutex_unlock(&inode->inotify_mutex);
unpin_and_kill(watch, how);
return 0;
}
EXPORT_SYMBOL_GPL(inotify_rm_wd);
/**
* inotify_rm_watch - remove a watch from an inotify instance
* @ih: inotify handle
* @watch: watch to remove
*
* Can sleep.
*/
int inotify_rm_watch(struct inotify_handle *ih,
struct inotify_watch *watch)
{
return inotify_rm_wd(ih, watch->wd);
}
EXPORT_SYMBOL_GPL(inotify_rm_watch);
/*
* inotify_setup - core initialization function
*/
static int __init inotify_setup(void)
{
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
BUILD_BUG_ON(IN_OPEN != FS_OPEN);
BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
BUILD_BUG_ON(IN_CREATE != FS_CREATE);
BUILD_BUG_ON(IN_DELETE != FS_DELETE);
BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
atomic_set(&inotify_cookie, 0);
return 0;
}
module_init(inotify_setup);

View File

@@ -0,0 +1,22 @@
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
#include <linux/slab.h> /* struct kmem_cache */
extern struct kmem_cache *event_priv_cachep;
struct inotify_event_private_data {
struct fsnotify_event_private_data fsnotify_event_priv_data;
int wd;
};
struct inotify_inode_mark_entry {
/* fsnotify_mark_entry MUST be the first thing */
struct fsnotify_mark_entry fsn_entry;
int wd;
};
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group);
extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
extern const struct fsnotify_ops inotify_fsnotify_ops;

View File

@@ -0,0 +1,173 @@
/*
* fs/inotify_user.c - inotify support for userspace
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* inotify was largely rewriten to make use of the fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
#include <linux/path.h> /* struct path */
#include <linux/slab.h> /* kmem_* */
#include <linux/types.h>
#include <linux/sched.h>
#include "inotify.h"
static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
struct inode *to_tell;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int wd, ret;
to_tell = event->to_tell;
spin_lock(&to_tell->i_lock);
entry = fsnotify_find_mark_entry(group, to_tell);
spin_unlock(&to_tell->i_lock);
/* race with watch removal? We already passes should_send */
if (unlikely(!entry))
return 0;
ientry = container_of(entry, struct inotify_inode_mark_entry,
fsn_entry);
wd = ientry->wd;
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
if (unlikely(!event_priv))
return -ENOMEM;
fsn_event_priv = &event_priv->fsnotify_event_priv_data;
fsn_event_priv->group = group;
event_priv->wd = wd;
ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
if (ret) {
inotify_free_event_priv(fsn_event_priv);
/* EEXIST says we tail matched, EOVERFLOW isn't something
* to report up the stack. */
if ((ret == -EEXIST) ||
(ret == -EOVERFLOW))
ret = 0;
}
if (entry->mask & IN_ONESHOT)
fsnotify_destroy_mark_by_entry(entry);
/*
* If we hold the entry until after the event is on the queue
* IN_IGNORED won't be able to pass this event in the queue
*/
fsnotify_put_mark(entry);
return ret;
}
static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
{
inotify_ignored_and_remove_idr(entry, group);
}
static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
{
struct fsnotify_mark_entry *entry;
bool send;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
return false;
mask = (mask & ~FS_EVENT_ON_CHILD);
send = (entry->mask & mask);
/* find took a reference */
fsnotify_put_mark(entry);
return send;
}
/*
* This is NEVER supposed to be called. Inotify marks should either have been
* removed from the idr when the watch was removed or in the
* fsnotify_destroy_mark_by_group() call when the inotify instance was being
* torn down. This is only called if the idr is about to be freed but there
* are still marks in it.
*/
static int idr_callback(int id, void *p, void *data)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
static bool warned = false;
if (warned)
return 0;
warned = true;
entry = p;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
"idr. Probably leaking memory\n", id, p, data);
/*
* I'm taking the liberty of assuming that the mark in question is a
* valid address and I'm dereferencing it. This might help to figure
* out why we got here and the panic is no worse than the original
* BUG() that was here.
*/
if (entry)
printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
entry->group, entry->inode, ientry->wd);
return 0;
}
static void inotify_free_group_priv(struct fsnotify_group *group)
{
/* ideally the idr is empty and we won't hit the BUG in teh callback */
idr_for_each(&group->inotify_data.idr, idr_callback, group);
idr_remove_all(&group->inotify_data.idr);
idr_destroy(&group->inotify_data.idr);
free_uid(group->inotify_data.user);
}
void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
{
struct inotify_event_private_data *event_priv;
event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
fsnotify_event_priv_data);
kmem_cache_free(event_priv_cachep, event_priv);
}
const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
.should_send_event = inotify_should_send_event,
.free_group_priv = inotify_free_group_priv,
.free_event_priv = inotify_free_event_priv,
.freeing_mark = inotify_freeing_mark,
};

View File

@@ -0,0 +1,843 @@
/*
* fs/inotify_user.c - inotify support for userspace
*
* Authors:
* John McCutchan <ttb@tentacle.dhs.org>
* Robert Love <rml@novell.com>
*
* Copyright (C) 2005 John McCutchan
* Copyright 2006 Hewlett-Packard Development Company, L.P.
*
* Copyright (C) 2009 Eric Paris <Red Hat Inc>
* inotify was largely rewriten to make use of the fsnotify infrastructure
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/file.h>
#include <linux/fs.h> /* struct inode */
#include <linux/fsnotify_backend.h>
#include <linux/idr.h>
#include <linux/init.h> /* module_init */
#include <linux/inotify.h>
#include <linux/kernel.h> /* roundup() */
#include <linux/magic.h> /* superblock magic number */
#include <linux/mount.h> /* mntget */
#include <linux/namei.h> /* LOOKUP_FOLLOW */
#include <linux/path.h> /* struct path */
#include <linux/sched.h> /* struct user */
#include <linux/slab.h> /* struct kmem_cache */
#include <linux/syscalls.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <linux/wait.h>
#include "inotify.h"
#include <asm/ioctls.h>
static struct vfsmount *inotify_mnt __read_mostly;
/* these are configurable via /proc/sys/fs/inotify/ */
static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
int inotify_max_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
struct kmem_cache *event_priv_cachep __read_mostly;
/*
* When inotify registers a new group it increments this and uses that
* value as an offset to set the fsnotify group "name" and priority.
*/
static atomic_t inotify_grp_num;
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
static int zero;
ctl_table inotify_table[] = {
{
.ctl_name = INOTIFY_MAX_USER_INSTANCES,
.procname = "max_user_instances",
.data = &inotify_max_user_instances,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
{
.ctl_name = INOTIFY_MAX_USER_WATCHES,
.procname = "max_user_watches",
.data = &inotify_max_user_watches,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
{
.ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
.procname = "max_queued_events",
.data = &inotify_max_queued_events,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero
},
{ .ctl_name = 0 }
};
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(u32 arg)
{
__u32 mask;
/*
* everything should accept their own ignored, cares about children,
* and should receive events when the inode is unmounted
*/
mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
/* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
return mask;
}
static inline u32 inotify_mask_to_arg(__u32 mask)
{
return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
IN_Q_OVERFLOW);
}
/* intofiy userspace file descriptor functions */
static unsigned int inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
int ret = 0;
poll_wait(file, &group->notification_waitq, wait);
mutex_lock(&group->notification_mutex);
if (!fsnotify_notify_queue_is_empty(group))
ret = POLLIN | POLLRDNORM;
mutex_unlock(&group->notification_mutex);
return ret;
}
/*
* Get an inotify_kernel_event if one exists and is small
* enough to fit in "count". Return an error pointer if
* not large enough.
*
* Called with the group->notification_mutex held.
*/
static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
size_t event_size = sizeof(struct inotify_event);
struct fsnotify_event *event;
if (fsnotify_notify_queue_is_empty(group))
return NULL;
event = fsnotify_peek_notify_event(group);
if (event->name_len)
event_size += roundup(event->name_len + 1, event_size);
if (event_size > count)
return ERR_PTR(-EINVAL);
/* held the notification_mutex the whole time, so this is the
* same event we peeked above */
fsnotify_remove_notify_event(group);
return event;
}
/*
* Copy an event to user space, returning how much we copied.
*
* We already checked that the event size is smaller than the
* buffer we had in "get_one_event()" above.
*/
static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fsnotify_event *event,
char __user *buf)
{
struct inotify_event inotify_event;
struct fsnotify_event_private_data *fsn_priv;
struct inotify_event_private_data *priv;
size_t event_size = sizeof(struct inotify_event);
size_t name_len = 0;
/* we get the inotify watch descriptor from the event private data */
spin_lock(&event->lock);
fsn_priv = fsnotify_remove_priv_from_event(group, event);
spin_unlock(&event->lock);
if (!fsn_priv)
inotify_event.wd = -1;
else {
priv = container_of(fsn_priv, struct inotify_event_private_data,
fsnotify_event_priv_data);
inotify_event.wd = priv->wd;
inotify_free_event_priv(fsn_priv);
}
/*
* round up event->name_len so it is a multiple of event_size
* plus an extra byte for the terminating '\0'.
*/
if (event->name_len)
name_len = roundup(event->name_len + 1, event_size);
inotify_event.len = name_len;
inotify_event.mask = inotify_mask_to_arg(event->mask);
inotify_event.cookie = event->sync_cookie;
/* send the main event */
if (copy_to_user(buf, &inotify_event, event_size))
return -EFAULT;
buf += event_size;
/*
* fsnotify only stores the pathname, so here we have to send the pathname
* and then pad that pathname out to a multiple of sizeof(inotify_event)
* with zeros. I get my zeros from the nul_inotify_event.
*/
if (name_len) {
unsigned int len_to_zero = name_len - event->name_len;
/* copy the path name */
if (copy_to_user(buf, event->file_name, event->name_len))
return -EFAULT;
buf += event->name_len;
/* fill userspace with 0's */
if (clear_user(buf, len_to_zero))
return -EFAULT;
buf += len_to_zero;
event_size += name_len;
}
return event_size;
}
static ssize_t inotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
struct fsnotify_event *kevent;
char __user *start;
int ret;
DEFINE_WAIT(wait);
start = buf;
group = file->private_data;
while (1) {
prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
fsnotify_put_event(kevent);
if (ret < 0)
break;
buf += ret;
count -= ret;
continue;
}
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
ret = -EINTR;
if (signal_pending(current))
break;
if (start != buf)
break;
schedule();
}
finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
}
static int inotify_fasync(int fd, struct file *file, int on)
{
struct fsnotify_group *group = file->private_data;
return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
}
static int inotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
struct user_struct *user = group->inotify_data.user;
fsnotify_clear_marks_by_group(group);
/* free this group, matching get was inotify_init->fsnotify_obtain_group */
fsnotify_put_group(group);
atomic_dec(&user->inotify_devs);
return 0;
}
static long inotify_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fsnotify_group *group;
struct fsnotify_event_holder *holder;
struct fsnotify_event *event;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
group = file->private_data;
p = (void __user *) arg;
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
list_for_each_entry(holder, &group->notification_list, event_list) {
event = holder->event;
send_len += sizeof(struct inotify_event);
if (event->name_len)
send_len += roundup(event->name_len + 1,
sizeof(struct inotify_event));
}
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
break;
}
return ret;
}
static const struct file_operations inotify_fops = {
.poll = inotify_poll,
.read = inotify_read,
.fasync = inotify_fasync,
.release = inotify_release,
.unlocked_ioctl = inotify_ioctl,
.compat_ioctl = inotify_ioctl,
};
/*
* find_inode - resolve a user-given path to a specific inode
*/
static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
{
int error;
error = user_path_at(AT_FDCWD, dirname, flags, path);
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
error = inode_permission(path->dentry->d_inode, MAY_READ);
if (error)
path_put(path);
return error;
}
/*
* Remove the mark from the idr (if present) and drop the reference
* on the mark because it was in the idr.
*/
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark_entry *ientry)
{
struct idr *idr;
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *found_ientry;
int wd;
spin_lock(&group->inotify_data.idr_lock);
idr = &group->inotify_data.idr;
wd = ientry->wd;
if (wd == -1)
goto out;
entry = idr_find(&group->inotify_data.idr, wd);
if (unlikely(!entry))
goto out;
found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
if (unlikely(found_ientry != ientry)) {
/* We found an entry in the idr with the right wd, but it's
* not the entry we were told to remove. eparis seriously
* fucked up somewhere. */
WARN_ON(1);
ientry->wd = -1;
goto out;
}
/* One ref for being in the idr, one ref held by the caller */
BUG_ON(atomic_read(&entry->refcnt) < 2);
idr_remove(idr, wd);
ientry->wd = -1;
/* removed from the idr, drop that ref */
fsnotify_put_mark(entry);
out:
spin_unlock(&group->inotify_data.idr_lock);
}
/*
* Send IN_IGNORED for this wd, remove this wd from the idr.
*/
void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group)
{
struct inotify_inode_mark_entry *ientry;
struct fsnotify_event *ignored_event;
struct inotify_event_private_data *event_priv;
struct fsnotify_event_private_data *fsn_event_priv;
int ret;
ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
FSNOTIFY_EVENT_NONE, NULL, 0,
GFP_NOFS);
if (!ignored_event)
return;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
if (unlikely(!event_priv))
goto skip_send_ignore;
fsn_event_priv = &event_priv->fsnotify_event_priv_data;
fsn_event_priv->group = group;
event_priv->wd = ientry->wd;
ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
if (ret)
inotify_free_event_priv(fsn_event_priv);
skip_send_ignore:
/* matches the reference taken when the event was created */
fsnotify_put_event(ignored_event);
/* remove this entry from the idr */
inotify_remove_from_idr(group, ientry);
atomic_dec(&group->inotify_data.user->inotify_watches);
}
/* ding dong the mark is dead */
static void inotify_free_mark(struct fsnotify_mark_entry *entry)
{
struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
kmem_cache_free(inotify_inode_mark_cachep, ientry);
}
static int inotify_update_existing_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
__u32 old_mask, new_mask;
__u32 mask;
int add = (arg & IN_MASK_ADD);
int ret;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
return -EINVAL;
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
if (!entry)
return -ENOENT;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
spin_lock(&entry->lock);
old_mask = entry->mask;
if (add) {
entry->mask |= mask;
new_mask = entry->mask;
} else {
entry->mask = mask;
new_mask = entry->mask;
}
spin_unlock(&entry->lock);
if (old_mask != new_mask) {
/* more bits in old than in new? */
int dropped = (old_mask & ~new_mask);
/* more bits in this entry than the inode's mask? */
int do_inode = (new_mask & ~inode->i_fsnotify_mask);
/* more bits in this entry than the group? */
int do_group = (new_mask & ~group->mask);
/* update the inode with this new entry */
if (dropped || do_inode)
fsnotify_recalc_inode_mask(inode);
/* update the group mask with the new mask */
if (dropped || do_group)
fsnotify_recalc_group_mask(group);
}
/* return the wd */
ret = ientry->wd;
/* match the get from fsnotify_find_mark_entry() */
fsnotify_put_mark(entry);
return ret;
}
static int inotify_new_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
{
struct inotify_inode_mark_entry *tmp_ientry;
__u32 mask;
int ret;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
return -EINVAL;
tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_ientry))
return -ENOMEM;
fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
tmp_ientry->fsn_entry.mask = mask;
tmp_ientry->wd = -1;
ret = -ENOSPC;
if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
goto out_err;
retry:
ret = -ENOMEM;
if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
goto out_err;
/* we are putting the mark on the idr, take a reference */
fsnotify_get_mark(&tmp_ientry->fsn_entry);
spin_lock(&group->inotify_data.idr_lock);
ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
group->inotify_data.last_wd+1,
&tmp_ientry->wd);
spin_unlock(&group->inotify_data.idr_lock);
if (ret) {
/* we didn't get on the idr, drop the idr reference */
fsnotify_put_mark(&tmp_ientry->fsn_entry);
/* idr was out of memory allocate and try again */
if (ret == -EAGAIN)
goto retry;
goto out_err;
}
/* we are on the idr, now get on the inode */
ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
if (ret) {
/* we failed to get on the inode, get off the idr */
inotify_remove_from_idr(group, tmp_ientry);
goto out_err;
}
/* update the idr hint, who cares about races, it's just a hint */
group->inotify_data.last_wd = tmp_ientry->wd;
/* increment the number of watches the user has */
atomic_inc(&group->inotify_data.user->inotify_watches);
/* return the watch descriptor for this new entry */
ret = tmp_ientry->wd;
/* match the ref from fsnotify_init_markentry() */
fsnotify_put_mark(&tmp_ientry->fsn_entry);
/* if this mark added a new event update the group mask */
if (mask & ~group->mask)
fsnotify_recalc_group_mask(group);
out_err:
if (ret < 0)
kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
return ret;
}
static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
{
int ret = 0;
retry:
/* try to update and existing watch with the new arg */
ret = inotify_update_existing_watch(group, inode, arg);
/* no mark present, try to add a new one */
if (ret == -ENOENT)
ret = inotify_new_watch(group, inode, arg);
/*
* inotify_new_watch could race with another thread which did an
* inotify_new_watch between the update_existing and the add watch
* here, go back and try to update an existing mark again.
*/
if (ret == -EEXIST)
goto retry;
return ret;
}
static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
{
struct fsnotify_group *group;
unsigned int grp_num;
/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
if (IS_ERR(group))
return group;
group->max_events = max_events;
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
group->inotify_data.last_wd = 0;
group->inotify_data.user = user;
group->inotify_data.fa = NULL;
return group;
}
/* inotify syscalls */
SYSCALL_DEFINE1(inotify_init1, int, flags)
{
struct fsnotify_group *group;
struct user_struct *user;
struct file *filp;
int fd, ret;
/* Check the IN_* constants for consistency. */
BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
return -EINVAL;
fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
filp = get_empty_filp();
if (!filp) {
ret = -ENFILE;
goto out_put_fd;
}
user = get_current_user();
if (unlikely(atomic_read(&user->inotify_devs) >=
inotify_max_user_instances)) {
ret = -EMFILE;
goto out_free_uid;
}
/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
group = inotify_new_group(user, inotify_max_queued_events);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
goto out_free_uid;
}
filp->f_op = &inotify_fops;
filp->f_path.mnt = mntget(inotify_mnt);
filp->f_path.dentry = dget(inotify_mnt->mnt_root);
filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
filp->f_mode = FMODE_READ;
filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
filp->private_data = group;
atomic_inc(&user->inotify_devs);
fd_install(fd, filp);
return fd;
out_free_uid:
free_uid(user);
put_filp(filp);
out_put_fd:
put_unused_fd(fd);
return ret;
}
SYSCALL_DEFINE0(inotify_init)
{
return sys_inotify_init1(0);
}
SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
u32, mask)
{
struct fsnotify_group *group;
struct inode *inode;
struct path path;
struct file *filp;
int ret, fput_needed;
unsigned flags = 0;
filp = fget_light(fd, &fput_needed);
if (unlikely(!filp))
return -EBADF;
/* verify that this is indeed an inotify instance */
if (unlikely(filp->f_op != &inotify_fops)) {
ret = -EINVAL;
goto fput_and_out;
}
if (!(mask & IN_DONT_FOLLOW))
flags |= LOOKUP_FOLLOW;
if (mask & IN_ONLYDIR)
flags |= LOOKUP_DIRECTORY;
ret = inotify_find_inode(pathname, &path, flags);
if (ret)
goto fput_and_out;
/* inode held in place by reference to path; group by fget on fd */
inode = path.dentry->d_inode;
group = filp->private_data;
/* create/update an inode mark */
ret = inotify_update_watch(group, inode, mask);
if (unlikely(ret))
goto path_put_and_out;
path_put_and_out:
path_put(&path);
fput_and_out:
fput_light(filp, fput_needed);
return ret;
}
SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
{
struct fsnotify_group *group;
struct fsnotify_mark_entry *entry;
struct file *filp;
int ret = 0, fput_needed;
filp = fget_light(fd, &fput_needed);
if (unlikely(!filp))
return -EBADF;
/* verify that this is indeed an inotify instance */
if (unlikely(filp->f_op != &inotify_fops)) {
ret = -EINVAL;
goto out;
}
group = filp->private_data;
spin_lock(&group->inotify_data.idr_lock);
entry = idr_find(&group->inotify_data.idr, wd);
if (unlikely(!entry)) {
spin_unlock(&group->inotify_data.idr_lock);
ret = -EINVAL;
goto out;
}
fsnotify_get_mark(entry);
spin_unlock(&group->inotify_data.idr_lock);
fsnotify_destroy_mark_by_entry(entry);
fsnotify_put_mark(entry);
out:
fput_light(filp, fput_needed);
return ret;
}
static int
inotify_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
return get_sb_pseudo(fs_type, "inotify", NULL,
INOTIFYFS_SUPER_MAGIC, mnt);
}
static struct file_system_type inotify_fs_type = {
.name = "inotifyfs",
.get_sb = inotify_get_sb,
.kill_sb = kill_anon_super,
};
/*
* inotify_user_setup - Our initialization function. Note that we cannnot return
* error because we have compiled-in VFS hooks. So an (unlikely) failure here
* must result in panic().
*/
static int __init inotify_user_setup(void)
{
int ret;
ret = register_filesystem(&inotify_fs_type);
if (unlikely(ret))
panic("inotify: register_filesystem returned %d!\n", ret);
inotify_mnt = kern_mount(&inotify_fs_type);
if (IS_ERR(inotify_mnt))
panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
inotify_max_queued_events = 16384;
inotify_max_user_instances = 128;
inotify_max_user_watches = 8192;
return 0;
}
module_init(inotify_user_setup);

View File

@@ -0,0 +1,421 @@
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Basic idea behind the notification queue: An fsnotify group (like inotify)
* sends the userspace notification about events asyncronously some time after
* the event happened. When inotify gets an event it will need to add that
* event to the group notify queue. Since a single event might need to be on
* multiple group's notification queues we can't add the event directly to each
* queue and instead add a small "event_holder" to each queue. This event_holder
* has a pointer back to the original event. Since the majority of events are
* going to end up on one, and only one, notification queue we embed one
* event_holder into each event. This means we have a single allocation instead
* of always needing two. If the embedded event_holder is already in use by
* another group a new event_holder (from fsnotify_event_holder_cachep) will be
* allocated and used.
*/
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mutex.h>
#include <linux/namei.h>
#include <linux/path.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
static struct kmem_cache *fsnotify_event_cachep;
static struct kmem_cache *fsnotify_event_holder_cachep;
/*
* This is a magic event we send when the q is too full. Since it doesn't
* hold real event information we just keep one system wide and use it any time
* it is needed. It's refcnt is set 1 at kernel init time and will never
* get set to 0 so it will never get 'freed'
*/
static struct fsnotify_event q_overflow_event;
static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
/**
* fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
* Called from fsnotify_move, which is inlined into filesystem modules.
*/
u32 fsnotify_get_cookie(void)
{
return atomic_inc_return(&fsnotify_sync_cookie);
}
EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
/* return true if the notify queue is empty, false otherwise */
bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&group->notification_mutex));
return list_empty(&group->notification_list) ? true : false;
}
void fsnotify_get_event(struct fsnotify_event *event)
{
atomic_inc(&event->refcnt);
}
void fsnotify_put_event(struct fsnotify_event *event)
{
if (!event)
return;
if (atomic_dec_and_test(&event->refcnt)) {
if (event->data_type == FSNOTIFY_EVENT_PATH)
path_put(&event->path);
BUG_ON(!list_empty(&event->private_data_list));
kfree(event->file_name);
kmem_cache_free(fsnotify_event_cachep, event);
}
}
struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
{
return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
}
void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
{
kmem_cache_free(fsnotify_event_holder_cachep, holder);
}
/*
* Find the private data that the group previously attached to this event when
* the group added the event to the notification queue (fsnotify_add_notify_event)
*/
struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
{
struct fsnotify_event_private_data *lpriv;
struct fsnotify_event_private_data *priv = NULL;
assert_spin_locked(&event->lock);
list_for_each_entry(lpriv, &event->private_data_list, event_list) {
if (lpriv->group == group) {
priv = lpriv;
list_del(&priv->event_list);
break;
}
}
return priv;
}
/*
* Check if 2 events contain the same information. We do not compare private data
* but at this moment that isn't a problem for any know fsnotify listeners.
*/
static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
{
if ((old->mask == new->mask) &&
(old->to_tell == new->to_tell) &&
(old->data_type == new->data_type) &&
(old->name_len == new->name_len)) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_INODE):
/* remember, after old was put on the wait_q we aren't
* allowed to look at the inode any more, only thing
* left to check was if the file_name is the same */
if (!old->name_len ||
!strcmp(old->file_name, new->file_name))
return true;
break;
case (FSNOTIFY_EVENT_PATH):
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
break;
case (FSNOTIFY_EVENT_NONE):
if (old->mask & FS_Q_OVERFLOW)
return true;
else if (old->mask & FS_IN_IGNORED)
return false;
return false;
};
}
return false;
}
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. If the event is successfully added to the
* group's notification queue, a reference is taken on event.
*/
int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
struct fsnotify_event_private_data *priv)
{
struct fsnotify_event_holder *holder = NULL;
struct list_head *list = &group->notification_list;
struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
int ret = 0;
/*
* There is one fsnotify_event_holder embedded inside each fsnotify_event.
* Check if we expect to be able to use that holder. If not alloc a new
* holder.
* For the overflow event it's possible that something will use the in
* event holder before we get the lock so we may need to jump back and
* alloc a new holder, this can't happen for most events...
*/
if (!list_empty(&event->holder.event_list)) {
alloc_holder:
holder = fsnotify_alloc_event_holder();
if (!holder)
return -ENOMEM;
}
mutex_lock(&group->notification_mutex);
if (group->q_len >= group->max_events) {
event = &q_overflow_event;
ret = -EOVERFLOW;
/* sorry, no private data on the overflow event */
priv = NULL;
}
spin_lock(&event->lock);
if (list_empty(&event->holder.event_list)) {
if (unlikely(holder))
fsnotify_destroy_event_holder(holder);
holder = &event->holder;
} else if (unlikely(!holder)) {
/* between the time we checked above and got the lock the in
* event holder was used, go back and get a new one */
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
goto alloc_holder;
}
if (!list_empty(list)) {
last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
last_event = last_holder->event;
if (event_compare(last_event, event)) {
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
return -EEXIST;
}
}
group->q_len++;
holder->event = event;
fsnotify_get_event(event);
list_add_tail(&holder->event_list, list);
if (priv)
list_add_tail(&priv->event_list, &event->private_data_list);
spin_unlock(&event->lock);
mutex_unlock(&group->notification_mutex);
wake_up(&group->notification_waitq);
return ret;
}
/*
* Remove and return the first event from the notification list. There is a
* reference held on this event since it was on the list. It is the responsibility
* of the caller to drop this reference.
*/
struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_holder *holder;
BUG_ON(!mutex_is_locked(&group->notification_mutex));
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
event = holder->event;
spin_lock(&event->lock);
holder->event = NULL;
list_del_init(&holder->event_list);
spin_unlock(&event->lock);
/* event == holder means we are referenced through the in event holder */
if (holder != &event->holder)
fsnotify_destroy_event_holder(holder);
group->q_len--;
return event;
}
/*
* This will not remove the event, that must be done with fsnotify_remove_notify_event()
*/
struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_holder *holder;
BUG_ON(!mutex_is_locked(&group->notification_mutex));
holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
event = holder->event;
return event;
}
/*
* Called when a group is being torn down to clean up any outstanding
* event notifications.
*/
void fsnotify_flush_notify(struct fsnotify_group *group)
{
struct fsnotify_event *event;
struct fsnotify_event_private_data *priv;
mutex_lock(&group->notification_mutex);
while (!fsnotify_notify_queue_is_empty(group)) {
event = fsnotify_remove_notify_event(group);
/* if they don't implement free_event_priv they better not have attached any */
if (group->ops->free_event_priv) {
spin_lock(&event->lock);
priv = fsnotify_remove_priv_from_event(group, event);
spin_unlock(&event->lock);
if (priv)
group->ops->free_event_priv(priv);
}
fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
}
mutex_unlock(&group->notification_mutex);
}
static void initialize_event(struct fsnotify_event *event)
{
event->holder.event = NULL;
INIT_LIST_HEAD(&event->holder.event_list);
atomic_set(&event->refcnt, 1);
spin_lock_init(&event->lock);
event->path.dentry = NULL;
event->path.mnt = NULL;
event->inode = NULL;
event->data_type = FSNOTIFY_EVENT_NONE;
INIT_LIST_HEAD(&event->private_data_list);
event->to_tell = NULL;
event->file_name = NULL;
event->name_len = 0;
event->sync_cookie = 0;
}
/*
* fsnotify_create_event - Allocate a new event which will be sent to each
* group's handle_event function if the group was interested in this
* particular event.
*
* @to_tell the inode which is supposed to receive the event (sometimes a
* parent of the inode to which the event happened.
* @mask what actually happened.
* @data pointer to the object which was actually affected
* @data_type flag indication if the data is a file, path, inode, nothing...
* @name the filename, if available
*/
struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
int data_type, const char *name, u32 cookie,
gfp_t gfp)
{
struct fsnotify_event *event;
event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
if (!event)
return NULL;
initialize_event(event);
if (name) {
event->file_name = kstrdup(name, gfp);
if (!event->file_name) {
kmem_cache_free(fsnotify_event_cachep, event);
return NULL;
}
event->name_len = strlen(event->file_name);
}
event->sync_cookie = cookie;
event->to_tell = to_tell;
switch (data_type) {
case FSNOTIFY_EVENT_FILE: {
struct file *file = data;
struct path *path = &file->f_path;
event->path.dentry = path->dentry;
event->path.mnt = path->mnt;
path_get(&event->path);
event->data_type = FSNOTIFY_EVENT_PATH;
break;
}
case FSNOTIFY_EVENT_PATH: {
struct path *path = data;
event->path.dentry = path->dentry;
event->path.mnt = path->mnt;
path_get(&event->path);
event->data_type = FSNOTIFY_EVENT_PATH;
break;
}
case FSNOTIFY_EVENT_INODE:
event->inode = data;
event->data_type = FSNOTIFY_EVENT_INODE;
break;
case FSNOTIFY_EVENT_NONE:
event->inode = NULL;
event->path.dentry = NULL;
event->path.mnt = NULL;
break;
default:
BUG();
}
event->mask = mask;
return event;
}
__init int fsnotify_notification_init(void)
{
fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
initialize_event(&q_overflow_event);
q_overflow_event.mask = FS_Q_OVERFLOW;
return 0;
}
subsys_initcall(fsnotify_notification_init);