summaryrefslogtreecommitdiffstats
path: root/fs/notify
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 11:39:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-10 11:39:13 -0700
commit8c8946f509a494769a8c602b5ed189df01917d39 (patch)
treedfd96bd6ca5ea6803c6d77f65ba37e04f78b2d3b /fs/notify
parent5f248c9c251c60af3403902b26e08de43964ea0b (diff)
parent1968f5eed54ce47bde488fd9a450912e4a2d7138 (diff)
Merge branch 'for-linus' of git://git.infradead.org/users/eparis/notify
* 'for-linus' of git://git.infradead.org/users/eparis/notify: (132 commits) fanotify: use both marks when possible fsnotify: pass both the vfsmount mark and inode mark fsnotify: walk the inode and vfsmount lists simultaneously fsnotify: rework ignored mark flushing fsnotify: remove global fsnotify groups lists fsnotify: remove group->mask fsnotify: remove the global masks fsnotify: cleanup should_send_event fanotify: use the mark in handler functions audit: use the mark in handler functions dnotify: use the mark in handler functions inotify: use the mark in handler functions fsnotify: send fsnotify_mark to groups in event handling functions fsnotify: Exchange list heads instead of moving elements fsnotify: srcu to protect read side of inode and vfsmount locks fsnotify: use an explicit flag to indicate fsnotify_destroy_mark has been called fsnotify: use _rcu functions for mark list traversal fsnotify: place marks on object in order of group memory address vfs/fsnotify: fsnotify_close can delay the final work in fput fsnotify: store struct file not struct path ... Fix up trivial delete/modify conflict in fs/notify/inotify/inotify.c.
Diffstat (limited to 'fs/notify')
-rw-r--r--fs/notify/Kconfig1
-rw-r--r--fs/notify/Makefile4
-rw-r--r--fs/notify/dnotify/dnotify.c213
-rw-r--r--fs/notify/fanotify/Kconfig26
-rw-r--r--fs/notify/fanotify/Makefile1
-rw-r--r--fs/notify/fanotify/fanotify.c212
-rw-r--r--fs/notify/fanotify/fanotify_user.c760
-rw-r--r--fs/notify/fsnotify.c201
-rw-r--r--fs/notify/fsnotify.h27
-rw-r--r--fs/notify/group.c182
-rw-r--r--fs/notify/inode_mark.c331
-rw-r--r--fs/notify/inotify/Kconfig15
-rw-r--r--fs/notify/inotify/Makefile1
-rw-r--r--fs/notify/inotify/inotify.c872
-rw-r--r--fs/notify/inotify/inotify.h7
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c151
-rw-r--r--fs/notify/inotify/inotify_user.c369
-rw-r--r--fs/notify/mark.c371
-rw-r--r--fs/notify/notification.c236
-rw-r--r--fs/notify/vfsmount_mark.c187
20 files changed, 2440 insertions, 1727 deletions
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index dffbb0911d02..22c629eedd82 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,3 +3,4 @@ config FSNOTIFY
source "fs/notify/dnotify/Kconfig"
source "fs/notify/inotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 0922cc826c46..ae5f33a6d868 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,6 @@
-obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o
+obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
+ mark.o vfsmount_mark.o
obj-y += dnotify/
obj-y += inotify/
+obj-y += fanotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 7e54e52964dd..3344bdd5506e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -29,17 +29,17 @@
int dir_notify_enable __read_mostly = 1;
static struct kmem_cache *dnotify_struct_cache __read_mostly;
-static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_cache __read_mostly;
static struct fsnotify_group *dnotify_group __read_mostly;
static DEFINE_MUTEX(dnotify_mark_mutex);
/*
- * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * dnotify will attach one of these to each inode (i_fsnotify_marks) which
* is being watched by dnotify. If multiple userspace applications are watching
* the same directory with dnotify their information is chained in dn
*/
-struct dnotify_mark_entry {
- struct fsnotify_mark_entry fsn_entry;
+struct dnotify_mark {
+ struct fsnotify_mark fsn_mark;
struct dnotify_struct *dn;
};
@@ -51,27 +51,27 @@ struct dnotify_mark_entry {
* it calls the fsnotify function so it can update the set of all events relevant
* to this inode.
*/
-static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
+static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
{
__u32 new_mask, old_mask;
struct dnotify_struct *dn;
- struct dnotify_mark_entry *dnentry = container_of(entry,
- struct dnotify_mark_entry,
- fsn_entry);
+ struct dnotify_mark *dn_mark = container_of(fsn_mark,
+ struct dnotify_mark,
+ fsn_mark);
- assert_spin_locked(&entry->lock);
+ assert_spin_locked(&fsn_mark->lock);
- old_mask = entry->mask;
+ old_mask = fsn_mark->mask;
new_mask = 0;
- for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+ for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
- entry->mask = new_mask;
+ fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
if (old_mask == new_mask)
return;
- if (entry->inode)
- fsnotify_recalc_inode_mask(entry->inode);
+ if (fsn_mark->i.inode)
+ fsnotify_recalc_inode_mask(fsn_mark->i.inode);
}
/*
@@ -83,29 +83,25 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
struct fsnotify_event *event)
{
- struct fsnotify_mark_entry *entry = NULL;
- struct dnotify_mark_entry *dnentry;
+ struct dnotify_mark *dn_mark;
struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
- to_tell = event->to_tell;
+ BUG_ON(vfsmount_mark);
- spin_lock(&to_tell->i_lock);
- entry = fsnotify_find_mark_entry(group, to_tell);
- spin_unlock(&to_tell->i_lock);
+ to_tell = event->to_tell;
- /* unlikely since we alreay passed dnotify_should_send_event() */
- if (unlikely(!entry))
- return 0;
- dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+ dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
- spin_lock(&entry->lock);
- prev = &dnentry->dn;
+ spin_lock(&inode_mark->lock);
+ prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_mask & test_mask) == 0) {
prev = &dn->dn_next;
@@ -118,12 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
else {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
- dnotify_recalc_inode_mask(entry);
+ dnotify_recalc_inode_mask(inode_mark);
}
}
- spin_unlock(&entry->lock);
- fsnotify_put_mark(entry);
+ spin_unlock(&inode_mark->lock);
return 0;
}
@@ -133,44 +128,27 @@ static int dnotify_handle_event(struct fsnotify_group *group,
* userspace notification for that pair.
*/
static bool dnotify_should_send_event(struct fsnotify_group *group,
- struct inode *inode, __u32 mask)
+ struct inode *inode,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
+ __u32 mask, void *data, int data_type)
{
- struct fsnotify_mark_entry *entry;
- bool send;
-
- /* !dir_notify_enable should never get here, don't waste time checking
- if (!dir_notify_enable)
- return 0; */
-
/* not a dir, dnotify doesn't care */
if (!S_ISDIR(inode->i_mode))
return false;
- spin_lock(&inode->i_lock);
- entry = fsnotify_find_mark_entry(group, inode);
- spin_unlock(&inode->i_lock);
-
- /* no mark means no dnotify watch */
- if (!entry)
- return false;
-
- mask = (mask & ~FS_EVENT_ON_CHILD);
- send = (mask & entry->mask);
-
- fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
-
- return send;
+ return true;
}
-static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
{
- struct dnotify_mark_entry *dnentry = container_of(entry,
- struct dnotify_mark_entry,
- fsn_entry);
+ struct dnotify_mark *dn_mark = container_of(fsn_mark,
+ struct dnotify_mark,
+ fsn_mark);
- BUG_ON(dnentry->dn);
+ BUG_ON(dn_mark->dn);
- kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+ kmem_cache_free(dnotify_mark_cache, dn_mark);
}
static struct fsnotify_ops dnotify_fsnotify_ops = {
@@ -183,15 +161,15 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
/*
* Called every time a file is closed. Looks first for a dnotify mark on the
- * inode. If one is found run all of the ->dn entries attached to that
+ * inode. If one is found run all of the ->dn structures attached to that
* mark for one relevant to this process closing the file and remove that
* dnotify_struct. If that was the last dnotify_struct also remove the
- * fsnotify_mark_entry.
+ * fsnotify_mark.
*/
void dnotify_flush(struct file *filp, fl_owner_t id)
{
- struct fsnotify_mark_entry *entry;
- struct dnotify_mark_entry *dnentry;
+ struct fsnotify_mark *fsn_mark;
+ struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct inode *inode;
@@ -200,38 +178,34 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
if (!S_ISDIR(inode->i_mode))
return;
- spin_lock(&inode->i_lock);
- entry = fsnotify_find_mark_entry(dnotify_group, inode);
- spin_unlock(&inode->i_lock);
- if (!entry)
+ fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+ if (!fsn_mark)
return;
- dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+ dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
mutex_lock(&dnotify_mark_mutex);
- spin_lock(&entry->lock);
- prev = &dnentry->dn;
+ spin_lock(&fsn_mark->lock);
+ prev = &dn_mark->dn;
while ((dn = *prev) != NULL) {
if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
*prev = dn->dn_next;
kmem_cache_free(dnotify_struct_cache, dn);
- dnotify_recalc_inode_mask(entry);
+ dnotify_recalc_inode_mask(fsn_mark);
break;
}
prev = &dn->dn_next;
}
- spin_unlock(&entry->lock);
+ spin_unlock(&fsn_mark->lock);
/* nothing else could have found us thanks to the dnotify_mark_mutex */
- if (dnentry->dn == NULL)
- fsnotify_destroy_mark_by_entry(entry);
-
- fsnotify_recalc_group_mask(dnotify_group);
+ if (dn_mark->dn == NULL)
+ fsnotify_destroy_mark(fsn_mark);
mutex_unlock(&dnotify_mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(fsn_mark);
}
/* this conversion is done only at watch creation */
@@ -259,16 +233,16 @@ static __u32 convert_arg(unsigned long arg)
/*
* If multiple processes watch the same inode with dnotify there is only one
- * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
* onto that mark. This function either attaches the new dnotify_struct onto
* that list, or it |= the mask onto an existing dnofiy_struct.
*/
-static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
fl_owner_t id, int fd, struct file *filp, __u32 mask)
{
struct dnotify_struct *odn;
- odn = dnentry->dn;
+ odn = dn_mark->dn;
while (odn != NULL) {
/* adding more events to existing dnofiy_struct? */
if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
@@ -283,8 +257,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
dn->dn_fd = fd;
dn->dn_filp = filp;
dn->dn_owner = id;
- dn->dn_next = dnentry->dn;
- dnentry->dn = dn;
+ dn->dn_next = dn_mark->dn;
+ dn_mark->dn = dn;
return 0;
}
@@ -296,8 +270,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
*/
int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
{
- struct dnotify_mark_entry *new_dnentry, *dnentry;
- struct fsnotify_mark_entry *new_entry, *entry;
+ struct dnotify_mark *new_dn_mark, *dn_mark;
+ struct fsnotify_mark *new_fsn_mark, *fsn_mark;
struct dnotify_struct *dn;
struct inode *inode;
fl_owner_t id = current->files;
@@ -306,7 +280,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
__u32 mask;
/* we use these to tell if we need to kfree */
- new_entry = NULL;
+ new_fsn_mark = NULL;
dn = NULL;
if (!dir_notify_enable) {
@@ -336,8 +310,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
}
/* new fsnotify mark, we expect most fcntl calls to add a new mark */
- new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
- if (!new_dnentry) {
+ new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
+ if (!new_dn_mark) {
error = -ENOMEM;
goto out_err;
}
@@ -345,29 +319,27 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
mask = convert_arg(arg);
- /* set up the new_entry and new_dnentry */
- new_entry = &new_dnentry->fsn_entry;
- fsnotify_init_mark(new_entry, dnotify_free_mark);
- new_entry->mask = mask;
- new_dnentry->dn = NULL;
+ /* set up the new_fsn_mark and new_dn_mark */
+ new_fsn_mark = &new_dn_mark->fsn_mark;
+ fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
+ new_fsn_mark->mask = mask;
+ new_dn_mark->dn = NULL;
/* this is needed to prevent the fcntl/close race described below */
mutex_lock(&dnotify_mark_mutex);
- /* add the new_entry or find an old one. */
- spin_lock(&inode->i_lock);
- entry = fsnotify_find_mark_entry(dnotify_group, inode);
- spin_unlock(&inode->i_lock);
- if (entry) {
- dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
- spin_lock(&entry->lock);
+ /* add the new_fsn_mark or find an old one. */
+ fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+ if (fsn_mark) {
+ dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
+ spin_lock(&fsn_mark->lock);
} else {
- fsnotify_add_mark(new_entry, dnotify_group, inode);
- spin_lock(&new_entry->lock);
- entry = new_entry;
- dnentry = new_dnentry;
- /* we used new_entry, so don't free it */
- new_entry = NULL;
+ fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
+ spin_lock(&new_fsn_mark->lock);
+ fsn_mark = new_fsn_mark;
+ dn_mark = new_dn_mark;
+ /* we used new_fsn_mark, so don't free it */
+ new_fsn_mark = NULL;
}
rcu_read_lock();
@@ -376,17 +348,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
/* if (f != filp) means that we lost a race and another task/thread
* actually closed the fd we are still playing with before we grabbed
- * the dnotify_mark_mutex and entry->lock. Since closing the fd is the
- * only time we clean up the mark entries we need to get our mark off
+ * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the
+ * only time we clean up the marks we need to get our mark off
* the list. */
if (f != filp) {
/* if we added ourselves, shoot ourselves, it's possible that
- * the flush actually did shoot this entry. That's fine too
+ * the flush actually did shoot this fsn_mark. That's fine too
* since multiple calls to destroy_mark is perfectly safe, if
- * we found a dnentry already attached to the inode, just sod
+ * we found a dn_mark already attached to the inode, just sod
* off silently as the flush at close time dealt with it.
*/
- if (dnentry == new_dnentry)
+ if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
@@ -394,13 +366,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error) {
/* if we added, we must shoot */
- if (dnentry == new_dnentry)
+ if (dn_mark == new_dn_mark)
destroy = 1;
goto out;
}
- error = attach_dn(dn, dnentry, id, fd, filp, mask);
- /* !error means that we attached the dn to the dnentry, so don't free it */
+ error = attach_dn(dn, dn_mark, id, fd, filp, mask);
+ /* !error means that we attached the dn to the dn_mark, so don't free it */
if (!error)
dn = NULL;
/* -EEXIST means that we didn't add this new dn and used an old one.
@@ -408,20 +380,18 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
else if (error == -EEXIST)
error = 0;
- dnotify_recalc_inode_mask(entry);
+ dnotify_recalc_inode_mask(fsn_mark);
out:
- spin_unlock(&entry->lock);
+ spin_unlock(&fsn_mark->lock);
if (destroy)
- fsnotify_destroy_mark_by_entry(entry);
-
- fsnotify_recalc_group_mask(dnotify_group);
+ fsnotify_destroy_mark(fsn_mark);
mutex_unlock(&dnotify_mark_mutex);
- fsnotify_put_mark(entry);
+ fsnotify_put_mark(fsn_mark);
out_err:
- if (new_entry)
- fsnotify_put_mark(new_entry);
+ if (new_fsn_mark)
+ fsnotify_put_mark(new_fsn_mark);
if (dn)
kmem_cache_free(dnotify_struct_cache, dn);
return error;
@@ -430,10 +400,9 @@ out_err:
static int __init dnotify_init(void)
{
dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
- dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+ dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
- dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
- 0, &dnotify_fsnotify_ops);
+ dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
if (IS_ERR(dnotify_group))
panic("unable to allocate fsnotify group for dnotify\n");
return 0;
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
new file mode 100644
index 000000000000..3ac36b7bf6b9
--- /dev/null
+++ b/fs/notify/fanotify/Kconfig
@@ -0,0 +1,26 @@
+config FANOTIFY
+ bool "Filesystem wide access notification"
+ select FSNOTIFY
+ select ANON_INODES
+ default n
+ ---help---
+ Say Y here to enable fanotify suport. fanotify is a file access
+ notification system which differs from inotify in that it sends
+ and open file descriptor to the userspace listener along with
+ the event.
+
+ If unsure, say Y.
+
+config FANOTIFY_ACCESS_PERMISSIONS
+ bool "fanotify permissions checking"
+ depends on FANOTIFY
+ depends on SECURITY
+ default n
+ ---help---
+ Say Y here is you want fanotify listeners to be able to make permissions
+ decisions concerning filesystem events. This is used by some fanotify
+ listeners which need to scan files before allowing the system access to
+ use those files. This is used by some anti-malware vendors and by some
+ hierarchical storage managent systems.
+
+ If unsure, say N.
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
new file mode 100644
index 000000000000..0999213e7e6e
--- /dev/null
+++ b/fs/notify/fanotify/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
new file mode 100644
index 000000000000..eb8f73c9c131
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.c
@@ -0,0 +1,212 @@
+#include <linux/fanotify.h>
+#include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h> /* UINT_MAX */
+#include <linux/mount.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+ pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+
+ if (old->to_tell == new->to_tell &&
+ old->data_type == new->data_type &&
+ old->tgid == new->tgid) {
+ switch (old->data_type) {
+ case (FSNOTIFY_EVENT_FILE):
+ if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
+ (old->file->f_path.dentry == new->file->f_path.dentry))
+ return true;
+ case (FSNOTIFY_EVENT_NONE):
+ return true;
+ default:
+ BUG();
+ };
+ }
+ return false;
+}
+
+/* and the list better be locked by something too! */
+static struct fsnotify_event *fanotify_merge(struct list_head *list,
+ struct fsnotify_event *event)
+{
+ struct fsnotify_event_holder *test_holder;
+ struct fsnotify_event *test_event = NULL;
+ struct fsnotify_event *new_event;
+
+ pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+
+
+ list_for_each_entry_reverse(test_holder, list, event_list) {
+ if (should_merge(test_holder->event, event)) {
+ test_event = test_holder->event;
+ break;
+ }
+ }
+
+ if (!test_event)
+ return NULL;
+
+ fsnotify_get_event(test_event);
+
+ /* if they are exactly the same we are done */
+ if (test_event->mask == event->mask)
+ return test_event;
+
+ /*
+ * if the refcnt == 2 this is the only queue
+ * for this event and so we can update the mask
+ * in place.
+ */
+ if (atomic_read(&test_event->refcnt) == 2) {
+ test_event->mask |= event->mask;
+ return test_event;
+ }
+
+ new_event = fsnotify_clone_event(test_event);
+
+ /* done with test_event */
+ fsnotify_put_event(test_event);
+
+ /* couldn't allocate memory, merge was not possible */
+ if (unlikely(!new_event))
+ return ERR_PTR(-ENOMEM);
+
+ /* build new event and replace it on the list */
+ new_event->mask = (test_event->mask | event->mask);
+ fsnotify_replace_event(test_holder, new_event);
+
+ /* we hold a reference on new_event from clone_event */
+ return new_event;
+}
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static int fanotify_get_response_from_access(struct fsnotify_group *group,
+ struct fsnotify_event *event)
+{
+ int ret;
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+ wait_event(group->fanotify_data.access_waitq, event->response);
+
+ /* userspace responded, convert to something usable */
+ spin_lock(&event->lock);
+ switch (event->response) {
+ case FAN_ALLOW:
+ ret = 0;
+ break;
+ case FAN_DENY:
+ default:
+ ret = -EPERM;
+ }
+ event->response = 0;
+ spin_unlock(&event->lock);
+
+ pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
+ group, event, ret);
+
+ return ret;
+}
+#endif
+
+static int fanotify_handle_event(struct fsnotify_group *group,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *fanotify_mark,
+ struct fsnotify_event *event)
+{
+ int ret = 0;
+ struct fsnotify_event *notify_event = NULL;
+
+ BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+ BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+ BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+ BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+ BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+ BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+ BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+ BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+ BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+ notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
+ if (IS_ERR(notify_event))
+ return PTR_ERR(notify_event);
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ if (event->mask & FAN_ALL_PERM_EVENTS) {
+ /* if we merged we need to wait on the new event */
+ if (notify_event)
+ event = notify_event;
+ ret = fanotify_get_response_from_access(group, event);
+ }
+#endif
+
+ if (notify_event)
+ fsnotify_put_event(notify_event);
+
+ return ret;
+}
+
+static bool fanotify_should_send_event(struct fsnotify_group *group,
+ struct inode *to_tell,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmnt_mark,
+ __u32 event_mask, void *data, int data_type)
+{
+ __u32 marks_mask, marks_ignored_mask;
+
+ pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
+ "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
+ inode_mark, vfsmnt_mark, event_mask, data, data_type);
+
+ pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
+ __func__, group, vfsmnt_mark, inode_mark, event_mask);
+
+ /* sorry, fanotify only gives a damn about files and dirs */
+ if (!S_ISREG(to_tell->i_mode) &&
+ !S_ISDIR(to_tell->i_mode))
+ return false;
+
+ /* if we don't have enough info to send an event to userspace say no */
+ if (data_type != FSNOTIFY_EVENT_FILE)
+ return false;
+
+ if (inode_mark && vfsmnt_mark) {
+ marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
+ marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
+ } else if (inode_mark) {
+ /*
+ * if the event is for a child and this inode doesn't care about
+ * events on the child, don't send it!
+ */
+ if ((event_mask & FS_EVENT_ON_CHILD) &&
+ !(inode_mark->mask & FS_EVENT_ON_CHILD))
+ return false;
+ marks_mask = inode_mark->mask;
+ marks_ignored_mask = inode_mark->ignored_mask;
+ } else if (vfsmnt_mark) {
+ marks_mask = vfsmnt_mark->mask;
+ marks_ignored_mask = vfsmnt_mark->ignored_mask;
+ } else {
+ BUG();
+ }
+
+ if (event_mask & marks_mask & ~marks_ignored_mask)
+ return true;
+
+ return false;
+}
+
+const struct fsnotify_ops fanotify_fsnotify_ops = {
+ .handle_event = fanotify_handle_event,
+ .should_send_event = fanotify_should_send_event,
+ .free_group_priv = NULL,
+ .free_event_priv = NULL,
+ .freeing_mark = NULL,
+};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
new file mode 100644
index 000000000000..25a3b4dfcf61
--- /dev/null
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -0,0 +1,760 @@
+#include <linux/fanotify.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <asm/ioctls.h>
+
+extern const struct fsnotify_ops fanotify_fsnotify_ops;
+
+static struct kmem_cache *fanotify_mark_cache __read_mostly;
+static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+
+struct fanotify_response_event {
+ struct list_head list;
+ __s32 fd;
+ struct fsnotify_event *event;
+};
+
+/*
+ * Get an fsnotify notification event if one exists and is small
+ * enough to fit in "count". Return an error pointer if the count
+ * is not large enough.
+ *
+ * Called with the group->notification_mutex held.
+ */
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+ size_t count)
+{
+ BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+ pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
+
+ if (fsnotify_notify_queue_is_empty(group))
+ return NULL;
+
+ if (FAN_EVENT_METADATA_LEN > count)
+ return ERR_PTR(-EINVAL);
+
+ /* held the notification_mutex the whole time, so this is the
+ * same event we peeked above */
+ return fsnotify_remove_notify_event(group);
+}
+
+static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+ int client_fd;
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ struct file *new_file;
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+ client_fd = get_unused_fd();
+ if (client_fd < 0)
+ return client_fd;
+
+ if (event->data_type != FSNOTIFY_EVENT_FILE) {
+ WARN_ON(1);
+ put_unused_fd(client_fd);
+ return -EINVAL;
+ }
+
+ /*
+ * we need a new file handle for the userspace program so it can read even if it was
+ * originally opened O_WRONLY.
+ */
+ dentry = dget(event->file->f_path.dentry);
+ mnt = mntget(event->file->f_path.mnt);
+ /* it's possible this event was an overflow event. in that case dentry and mnt
+ * are NULL; That's fine, just don't call dentry open */
+ if (dentry && mnt)
+ new_file = dentry_open(dentry, mnt,
+ group->fanotify_data.f_flags | FMODE_NONOTIFY,
+ current_cred());
+ else
+ new_file = ERR_PTR(-EOVERFLOW);
+ if (IS_ERR(new_file)) {
+ /*
+ * we still send an event even if we can't open the file. this
+ * can happen when say tasks are gone and we try to open their
+ * /proc files or we try to open a WRONLY file like in sysfs
+ * we just send the errno to userspace since there isn't much
+ * else we can do.
+ */
+ put_unused_fd(client_fd);
+ client_fd = PTR_ERR(new_file);
+ } else {
+ fd_install(client_fd, new_file);
+ }
+
+ return client_fd;
+}
+
+static ssize_t fill_event_metadata(struct fsnotify_group *group,
+ struct fanotify_event_metadata *metadata,
+ struct fsnotify_event *event)
+{
+ pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
+ group, metadata, event);
+
+ metadata->event_len = FAN_EVENT_METADATA_LEN;
+ metadata->vers = FANOTIFY_METADATA_VERSION;
+ metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
+ metadata->pid = pid_vnr(event->tgid);
+ metadata->fd = create_fd(group, event);
+
+ return metadata->fd;
+}
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
+ __s32 fd)
+{
+ struct fanotify_response_event *re, *return_re = NULL;
+
+ mutex_lock(&group->fanotify_data.access_mutex);
+ list_for_each_entry(re, &group->fanotify_data.access_list, list) {
+ if (re->fd != fd)
+ continue;
+
+ list_del_init(&re->list);
+ return_re = re;
+ break;
+ }
+ mutex_unlock(&group->fanotify_data.access_mutex);
+
+ pr_debug("%s: found return_re=%p\n", __func__, return_re);
+
+ return return_re;
+}
+
+static int process_access_response(struct fsnotify_group *group,
+ struct fanotify_response *response_struct)
+{
+ struct fanotify_response_event *re;
+ __s32 fd = response_struct->fd;
+ __u32 response = response_struct->response;
+
+ pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
+ fd, response);
+ /*
+ * make sure the response is valid, if invalid we do nothing and either
+ * userspace can send a valid responce or we will clean it up after the
+ * timeout
+ */
+ switch (response) {
+ case FAN_ALLOW:
+ case FAN_DENY:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (fd < 0)
+ return -EINVAL;
+
+ re = dequeue_re(group, fd);
+ if (!re)
+ return -ENOENT;
+
+ re->event->response = response;
+
+ wake_up(&group->fanotify_data.access_waitq);
+
+ kmem_cache_free(fanotify_response_event_cache, re);
+
+ return 0;
+}
+
+static int prepare_for_access_response(struct fsnotify_group *group,
+ struct fsnotify_event *event,
+ __s32 fd)
+{
+ struct fanotify_response_event *re;
+
+ if (!(event->mask & FAN_ALL_PERM_EVENTS))
+ return 0;
+
+ re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
+ if (!re)
+ return -ENOMEM;
+
+ re->event = event;
+ re->fd = fd;
+
+ mutex_lock(&group->fanotify_data.access_mutex);
+ list_add_tail(&re->list, &group->fanotify_data.access_list);
+ mutex_unlock(&group->fanotify_data.access_m