From 5f02a877638472e83cb5e335f9eec27052b1c7c2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:28 +0200 Subject: fsnotify: annotate directory entry modification events "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events should always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. fsnotify_nameremove() and fsnotify_move() were modified to no longer set the FS_EVENT_ON_CHILD event bit. This is a semantic change to align with the "dirent" event definition. It has no effect on any existing backend, because dnotify, inotify and audit always requets the child events and fanotify does not get the delete,rename events. The fsnotify_dirent() helper is used instead of fsnotify_parent() to report a dirent event to dentry->d_parent without FS_EVENT_ON_CHILD and regardless if parent has the FS_EVENT_ON_CHILD bit set. Unlike fsnotify_parent(), fsnotify_dirent() assumes that dentry->d_name and dentry->d_parent are stable. For fsnotify_create()/fsnotify_mkdir(), this assumption is abviously correct. For fsnotify_nameremove(), it is less trivial, so we use dget_parent() and take_dentry_name_snapshot() to grab stable references. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 49 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 2ccb08cb5d6a..39b22e88423d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,8 +17,22 @@ #include #include +/* + * Notify this @dir inode about a change in the directory entry @dentry. + * + * Unlike fsnotify_parent(), the event will be reported regardless of the + * FS_EVENT_ON_CHILD mask on the parent inode. + */ +static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) +{ + return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + dentry->d_name.name, 0); +} + /* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(const struct path *path, + struct dentry *dentry, __u32 mask) { if (!dentry) dentry = path->dentry; @@ -85,8 +99,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 fs_cookie = fsnotify_get_cookie(); - __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); - __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); + __u32 old_dir_mask = FS_MOVED_FROM; + __u32 new_dir_mask = FS_MOVED_TO; const unsigned char *new_name = moved->d_name.name; if (old_dir == new_dir) @@ -128,15 +142,35 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) /* * fsnotify_nameremove - a filename was removed from a directory + * + * This is mostly called under parent vfs inode lock so name and + * dentry->d_parent should be stable. However there are some corner cases where + * inode lock is not held. So to be on the safe side and be reselient to future + * callers and out of tree users of d_delete(), we do not assume that d_parent + * and d_name are stable and we use dget_parent() and + * take_dentry_name_snapshot() to grab stable references. */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + struct dentry *parent; + struct name_snapshot name; __u32 mask = FS_DELETE; + /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ + if (IS_ROOT(dentry)) + return; + if (isdir) mask |= FS_ISDIR; - fsnotify_parent(NULL, dentry, mask); + parent = dget_parent(dentry); + take_dentry_name_snapshot(&name, dentry); + + fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, + name.name, 0); + + release_dentry_name_snapshot(&name); + dput(parent); } /* @@ -155,7 +189,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE); } /* @@ -176,12 +210,9 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - __u32 mask = (FS_CREATE | FS_ISDIR); - struct inode *d_inode = dentry->d_inode; - audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); + fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR); } /* -- cgit v1.2.3 From e220140ff6241e180d0c2fc294e61ee6bbc6a18e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:29 +0200 Subject: fsnotify: remove dirent events from FS_EVENTS_POSS_ON_CHILD mask "dirent" events are referring to events that modify directory entries, such as create,delete,rename. Those events are always be reported on a watched directory, regardless if FS_EVENT_ON_CHILD is set on the watch mask. ALL_FSNOTIFY_DIRENT_EVENTS defines all the dirent event types and those event types are removed from FS_EVENTS_POSS_ON_CHILD. That means for a directory with an inotify watch and only dirent events in the mask (i.e. create,delete,move), all children dentries will no longer have the DCACHE_FSNOTIFY_PARENT_WATCHED flag set. This will allow all events that happen on children to be optimized away in __fsnotify_parent() without the need to dereference child->d_parent->d_inode->i_fsnotify_mask. Since the dirent events are never repoted via __fsnotify_parent(), this results in no change of logic, but only an optimization. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7639774e7475..7f195d43efaf 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -59,27 +59,33 @@ * dnotify and inotify. */ #define FS_EVENT_ON_CHILD 0x08000000 -/* This is a list of all events that may get sent to a parernt based on fs event - * happening to inodes inside that directory */ -#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) - #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) +/* + * Directory entry modification events - reported only to directory + * where entry is modified and not to a watching parent. + * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event + * when a directory entry inside a child subdir changes. + */ +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) + #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) +/* + * This is a list of all events that may get sent to a parent based on fs event + * happening to inodes inside that directory. + */ +#define FS_EVENTS_POSS_ON_CHILD (ALL_FSNOTIFY_PERM_EVENTS | \ + FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \ + FS_OPEN | FS_OPEN_EXEC) + /* Events that can be reported to backends */ -#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ - FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ - FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ - FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ - FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \ - FS_OPEN_EXEC | FS_OPEN_EXEC_PERM) +#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FS_EVENTS_POSS_ON_CHILD | \ + FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \ + FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED) /* Extra flags that may be reported with event or control handling of events */ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ -- cgit v1.2.3 From 45a9fb3725d8868a9b4192afd1a1f2bff1cc5ffb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:30 +0200 Subject: fsnotify: send all event types to super block marks So far, existence of super block marks was checked only on events with data type FSNOTIFY_EVENT_PATH. Use the super block of the "to_tell" inode to report the events of all event types to super block marks. This change has no effect on current backends. Soon, this will allow fanotify backend to receive all event types on a super block mark. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ecf09b6243d9..df06f3da166c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -328,16 +328,15 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const unsigned char *file_name, u32 cookie) { struct fsnotify_iter_info iter_info = {}; - struct super_block *sb = NULL; + struct super_block *sb = to_tell->i_sb; struct mount *mnt = NULL; - __u32 mnt_or_sb_mask = 0; + __u32 mnt_or_sb_mask = sb->s_fsnotify_mask; int ret = 0; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); - sb = mnt->mnt.mnt_sb; - mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; + mnt_or_sb_mask |= mnt->mnt_fsnotify_mask; } /* An event "on child" is not intended for a mount/sb mark */ if (mask & FS_EVENT_ON_CHILD) @@ -350,8 +349,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (!to_tell->i_fsnotify_marks && - (!mnt || (!mnt->mnt_fsnotify_marks && !sb->s_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -366,11 +365,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&to_tell->i_fsnotify_marks); + iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = + fsnotify_first_mark(&sb->s_fsnotify_marks); if (mnt) { iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] = fsnotify_first_mark(&mnt->mnt_fsnotify_marks); - iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] = - fsnotify_first_mark(&sb->s_fsnotify_marks); } /* -- cgit v1.2.3 From a0a92d261f2922f4b5d2c0a98d6c41a89c7f5edd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:31 +0200 Subject: fsnotify: move mask out of struct fsnotify_event Common fsnotify_event helpers have no need for the mask field. It is only used by backend code, so move the field out of the abstract fsnotify_event struct and into the concrete backend event structs. This change packs struct inotify_event_info better on 64bit machine and will allow us to cram some more fields into struct fanotify_event_info. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 11 +++++++---- fs/notify/fanotify/fanotify.h | 1 + fs/notify/fanotify/fanotify_user.c | 10 +++++----- fs/notify/inotify/inotify.h | 1 + fs/notify/inotify/inotify_fsnotify.c | 9 +++++---- fs/notify/inotify/inotify_user.c | 5 +++-- fs/notify/notification.c | 22 +--------------------- include/linux/fsnotify_backend.h | 10 ++++++---- 8 files changed, 29 insertions(+), 40 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 3723f3d18d20..98197802bbfb 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -36,20 +36,22 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; + struct fanotify_event_info *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); + new = FANOTIFY_E(event); /* * Don't merge a permission event with any other event so that we know * the event structure we have created in fanotify_handle_event() is the * one we should check for permission response. */ - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(new->mask)) return 0; list_for_each_entry_reverse(test_event, list, list) { if (should_merge(test_event, event)) { - test_event->mask |= event->mask; + FANOTIFY_E(test_event)->mask |= new->mask; return 1; } } @@ -173,7 +175,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, if (!event) goto out; init: __maybe_unused - fsnotify_init_event(&event->fse, inode, mask); + fsnotify_init_event(&event->fse, inode); + event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); else @@ -280,7 +283,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); put_pid(event->pid); - if (fanotify_is_perm_event(fsn_event->mask)) { + if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); return; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index ea05b8a401e7..e630d787d4c3 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -14,6 +14,7 @@ extern struct kmem_cache *fanotify_perm_event_cachep; */ struct fanotify_event_info { struct fsnotify_event fse; + u32 mask; /* * We hold ref to this path so it may be dereferenced at any point * during this object's lifetime diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9c870b0d2b56..dea47d07cc29 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -131,9 +131,9 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata->mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->pid); - if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) + if (unlikely(event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { metadata->fd = create_fd(group, event, file); @@ -230,7 +230,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fanotify_event_metadata.event_len)) goto out_close_fd; - if (fanotify_is_perm_event(event->mask)) + if (fanotify_is_perm_event(FANOTIFY_E(event)->mask)) FANOTIFY_PE(event)->fd = fd; if (fd != FAN_NOFD) @@ -316,7 +316,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(kevent->mask)) { + if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { fsnotify_destroy_event(group, kevent); } else { if (ret <= 0) { @@ -401,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { + if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7e4578d35b61..74ae60305189 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -5,6 +5,7 @@ struct inotify_event_info { struct fsnotify_event fse; + u32 mask; int wd; u32 sync_cookie; int name_len; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f4184b4f3815..fe97299975f2 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn, { struct inotify_event_info *old, *new; - if (old_fsn->mask & FS_IN_IGNORED) - return false; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); - if ((old_fsn->mask == new_fsn->mask) && + if (old->mask & FS_IN_IGNORED) + return false; + if ((old->mask == new->mask) && (old_fsn->inode == new_fsn->inode) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) @@ -114,7 +114,8 @@ int inotify_handle_event(struct fsnotify_group *group, } fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode, mask); + fsnotify_init_event(fsn_event, inode); + event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; event->name_len = len; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 798f1253141a..e2901fbb9f76 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, */ pad_name_len = round_event_name_len(fsn_event); inotify_event.len = pad_name_len; - inotify_event.mask = inotify_mask_to_arg(fsn_event->mask); + inotify_event.mask = inotify_mask_to_arg(event->mask); inotify_event.wd = event->wd; inotify_event.cookie = event->sync_cookie; @@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + fsnotify_init_event(group->overflow_event, NULL); + oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; oevent->name_len = 0; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3c3e36745f59..027d5d5bb90e 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group, struct fsnotify_event *event) { /* Overflow events are per-group and we don't want to free them */ - if (!event || event->mask == FS_Q_OVERFLOW) + if (!event || event == group->overflow_event) return; /* * If the event is still queued, we have a problem... Do an unreliable @@ -194,23 +194,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group) } spin_unlock(&group->notification_lock); } - -/* - * fsnotify_create_event - Allocate a new event which will be sent to each - * group's handle_event function if the group was interested in this - * particular event. - * - * @inode the inode which is supposed to receive the event (sometimes a - * parent of the inode to which the event happened. - * @mask what actually happened. - * @data pointer to the object which was actually affected - * @data_type flag indication if the data is a file, path, inode, nothing... - * @name the filename, if available - */ -void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode, - u32 mask) -{ - INIT_LIST_HEAD(&event->list); - event->inode = inode; - event->mask = mask; -} diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f195d43efaf..1e4b88bd1443 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -135,7 +135,6 @@ struct fsnotify_event { struct list_head list; /* inode may ONLY be dereferenced during handle_event(). */ struct inode *inode; /* either the inode the event happened to or its parent */ - u32 mask; /* the type of access, bitwise OR for FS_* event types */ }; /* @@ -485,9 +484,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); -/* put here because inotify does some weird stuff when destroying watches */ -extern void fsnotify_init_event(struct fsnotify_event *event, - struct inode *to_tell, u32 mask); +static inline void fsnotify_init_event(struct fsnotify_event *event, + struct inode *inode) +{ + INIT_LIST_HEAD(&event->list); + event->inode = inode; +} #else -- cgit v1.2.3 From 33913997d5c06781c162952c6e5017131fc5aa19 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:32 +0200 Subject: fanotify: rename struct fanotify_{,perm_}event_info struct fanotify_event_info "inherits" from struct fsnotify_event and therefore a more appropriate (and short) name for it is fanotify_event. Same for struct fanotify_perm_event_info, which now "inherits" from struct fanotify_event. We plan to reuse the name struct fanotify_event_info for user visible event info record format. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 16 ++++++++-------- fs/notify/fanotify/fanotify.h | 16 ++++++++-------- fs/notify/fanotify/fanotify_user.c | 20 ++++++++++---------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 98197802bbfb..d8e3b6e50844 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -19,7 +19,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { - struct fanotify_event_info *old, *new; + struct fanotify_event *old, *new; pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn); old = FANOTIFY_E(old_fsn); @@ -36,7 +36,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) { struct fsnotify_event *test_event; - struct fanotify_event_info *new; + struct fanotify_event *new; pr_debug("%s: list=%p event=%p\n", __func__, list, event); new = FANOTIFY_E(event); @@ -60,7 +60,7 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) } static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event, + struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { int ret; @@ -143,11 +143,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, ~marks_ignored_mask; } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const struct path *path) { - struct fanotify_event_info *event = NULL; + struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; /* @@ -162,7 +162,7 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, memalloc_use_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - struct fanotify_perm_event_info *pevent; + struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) @@ -200,7 +200,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info) { int ret = 0; - struct fanotify_event_info *event; + struct fanotify_event *event; struct fsnotify_event *fsn_event; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); @@ -278,7 +278,7 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) static void fanotify_free_event(struct fsnotify_event *fsn_event) { - struct fanotify_event_info *event; + struct fanotify_event *event; event = FANOTIFY_E(fsn_event); path_put(&event->path); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index e630d787d4c3..898b5b2bc1c7 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -12,7 +12,7 @@ extern struct kmem_cache *fanotify_perm_event_cachep; * fanotify_handle_event() and freed when the information is retrieved by * userspace */ -struct fanotify_event_info { +struct fanotify_event { struct fsnotify_event fse; u32 mask; /* @@ -30,16 +30,16 @@ struct fanotify_event_info { * group->notification_list to group->fanotify_data.access_list to wait for * user response. */ -struct fanotify_perm_event_info { - struct fanotify_event_info fae; +struct fanotify_perm_event { + struct fanotify_event fae; int response; /* userspace answer to question */ int fd; /* fd we passed to userspace for this event */ }; -static inline struct fanotify_perm_event_info * +static inline struct fanotify_perm_event * FANOTIFY_PE(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_perm_event_info, fae.fse); + return container_of(fse, struct fanotify_perm_event, fae.fse); } static inline bool fanotify_is_perm_event(u32 mask) @@ -48,11 +48,11 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } -static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) +static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { - return container_of(fse, struct fanotify_event_info, fse); + return container_of(fse, struct fanotify_event, fse); } -struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, +struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const struct path *path); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index dea47d07cc29..55cd87b0cc26 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -73,7 +73,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, } static int create_fd(struct fsnotify_group *group, - struct fanotify_event_info *event, + struct fanotify_event *event, struct file **file) { int client_fd; @@ -120,13 +120,13 @@ static int fill_event_metadata(struct fsnotify_group *group, struct file **file) { int ret = 0; - struct fanotify_event_info *event; + struct fanotify_event *event; pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group, metadata, fsn_event); *file = NULL; - event = container_of(fsn_event, struct fanotify_event_info, fse); + event = container_of(fsn_event, struct fanotify_event, fse); metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; @@ -144,10 +144,10 @@ static int fill_event_metadata(struct fsnotify_group *group, return ret; } -static struct fanotify_perm_event_info *dequeue_event( +static struct fanotify_perm_event *dequeue_event( struct fsnotify_group *group, int fd) { - struct fanotify_perm_event_info *event, *return_e = NULL; + struct fanotify_perm_event *event, *return_e = NULL; spin_lock(&group->notification_lock); list_for_each_entry(event, &group->fanotify_data.access_list, @@ -169,7 +169,7 @@ static struct fanotify_perm_event_info *dequeue_event( static int process_access_response(struct fsnotify_group *group, struct fanotify_response *response_struct) { - struct fanotify_perm_event_info *event; + struct fanotify_perm_event *event; int fd = response_struct->fd; int response = response_struct->response; @@ -370,7 +370,7 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_perm_event_info *event, *next; + struct fanotify_perm_event *event, *next; struct fsnotify_event *fsn_event; /* @@ -688,7 +688,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; - struct fanotify_event_info *oevent; + struct fanotify_event *oevent; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -955,10 +955,10 @@ static int __init fanotify_user_setup(void) fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); + fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = - KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC); + KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } return 0; -- cgit v1.2.3 From bb2f7b4542c7a1d023d516af37dc70bb49db0438 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:33 +0200 Subject: fanotify: open code fill_event_metadata() The helper is quite trivial and open coding it will make it easier to implement copying event fid info to user. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 71 +++++++++++++++----------------------- 1 file changed, 27 insertions(+), 44 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 55cd87b0cc26..096503bd0edb 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -114,36 +114,6 @@ static int create_fd(struct fsnotify_group *group, return client_fd; } -static int fill_event_metadata(struct fsnotify_group *group, - struct fanotify_event_metadata *metadata, - struct fsnotify_event *fsn_event, - struct file **file) -{ - int ret = 0; - struct fanotify_event *event; - - pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, - group, metadata, fsn_event); - - *file = NULL; - event = container_of(fsn_event, struct fanotify_event, fse); - metadata->event_len = FAN_EVENT_METADATA_LEN; - metadata->metadata_len = FAN_EVENT_METADATA_LEN; - metadata->vers = FANOTIFY_METADATA_VERSION; - metadata->reserved = 0; - metadata->mask = event->mask & FANOTIFY_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->pid); - if (unlikely(event->mask & FAN_Q_OVERFLOW)) - metadata->fd = FAN_NOFD; - else { - metadata->fd = create_fd(group, event, file); - if (metadata->fd < 0) - ret = metadata->fd; - } - - return ret; -} - static struct fanotify_perm_event *dequeue_event( struct fsnotify_group *group, int fd) { @@ -205,37 +175,50 @@ static int process_access_response(struct fsnotify_group *group, } static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fsnotify_event *event, + struct fsnotify_event *fsn_event, char __user *buf, size_t count) { - struct fanotify_event_metadata fanotify_event_metadata; - struct file *f; + struct fanotify_event_metadata metadata; + struct fanotify_event *event; + struct file *f = NULL; int fd, ret; - pr_debug("%s: group=%p event=%p\n", __func__, group, event); + pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); - ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); - if (ret < 0) - return ret; + event = container_of(fsn_event, struct fanotify_event, fse); + metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.metadata_len = FAN_EVENT_METADATA_LEN; + metadata.vers = FANOTIFY_METADATA_VERSION; + metadata.reserved = 0; + metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; + metadata.pid = pid_vnr(event->pid); + + if (unlikely(event->mask & FAN_Q_OVERFLOW)) { + fd = FAN_NOFD; + } else { + fd = create_fd(group, event, &f); + if (fd < 0) + return fd; + } + metadata.fd = fd; - fd = fanotify_event_metadata.fd; ret = -EFAULT; /* * Sanity check copy size in case get_one_event() and * fill_event_metadata() event_len sizes ever get out of sync. */ - if (WARN_ON_ONCE(fanotify_event_metadata.event_len > count)) + if (WARN_ON_ONCE(metadata.event_len > count)) goto out_close_fd; - if (copy_to_user(buf, &fanotify_event_metadata, - fanotify_event_metadata.event_len)) + + if (copy_to_user(buf, &metadata, metadata.event_len)) goto out_close_fd; - if (fanotify_is_perm_event(FANOTIFY_E(event)->mask)) - FANOTIFY_PE(event)->fd = fd; + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PE(fsn_event)->fd = fd; if (fd != FAN_NOFD) fd_install(fd, f); - return fanotify_event_metadata.event_len; + return metadata.event_len; out_close_fd: if (fd != FAN_NOFD) { -- cgit v1.2.3 From e9e0c8903009477b630e37a8b6364b26a00720da Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:34 +0200 Subject: fanotify: encode file identifier for FAN_REPORT_FID When user requests the flag FAN_REPORT_FID in fanotify_init(), a unique file identifier of the event target object will be reported with the event. The file identifier includes the filesystem's fsid (i.e. from statfs(2)) and an NFS file handle of the file (i.e. from name_to_handle_at(2)). The file identifier makes holding the path reference and passing a file descriptor to user redundant, so those are disabled in a group with FAN_REPORT_FID. Encode fid and store it in event for a group with FAN_REPORT_FID. Up to 12 bytes of file handle on 32bit arch (16 bytes on 64bit arch) are stored inline in fanotify_event struct. Larger file handles are stored in an external allocated buffer. On failure to encode fid, we print a warning and queue the event without the fid information. [JK: Fold part of later patched into this one to use exportfs_encode_inode_fh() right away] Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 84 +++++++++++++++++++++++++++++++++++--- fs/notify/fanotify/fanotify.h | 78 +++++++++++++++++++++++++++++++++-- fs/notify/fanotify/fanotify_user.c | 6 +-- include/uapi/linux/fanotify.h | 1 + 4 files changed, 156 insertions(+), 13 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d8e3b6e50844..dd33227e518a 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "fanotify.h" @@ -25,10 +26,18 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->pid == new->pid && - old->path.mnt == new->path.mnt && - old->path.dentry == new->path.dentry) - return true; + if (old_fsn->inode != new_fsn->inode || old->pid != new->pid || + old->fh_type != new->fh_type || old->fh_len != new->fh_len) + return false; + + if (fanotify_event_has_path(old)) { + return old->path.mnt == new->path.mnt && + old->path.dentry == new->path.dentry; + } else if (fanotify_event_has_fid(old)) { + return fanotify_fid_equal(&old->fid, &new->fid, old->fh_len); + } + + /* Do not merge events if we failed to encode fid */ return false; } @@ -143,6 +152,60 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, ~marks_ignored_mask; } +static int fanotify_encode_fid(struct fanotify_event *event, + const struct path *path, gfp_t gfp) +{ + struct fanotify_fid *fid = &event->fid; + int dwords, bytes = 0; + struct kstatfs stat; + int err, type; + + stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0; + fid->ext_fh = NULL; + dwords = 0; + err = -ENOENT; + type = exportfs_encode_inode_fh(d_inode(path->dentry), NULL, &dwords, + NULL); + if (!dwords) + goto out_err; + + err = vfs_statfs(path, &stat); + if (err) + goto out_err; + + bytes = dwords << 2; + if (bytes > FANOTIFY_INLINE_FH_LEN) { + /* Treat failure to allocate fh as failure to allocate event */ + err = -ENOMEM; + fid->ext_fh = kmalloc(bytes, gfp); + if (!fid->ext_fh) + goto out_err; + } + + type = exportfs_encode_inode_fh(d_inode(path->dentry), + fanotify_fid_fh(fid, bytes), &dwords, + NULL); + err = -EINVAL; + if (!type || type == FILEID_INVALID || bytes != dwords << 2) + goto out_err; + + fid->fsid = stat.f_fsid; + event->fh_len = bytes; + + return type; + +out_err: + pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " + "type=%d, bytes=%d, err=%i)\n", + stat.f_fsid.val[0], stat.f_fsid.val[1], + type, bytes, err); + kfree(fid->ext_fh); + fid->ext_fh = NULL; + event->fh_len = 0; + + return FILEID_INVALID; +} + struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const struct path *path) @@ -181,10 +244,16 @@ init: __maybe_unused event->pid = get_pid(task_pid(current)); else event->pid = get_pid(task_tgid(current)); - if (path) { + event->fh_len = 0; + if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + /* Report the event without a file identifier on encode error */ + event->fh_type = fanotify_encode_fid(event, path, gfp); + } else if (path) { + event->fh_type = FILEID_ROOT; event->path = *path; path_get(&event->path); } else { + event->fh_type = FILEID_INVALID; event->path.mnt = NULL; event->path.dentry = NULL; } @@ -281,7 +350,10 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) struct fanotify_event *event; event = FANOTIFY_E(fsn_event); - path_put(&event->path); + if (fanotify_event_has_path(event)) + path_put(&event->path); + else if (fanotify_event_has_ext_fh(event)) + kfree(event->fid.ext_fh); put_pid(event->pid); if (fanotify_is_perm_event(event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 898b5b2bc1c7..271482fb9611 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,11 +2,49 @@ #include #include #include +#include extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +/* + * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation). + * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and + * fh_* fields increase the size of fanotify_event by another 4 bytes. + * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and + * fh_* fields are packed in a hole after mask. + */ +#if BITS_PER_LONG == 32 +#define FANOTIFY_INLINE_FH_LEN (3 << 2) +#else +#define FANOTIFY_INLINE_FH_LEN (4 << 2) +#endif + +struct fanotify_fid { + __kernel_fsid_t fsid; + union { + unsigned char fh[FANOTIFY_INLINE_FH_LEN]; + unsigned char *ext_fh; + }; +}; + +static inline void *fanotify_fid_fh(struct fanotify_fid *fid, + unsigned int fh_len) +{ + return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh; +} + +static inline bool fanotify_fid_equal(struct fanotify_fid *fid1, + struct fanotify_fid *fid2, + unsigned int fh_len) +{ + return fid1->fsid.val[0] == fid2->fsid.val[0] && + fid1->fsid.val[1] == fid2->fsid.val[1] && + !memcmp(fanotify_fid_fh(fid1, fh_len), + fanotify_fid_fh(fid2, fh_len), fh_len); +} + /* * Structure for normal fanotify events. It gets allocated in * fanotify_handle_event() and freed when the information is retrieved by @@ -16,13 +54,47 @@ struct fanotify_event { struct fsnotify_event fse; u32 mask; /* - * We hold ref to this path so it may be dereferenced at any point - * during this object's lifetime + * Those fields are outside fanotify_fid to pack fanotify_event nicely + * on 64bit arch and to use fh_type as an indication of whether path + * or fid are used in the union: + * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither. */ - struct path path; + u8 fh_type; + u8 fh_len; + u16 pad; + union { + /* + * We hold ref to this path so it may be dereferenced at any + * point during this object's lifetime + */ + struct path path; + /* + * With FAN_REPORT_FID, we do not hold any reference on the + * victim object. Instead we store its NFS file handle and its + * filesystem's fsid as a unique identifier. + */ + struct fanotify_fid fid; + }; struct pid *pid; }; +static inline bool fanotify_event_has_path(struct fanotify_event *event) +{ + return event->fh_type == FILEID_ROOT; +} + +static inline bool fanotify_event_has_fid(struct fanotify_event *event) +{ + return event->fh_type != FILEID_ROOT && + event->fh_type != FILEID_INVALID; +} + +static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) +{ + return fanotify_event_has_fid(event) && + event->fh_len > FANOTIFY_INLINE_FH_LEN; +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 096503bd0edb..c965fcf4979e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -181,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fanotify_event_metadata metadata; struct fanotify_event *event; struct file *f = NULL; - int fd, ret; + int ret, fd = FAN_NOFD; pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); @@ -193,9 +193,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (unlikely(event->mask & FAN_Q_OVERFLOW)) { - fd = FAN_NOFD; - } else { + if (fanotify_event_has_path(event)) { fd = create_fd(group, event, &f); if (fd < 0) return fd; diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 909c98fcace2..d07f3cbc2786 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -44,6 +44,7 @@ /* Flags to determine fanotify event format */ #define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ -- cgit v1.2.3 From 5e469c830fdb5a1ebaa69b375b87f583326fd296 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:35 +0200 Subject: fanotify: copy event fid info to user If group requested FAN_REPORT_FID and event has file identifier, copy that information to user reading the event after event metadata. fid information is formatted as struct fanotify_event_info_fid that includes a generic header struct fanotify_event_info_header, so that other info types could be defined in the future using the same header. metadata->event_len includes the length of the fid information. The fid information includes the filesystem's fsid (see statfs(2)) followed by an NFS file handle of the file that could be passed as an argument to open_by_handle_at(2). Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.h | 5 +++ fs/notify/fanotify/fanotify_user.c | 82 +++++++++++++++++++++++++++++++++++--- include/uapi/linux/fanotify.h | 20 ++++++++++ 3 files changed, 102 insertions(+), 5 deletions(-) diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 271482fb9611..4aafc7144c3d 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -95,6 +95,11 @@ static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) event->fh_len > FANOTIFY_INLINE_FH_LEN; } +static inline void *fanotify_event_fh(struct fanotify_event *event) +{ + return fanotify_fid_fh(&event->fid, event->fh_len); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c965fcf4979e..cd82dd713c91 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -47,6 +47,18 @@ struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; +#define FANOTIFY_EVENT_ALIGN 4 + +static int fanotify_event_info_len(struct fanotify_event *event) +{ + if (!fanotify_event_has_fid(event)) + return 0; + + return roundup(sizeof(struct fanotify_event_info_fid) + + sizeof(struct file_handle) + event->fh_len, + FANOTIFY_EVENT_ALIGN); +} + /* * Get an fsnotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count @@ -57,6 +69,9 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly; static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t count) { + size_t event_size = FAN_EVENT_METADATA_LEN; + struct fanotify_event *event; + assert_spin_locked(&group->notification_lock); pr_debug("%s: group=%p count=%zd\n", __func__, group, count); @@ -64,11 +79,18 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, if (fsnotify_notify_queue_is_empty(group)) return NULL; - if (FAN_EVENT_METADATA_LEN > count) + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + event = FANOTIFY_E(fsnotify_peek_first_event(group)); + event_size += fanotify_event_info_len(event); + } + + if (event_size > count) return ERR_PTR(-EINVAL); - /* held the notification_lock the whole time, so this is the - * same event we peeked above */ + /* + * Held the notification_lock the whole time, so this is the + * same event we peeked above + */ return fsnotify_remove_first_event(group); } @@ -174,6 +196,48 @@ static int process_access_response(struct fsnotify_group *group, return 0; } +static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) +{ + struct fanotify_event_info_fid info = { }; + struct file_handle handle = { }; + size_t fh_len = event->fh_len; + size_t len = fanotify_event_info_len(event); + + if (!len) + return 0; + + if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + return -EFAULT; + + /* Copy event info fid header followed by vaiable sized file handle */ + info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + info.hdr.len = len; + info.fsid = event->fid.fsid; + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + buf += sizeof(info); + len -= sizeof(info); + handle.handle_type = event->fh_type; + handle.handle_bytes = fh_len; + if (copy_to_user(buf, &handle, sizeof(handle))) + return -EFAULT; + + buf += sizeof(handle); + len -= sizeof(handle); + if (copy_to_user(buf, fanotify_event_fh(event), fh_len)) + return -EFAULT; + + /* Pad with 0's */ + buf += fh_len; + len -= fh_len; + WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); + if (len > 0 && clear_user(buf, len)) + return -EFAULT; + + return 0; +} + static ssize_t copy_event_to_user(struct fsnotify_group *group, struct fsnotify_event *fsn_event, char __user *buf, size_t count) @@ -197,6 +261,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, fd = create_fd(group, event, &f); if (fd < 0) return fd; + } else if (fanotify_event_has_fid(event)) { + metadata.event_len += fanotify_event_info_len(event); } metadata.fd = fd; @@ -208,14 +274,20 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (WARN_ON_ONCE(metadata.event_len > count)) goto out_close_fd; - if (copy_to_user(buf, &metadata, metadata.event_len)) + if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; if (fanotify_is_perm_event(event->mask)) FANOTIFY_PE(fsn_event)->fd = fd; - if (fd != FAN_NOFD) + if (fanotify_event_has_path(event)) { fd_install(fd, f); + } else if (fanotify_event_has_fid(event)) { + ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); + if (ret < 0) + return ret; + } + return metadata.event_len; out_close_fd: diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d07f3cbc2786..959ae2bdc7ca 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -107,6 +107,26 @@ struct fanotify_event_metadata { __s32 pid; }; +#define FAN_EVENT_INFO_TYPE_FID 1 + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* Unique file identifier info record */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[0]; +}; + struct fanotify_response { __s32 fd; __u32 response; -- cgit v1.2.3 From a8b13aa20afb69161b5123b4f1acc7ea0a03d360 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:36 +0200 Subject: fanotify: enable FAN_REPORT_FID init flag When setting up an fanotify listener, user may request to get fid information in event instead of an open file descriptor. The fid obtained with event on a watched object contains the file handle returned by name_to_handle_at(2) and fsid returned by statfs(2). Restrict FAN_REPORT_FID to class FAN_CLASS_NOTIF, because we have have no good reason to support reporting fid on permission events. When setting a mark, we need to make sure that the filesystem supports encoding file handles with name_to_handle_at(2) and that statfs(2) encodes a non-zero fsid. Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 61 +++++++++++++++++++++++++++++++++++++- include/linux/fanotify.h | 2 +- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index cd82dd713c91..1638c171ca82 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include @@ -768,6 +770,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return -EINVAL; } + if ((flags & FAN_REPORT_FID) && + (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF) + return -EINVAL; + user = get_current_user(); if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) { free_uid(user); @@ -854,6 +860,52 @@ out_destroy_group: return fd; } +/* Check if filesystem can encode a unique fid */ +static int fanotify_test_fid(struct path *path) +{ + struct kstatfs stat, root_stat; + struct path root = { + .mnt = path->mnt, + .dentry = path->dentry->d_sb->s_root, + }; + int err; + + /* + * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). + */ + err = vfs_statfs(path, &stat); + if (err) + return err; + + if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1]) + return -ENODEV; + + /* + * Make sure path is not inside a filesystem subvolume (e.g. btrfs) + * which uses a different fsid than sb root. + */ + err = vfs_statfs(&root, &root_stat); + if (err) + return err; + + if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] || + root_stat.f_fsid.val[1] != stat.f_fsid.val[1]) + return -EXDEV; + + /* + * We need to make sure that the file system supports at least + * encoding a file handle so user can use name_to_handle_at() to + * compare fid returned with event to the file handle of watched + * objects. However, name_to_handle_at() requires that the + * filesystem also supports decoding file handles. + */ + if (!path->dentry->d_sb->s_export_op || + !path->dentry->d_sb->s_export_op->fh_to_dentry) + return -EOPNOTSUPP; + + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -939,6 +991,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + ret = fanotify_test_fid(&path); + if (ret) + goto path_put_and_out; + } + /* inode held in place by reference to path; group by fget on fd */ if (mark_type == FAN_MARK_INODE) inode = path.dentry->d_inode; @@ -967,6 +1025,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, ret = -EINVAL; } +path_put_and_out: path_put(&path); fput_and_out: fdput(f); @@ -1003,7 +1062,7 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 9e2142795335..f59be967f72b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -19,7 +19,7 @@ FAN_CLASS_PRE_CONTENT) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ - FAN_REPORT_TID | \ + FAN_REPORT_TID | FAN_REPORT_FID | \ FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) -- cgit v1.2.3 From 77115225acc67d9ac4b15f04dd138006b9cd1ef2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 10 Jan 2019 19:04:37 +0200 Subject: fanotify: cache fsid in fsnotify_mark_connector For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on every event. To avoid having to call vfs_statfs() on every event to get fsid, we store the fsid in fsnotify_mark_connector on the first time we add a mark and on handle event we use the cached fsid. Subsequent calls to add mark on the same object are expected to pass the same fsid, so the call will fail on cached fsid mismatch. If an event is reported on several mark types (inode, mount, filesystem), all connectors should already have the same fsid, so we use the cached fsid from the first connector. [JK: Simplify code flow around fanotify_get_fid() make fsid argument of fsnotify_add_mark_locked() unconditional] Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 59 ++++++++++++++++++++++++------------ fs/notify/fanotify/fanotify.h | 5 +-- fs/notify/fanotify/fanotify_user.c | 62 +++++++++++++++++++++++--------------- fs/notify/mark.c | 42 +++++++++++++++++++++----- include/linux/fsnotify_backend.h | 18 ++++++++--- 5 files changed, 128 insertions(+), 58 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index dd33227e518a..555831603637 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -153,26 +153,20 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info, } static int fanotify_encode_fid(struct fanotify_event *event, - const struct path *path, gfp_t gfp) + struct inode *inode, gfp_t gfp, + __kernel_fsid_t *fsid) { struct fanotify_fid *fid = &event->fid; int dwords, bytes = 0; - struct kstatfs stat; int err, type; - stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0; fid->ext_fh = NULL; dwords = 0; err = -ENOENT; - type = exportfs_encode_inode_fh(d_inode(path->dentry), NULL, &dwords, - NULL); + type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); if (!dwords) goto out_err; - err = vfs_statfs(path, &stat); - if (err) - goto out_err; - bytes = dwords << 2; if (bytes > FANOTIFY_INLINE_FH_LEN) { /* Treat failure to allocate fh as failure to allocate event */ @@ -182,14 +176,13 @@ static int fanotify_encode_fid(struct fanotify_event *event, goto out_err; } - type = exportfs_encode_inode_fh(d_inode(path->dentry), - fanotify_fid_fh(fid, bytes), &dwords, - NULL); + type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes), + &dwords, NULL); err = -EINVAL; if (!type || type == FILEID_INVALID || bytes != dwords << 2) goto out_err; - fid->fsid = stat.f_fsid; + fid->fsid = *fsid; event->fh_len = bytes; return type; @@ -197,8 +190,7 @@ static int fanotify_encode_fid(struct fanotify_event *event, out_err: pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " "type=%d, bytes=%d, err=%i)\n", - stat.f_fsid.val[0], stat.f_fsid.val[1], - type, bytes, err); + fsid->val[0], fsid->val[1], type, bytes, err); kfree(fid->ext_fh); fid->ext_fh = NULL; event->fh_len = 0; @@ -207,8 +199,9 @@ out_err: } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path) + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; @@ -247,7 +240,8 @@ init: __maybe_unused event->fh_len = 0; if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { /* Report the event without a file identifier on encode error */ - event->fh_type = fanotify_encode_fid(event, path, gfp); + event->fh_type = fanotify_encode_fid(event, + d_inode(path->dentry), gfp, fsid); } else if (path) { event->fh_type = FILEID_ROOT; event->path = *path; @@ -262,6 +256,29 @@ out: return event; } +/* + * Get cached fsid of the filesystem containing the object from any connector. + * All connectors are supposed to have the same fsid, but we do not verify that + * here. + */ +static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) +{ + int type; + __kernel_fsid_t fsid = {}; + + fsnotify_foreach_obj_type(type) { + if (!fsnotify_iter_should_report_type(iter_info, type)) + continue; + + fsid = iter_info->marks[type]->connector->fsid; + if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) + continue; + return fsid; + } + + return fsid; +} + static int fanotify_handle_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, @@ -271,6 +288,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, int ret = 0; struct fanotify_event *event; struct fsnotify_event *fsn_event; + __kernel_fsid_t fsid = {}; BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); @@ -303,7 +321,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, return 0; } - event = fanotify_alloc_event(group, inode, mask, data); + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) + fsid = fanotify_get_fsid(iter_info); + + event = fanotify_alloc_event(group, inode, mask, data, &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4aafc7144c3d..5b072afa4e19 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -131,5 +131,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, - struct inode *inode, u32 mask, - const struct path *path); + struct inode *inode, u32 mask, + const struct path *path, + __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1638c171ca82..603419ce096f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -653,7 +653,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, - unsigned int type) + unsigned int type, + __kernel_fsid_t *fsid) { struct fsnotify_mark *mark; int ret; @@ -666,7 +667,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, return ERR_PTR(-ENOMEM); fsnotify_init_mark(mark, group); - ret = fsnotify_add_mark_locked(mark, connp, type, 0); + ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -678,7 +679,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, static int fanotify_add_mark(struct fsnotify_group *group, fsnotify_connp_t *connp, unsigned int type, - __u32 mask, unsigned int flags) + __u32 mask, unsigned int flags, + __kernel_fsid_t *fsid) { struct fsnotify_mark *fsn_mark; __u32 added; @@ -686,7 +688,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, mutex_lock(&group->mark_mutex); fsn_mark = fsnotify_find_mark(connp, group); if (!fsn_mark) { - fsn_mark = fanotify_add_new_mark(group, connp, type); + fsn_mark = fanotify_add_new_mark(group, connp, type, fsid); if (IS_ERR(fsn_mark)) { mutex_unlock(&group->mark_mutex); return PTR_ERR(fsn_mark); @@ -703,23 +705,23 @@ static int fanotify_add_mark(struct fsnotify_group *group, static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags); + FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid); } static int fanotify_add_sb_mark(struct fsnotify_group *group, - struct super_block *sb, __u32 mask, - unsigned int flags) + struct super_block *sb, __u32 mask, + unsigned int flags, __kernel_fsid_t *fsid) { return fanotify_add_mark(group, &sb->s_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_SB, mask, flags); + FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid); } static int fanotify_add_inode_mark(struct fsnotify_group *group, struct inode *inode, __u32 mask, - unsigned int flags) + unsigned int flags, __kernel_fsid_t *fsid) { pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); @@ -734,7 +736,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; return fanotify_add_mark(group, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, mask, flags); + FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid); } /* fanotify syscalls */ @@ -798,7 +800,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); - oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL); + oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -861,9 +863,9 @@ out_destroy_group: } /* Check if filesystem can encode a unique fid */ -static int fanotify_test_fid(struct path *path) +static int fanotify_test_fid(struct path *path, struct kstatfs *stat) { - struct kstatfs stat, root_stat; + struct kstatfs root_stat; struct path root = { .mnt = path->mnt, .dentry = path->dentry->d_sb->s_root, @@ -873,11 +875,11 @@ static int fanotify_test_fid(struct path *path) /* * Make sure path is not in filesystem with zero fsid (e.g. tmpfs). */ - err = vfs_statfs(path, &stat); + err = vfs_statfs(path, stat); if (err) return err; - if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1]) + if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1]) return -ENODEV; /* @@ -888,8 +890,8 @@ static int fanotify_test_fid(struct path *path) if (err) return err; - if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] || - root_stat.f_fsid.val[1] != stat.f_fsid.val[1]) + if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] || + root_stat.f_fsid.val[1] != stat->f_fsid.val[1]) return -EXDEV; /* @@ -914,6 +916,8 @@ static int d