summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/security/keys/core.rst57
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/watch_queue.rst339
-rw-r--r--fs/pipe.c242
-rw-r--r--fs/splice.c12
-rw-r--r--include/linux/key.h33
-rw-r--r--include/linux/lsm_audit.h1
-rw-r--r--include/linux/lsm_hook_defs.h9
-rw-r--r--include/linux/lsm_hooks.h14
-rw-r--r--include/linux/pipe_fs_i.h27
-rw-r--r--include/linux/security.h30
-rw-r--r--include/linux/watch_queue.h127
-rw-r--r--include/uapi/linux/keyctl.h2
-rw-r--r--include/uapi/linux/watch_queue.h104
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/watch_queue.c655
-rw-r--r--samples/Kconfig7
-rw-r--r--samples/Makefile1
-rw-r--r--samples/watch_queue/Makefile7
-rw-r--r--samples/watch_queue/watch_test.c186
-rw-r--r--security/keys/Kconfig9
-rw-r--r--security/keys/compat.c3
-rw-r--r--security/keys/gc.c5
-rw-r--r--security/keys/internal.h38
-rw-r--r--security/keys/key.c38
-rw-r--r--security/keys/keyctl.c115
-rw-r--r--security/keys/keyring.c20
-rw-r--r--security/keys/permission.c31
-rw-r--r--security/keys/process_keys.c46
-rw-r--r--security/keys/request_key.c4
-rw-r--r--security/security.c22
-rw-r--r--security/selinux/hooks.c51
-rw-r--r--security/smack/smack_lsm.c112
34 files changed, 2182 insertions, 179 deletions
diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9367d0fe4a02..cdc42ccc12e4 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1030,6 +1030,63 @@ The keyctl syscall functions are:
written into the output buffer. Verification returns 0 on success.
+ * Watch a key or keyring for changes::
+
+ long keyctl(KEYCTL_WATCH_KEY, key_serial_t key, int queue_fd,
+ const struct watch_notification_filter *filter);
+
+ This will set or remove a watch for changes on the specified key or
+ keyring.
+
+ "key" is the ID of the key to be watched.
+
+ "queue_fd" is a file descriptor referring to an open "/dev/watch_queue"
+ which manages the buffer into which notifications will be delivered.
+
+ "filter" is either NULL to remove a watch or a filter specification to
+ indicate what events are required from the key.
+
+ See Documentation/watch_queue.rst for more information.
+
+ Note that only one watch may be emplaced for any particular { key,
+ queue_fd } combination.
+
+ Notification records look like::
+
+ struct key_notification {
+ struct watch_notification watch;
+ __u32 key_id;
+ __u32 aux;
+ };
+
+ In this, watch::type will be "WATCH_TYPE_KEY_NOTIFY" and subtype will be
+ one of::
+
+ NOTIFY_KEY_INSTANTIATED
+ NOTIFY_KEY_UPDATED
+ NOTIFY_KEY_LINKED
+ NOTIFY_KEY_UNLINKED
+ NOTIFY_KEY_CLEARED
+ NOTIFY_KEY_REVOKED
+ NOTIFY_KEY_INVALIDATED
+ NOTIFY_KEY_SETATTR
+
+ Where these indicate a key being instantiated/rejected, updated, a link
+ being made in a keyring, a link being removed from a keyring, a keyring
+ being cleared, a key being revoked, a key being invalidated or a key
+ having one of its attributes changed (user, group, perm, timeout,
+ restriction).
+
+ If a watched key is deleted, a basic watch_notification will be issued
+ with "type" set to WATCH_TYPE_META and "subtype" set to
+ watch_meta_removal_notification. The watchpoint ID will be set in the
+ "info" field.
+
+ This needs to be configured by enabling:
+
+ "Provide key/keyring change notifications" (KEY_NOTIFICATIONS)
+
+
Kernel Services
===============
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 1f3da8f32fc1..59472cd6a11d 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -202,6 +202,7 @@ Code Seq# Include File Comments
'W' 00-1F linux/wanrouter.h conflict! (pre 3.9)
'W' 00-3F sound/asound.h conflict!
'W' 40-5F drivers/pci/switch/switchtec.c
+'W' 60-61 linux/watch_queue.h
'X' all fs/xfs/xfs_fs.h, conflict!
fs/xfs/linux-2.6/xfs_ioctl32.h,
include/linux/falloc.h,
diff --git a/Documentation/watch_queue.rst b/Documentation/watch_queue.rst
new file mode 100644
index 000000000000..849fad6893ef
--- /dev/null
+++ b/Documentation/watch_queue.rst
@@ -0,0 +1,339 @@
+==============================
+General notification mechanism
+==============================
+
+The general notification mechanism is built on top of the standard pipe driver
+whereby it effectively splices notification messages from the kernel into pipes
+opened by userspace. This can be used in conjunction with::
+
+ * Key/keyring notifications
+
+
+The notifications buffers can be enabled by:
+
+ "General setup"/"General notification queue"
+ (CONFIG_WATCH_QUEUE)
+
+This document has the following sections:
+
+.. contents:: :local:
+
+
+Overview
+========
+
+This facility appears as a pipe that is opened in a special mode. The pipe's
+internal ring buffer is used to hold messages that are generated by the kernel.
+These messages are then read out by read(). Splice and similar are disabled on
+such pipes due to them wanting to, under some circumstances, revert their
+additions to the ring - which might end up interleaved with notification
+messages.
+
+The owner of the pipe has to tell the kernel which sources it would like to
+watch through that pipe. Only sources that have been connected to a pipe will
+insert messages into it. Note that a source may be bound to multiple pipes and
+insert messages into all of them simultaneously.
+
+Filters may also be emplaced on a pipe so that certain source types and
+subevents can be ignored if they're not of interest.
+
+A message will be discarded if there isn't a slot available in the ring or if
+no preallocated message buffer is available. In both of these cases, read()
+will insert a WATCH_META_LOSS_NOTIFICATION message into the output buffer after
+the last message currently in the buffer has been read.
+
+Note that when producing a notification, the kernel does not wait for the
+consumers to collect it, but rather just continues on. This means that
+notifications can be generated whilst spinlocks are held and also protects the
+kernel from being held up indefinitely by a userspace malfunction.
+
+
+Message Structure
+=================
+
+Notification messages begin with a short header::
+
+ struct watch_notification {
+ __u32 type:24;
+ __u32 subtype:8;
+ __u32 info;
+ };
+
+"type" indicates the source of the notification record and "subtype" indicates
+the type of record from that source (see the Watch Sources section below). The
+type may also be "WATCH_TYPE_META". This is a special record type generated
+internally by the watch queue itself. There are two subtypes:
+
+ * WATCH_META_REMOVAL_NOTIFICATION
+ * WATCH_META_LOSS_NOTIFICATION
+
+The first indicates that an object on which a watch was installed was removed
+or destroyed and the second indicates that some messages have been lost.
+
+"info" indicates a bunch of things, including:
+
+ * The length of the message in bytes, including the header (mask with
+ WATCH_INFO_LENGTH and shift by WATCH_INFO_LENGTH__SHIFT). This indicates
+ the size of the record, which may be between 8 and 127 bytes.
+
+ * The watch ID (mask with WATCH_INFO_ID and shift by WATCH_INFO_ID__SHIFT).
+ This indicates that caller's ID of the watch, which may be between 0
+ and 255. Multiple watches may share a queue, and this provides a means to
+ distinguish them.
+
+ * A type-specific field (WATCH_INFO_TYPE_INFO). This is set by the
+ notification producer to indicate some meaning specific to the type and
+ subtype.
+
+Everything in info apart from the length can be used for filtering.
+
+The header can be followed by supplementary information. The format of this is
+at the discretion is defined by the type and subtype.
+
+
+Watch List (Notification Source) API
+====================================
+
+A "watch list" is a list of watchers that are subscribed to a source of
+notifications. A list may be attached to an object (say a key or a superblock)
+or may be global (say for device events). From a userspace perspective, a
+non-global watch list is typically referred to by reference to the object it
+belongs to (such as using KEYCTL_NOTIFY and giving it a key serial number to
+watch that specific key).
+
+To manage a watch list, the following functions are provided:
+
+ * ``void init_watch_list(struct watch_list *wlist,
+ void (*release_watch)(struct watch *wlist));``
+
+ Initialise a watch list. If ``release_watch`` is not NULL, then this
+ indicates a function that should be called when the watch_list object is
+ destroyed to discard any references the watch list holds on the watched
+ object.
+
+ * ``void remove_watch_list(struct watch_list *wlist);``
+
+ This removes all of the watches subscribed to a watch_list and frees them
+ and then destroys the watch_list object itself.
+
+
+Watch Queue (Notification Output) API
+=====================================
+
+A "watch queue" is the buffer allocated by an application that notification
+records will be written into. The workings of this are hidden entirely inside
+of the pipe device driver, but it is necessary to gain a reference to it to set
+a watch. These can be managed with:
+
+ * ``struct watch_queue *get_watch_queue(int fd);``
+
+ Since watch queues are indicated to the kernel by the fd of the pipe that
+ implements the buffer, userspace must hand that fd through a system call.
+ This can be used to look up an opaque pointer to the watch queue from the
+ system call.
+
+ * ``void put_watch_queue(struct watch_queue *wqueue);``
+
+ This discards the reference obtained from ``get_watch_queue()``.
+
+
+Watch Subscription API
+======================
+
+A "watch" is a subscription on a watch list, indicating the watch queue, and
+thus the buffer, into which notification records should be written. The watch
+queue object may also carry filtering rules for that object, as set by
+userspace. Some parts of the watch struct can be set by the driver::
+
+ struct watch {
+ union {
+ u32 info_id; /* ID to be OR'd in to info field */
+ ...
+ };
+ void *private; /* Private data for the watched object */
+ u64 id; /* Internal identifier */
+ ...
+ };
+
+The ``info_id`` value should be an 8-bit number obtained from userspace and
+shifted by WATCH_INFO_ID__SHIFT. This is OR'd into the WATCH_INFO_ID field of
+struct watch_notification::info when and if the notification is written into
+the associated watch queue buffer.
+
+The ``private`` field is the driver's data associated with the watch_list and
+is cleaned up by the ``watch_list::release_watch()`` method.
+
+The ``id`` field is the source's ID. Notifications that are posted with a
+different ID are ignored.
+
+The following functions are provided to manage watches:
+
+ * ``void init_watch(struct watch *watch, struct watch_queue *wqueue);``
+
+ Initialise a watch object, setting its pointer to the watch queue, using
+ appropriate barriering to avoid lockdep complaints.
+
+ * ``int add_watch_to_object(struct watch *watch, struct watch_list *wlist);``
+
+ Subscribe a watch to a watch list (notification source). The
+ driver-settable fields in the watch struct must have been set before this
+ is called.
+
+ * ``int remove_watch_from_object(struct watch_list *wlist,
+ struct watch_queue *wqueue,
+ u64 id, false);``
+
+ Remove a watch from a watch list, where the watch must match the specified
+ watch queue (``wqueue``) and object identifier (``id``). A notification
+ (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue to
+ indicate that the watch got removed.
+
+ * ``int remove_watch_from_object(struct watch_list *wlist, NULL, 0, true);``
+
+ Remove all the watches from a watch list. It is expected that this will be
+ called preparatory to destruction and that the watch list will be
+ inaccessible to new watches by this point. A notification
+ (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue of each
+ subscribed watch to indicate that the watch got removed.
+
+
+Notification Posting API
+========================
+
+To post a notification to watch list so that the subscribed watches can see it,
+the following function should be used::
+
+ void post_watch_notification(struct watch_list *wlist,
+ struct watch_notification *n,
+ const struct cred *cred,
+ u64 id);
+
+The notification should be preformatted and a pointer to the header (``n``)
+should be passed in. The notification may be larger than this and the size in
+units of buffer slots is noted in ``n->info & WATCH_INFO_LENGTH``.
+
+The ``cred`` struct indicates the credentials of the source (subject) and is
+passed to the LSMs, such as SELinux, to allow or suppress the recording of the
+note in each individual queue according to the credentials of that queue
+(object).
+
+The ``id`` is the ID of the source object (such as the serial number on a key).
+Only watches that have the same ID set in them will see this notification.
+
+
+Watch Sources
+=============
+
+Any particular buffer can be fed from multiple sources. Sources include:
+
+ * WATCH_TYPE_KEY_NOTIFY
+
+ Notifications of this type indicate changes to keys and keyrings, including
+ the changes of keyring contents or the attributes of keys.
+
+ See Documentation/security/keys/core.rst for more information.
+
+
+Event Filtering
+===============
+
+Once a watch queue has been created, a set of filters can be applied to limit
+the events that are received using::
+
+ struct watch_notification_filter filter = {
+ ...
+ };
+ ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter)
+
+The filter description is a variable of type::
+
+ struct watch_notification_filter {
+ __u32 nr_filters;
+ __u32 __reserved;
+ struct watch_notification_type_filter filters[];
+ };
+
+Where "nr_filters" is the number of filters in filters[] and "__reserved"
+should be 0. The "filters" array has elements of the following type::
+
+ struct watch_notification_type_filter {
+ __u32 type;
+ __u32 info_filter;
+ __u32 info_mask;
+ __u32 subtype_filter[8];
+ };
+
+Where:
+
+ * ``type`` is the event type to filter for and should be something like
+ "WATCH_TYPE_KEY_NOTIFY"
+
+ * ``info_filter`` and ``info_mask`` act as a filter on the info field of the
+ notification record. The notification is only written into the buffer if::
+
+ (watch.info & info_mask) == info_filter
+
+ This could be used, for example, to ignore events that are not exactly on
+ the watched point in a mount tree.
+
+ * ``subtype_filter`` is a bitmask indicating the subtypes that are of
+ interest. Bit 0 of subtype_filter[0] corresponds to subtype 0, bit 1 to
+ subtype 1, and so on.
+
+If the argument to the ioctl() is NULL, then the filters will be removed and
+all events from the watched sources will come through.
+
+
+Userspace Code Example
+======================
+
+A buffer is created with something like the following::
+
+ pipe2(fds, O_TMPFILE);
+ ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256);
+
+It can then be set to receive keyring change notifications::
+
+ keyctl(KEYCTL_WATCH_KEY, KEY_SPEC_SESSION_KEYRING, fds[1], 0x01);
+
+The notifications can then be consumed by something like the following::
+
+ static void consumer(int rfd, struct watch_queue_buffer *buf)
+ {
+ unsigned char buffer[128];
+ ssize_t buf_len;
+
+ while (buf_len = read(rfd, buffer, sizeof(buffer)),
+ buf_len > 0
+ ) {
+ void *p = buffer;
+ void *end = buffer + buf_len;
+ while (p < end) {
+ union {
+ struct watch_notification n;
+ unsigned char buf1[128];
+ } n;
+ size_t largest, len;
+
+ largest = end - p;
+ if (largest > 128)
+ largest = 128;
+ memcpy(&n, p, largest);
+
+ len = (n->info & WATCH_INFO_LENGTH) >>
+ WATCH_INFO_LENGTH__SHIFT;
+ if (len == 0 || len > largest)
+ return;
+
+ switch (n.n.type) {
+ case WATCH_TYPE_META:
+ got_meta(&n.n);
+ case WATCH_TYPE_KEY_NOTIFY:
+ saw_key_change(&n.n);
+ break;
+ }
+
+ p += len;
+ }
+ }
+ }
diff --git a/fs/pipe.c b/fs/pipe.c
index c7c4fb5f345f..60dbee457143 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -24,6 +24,7 @@
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
+#include <linux/watch_queue.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -259,14 +260,44 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
+#ifdef CONFIG_WATCH_QUEUE
+ if (pipe->note_loss) {
+ struct watch_notification n;
+
+ if (total_len < 8) {
+ if (ret == 0)
+ ret = -ENOBUFS;
+ break;
+ }
+
+ n.type = WATCH_TYPE_META;
+ n.subtype = WATCH_META_LOSS_NOTIFICATION;
+ n.info = watch_sizeof(n);
+ if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
+ ret += sizeof(n);
+ total_len -= sizeof(n);
+ pipe->note_loss = false;
+ }
+#endif
+
if (!pipe_empty(head, tail)) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
size_t chars = buf->len;
size_t written;
int error;
- if (chars > total_len)
+ if (chars > total_len) {
+ if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
+ if (ret == 0)
+ ret = -ENOBUFS;
+ break;
+ }
chars = total_len;
+ }
error = pipe_buf_confirm(pipe, buf);
if (error) {
@@ -294,6 +325,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (!buf->len) {
pipe_buf_release(pipe, buf);
spin_lock_irq(&pipe->rd_wait.lock);
+#ifdef CONFIG_WATCH_QUEUE
+ if (buf->flags & PIPE_BUF_FLAG_LOSS)
+ pipe->note_loss = true;
+#endif
tail++;
pipe->tail = tail;
spin_unlock_irq(&pipe->rd_wait.lock);
@@ -405,6 +440,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
+#ifdef CONFIG_WATCH_QUEUE
+ if (pipe->watch_queue) {
+ ret = -EXDEV;
+ goto out;
+ }
+#endif
+
/*
* Only wake up if the pipe started out empty, since
* otherwise there should be no readers waiting.
@@ -574,22 +616,37 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
int count, head, tail, mask;
switch (cmd) {
- case FIONREAD:
- __pipe_lock(pipe);
- count = 0;
- head = pipe->head;
- tail = pipe->tail;
- mask = pipe->ring_size - 1;
+ case FIONREAD:
+ __pipe_lock(pipe);
+ count = 0;
+ head = pipe->head;
+ tail = pipe->tail;
+ mask = pipe->ring_size - 1;
- while (tail != head) {
- count += pipe->bufs[tail & mask].len;
- tail++;
- }
- __pipe_unlock(pipe);
+ while (tail != head) {
+ count += pipe->bufs[tail & mask].len;
+ tail++;
+ }
+ __pipe_unlock(pipe);
+
+ return put_user(count, (int __user *)arg);
- return put_user(count, (int __user *)arg);
- default:
- return -ENOIOCTLCMD;
+#ifdef CONFIG_WATCH_QUEUE
+ case IOC_WATCH_QUEUE_SET_SIZE: {
+ int ret;
+ __pipe_lock(pipe);
+ ret = watch_queue_set_size(pipe, arg);
+ __pipe_unlock(pipe);
+ return ret;
+ }
+
+ case IOC_WATCH_QUEUE_SET_FILTER:
+ return watch_queue_set_filter(
+ pipe, (struct watch_notification_filter __user *)arg);
+#endif
+
+ default:
+ return -ENOIOCTLCMD;
}
}
@@ -700,27 +757,27 @@ pipe_fasync(int fd, struct file *filp, int on)
return retval;
}
-static unsigned long account_pipe_buffers(struct user_struct *user,
- unsigned long old, unsigned long new)
+unsigned long account_pipe_buffers(struct user_struct *user,
+ unsigned long old, unsigned long new)
{
return atomic_long_add_return(new - old, &user->pipe_bufs);
}
-static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
+bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{
unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
return soft_limit && user_bufs > soft_limit;
}
-static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
+bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{
unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
return hard_limit && user_bufs > hard_limit;
}
-static bool is_unprivileged_user(void)
+bool pipe_is_unprivileged_user(void)
{
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}
@@ -742,12 +799,12 @@ struct pipe_inode_info *alloc_pipe_info(void)
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
- if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
+ if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
pipe_bufs = 1;
}
- if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
+ if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
goto out_revert_acct;
pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
@@ -759,6 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->r_counter = pipe->w_counter = 1;
pipe->max_usage = pipe_bufs;
pipe->ring_size = pipe_bufs;
+ pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
return pipe;
@@ -776,7 +834,14 @@ void free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
- (void) account_pipe_buffers(pipe->user, pipe->ring_size, 0);
+#ifdef CONFIG_WATCH_QUEUE
+ if (pipe->watch_queue) {
+ watch_queue_clear(pipe->watch_queue);
+ put_watch_queue(pipe->watch_queue);
+ }
+#endif
+
+ (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
free_uid(pipe->user);
for (i = 0; i < pipe->ring_size; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
@@ -852,6 +917,17 @@ int create_pipe_files(struct file **res, int flags)
if (!inode)
return -ENFILE;
+ if (flags & O_NOTIFICATION_PIPE) {
+#ifdef CONFIG_WATCH_QUEUE
+ if (watch_queue_init(inode->i_pipe) < 0) {
+ iput(inode);
+ return -ENOMEM;
+ }
+#else
+ return -ENOPKG;
+#endif
+ }
+
f = alloc_file_pseudo(inode, pipe_mnt, "",
O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
&pipefifo_fops);
@@ -882,7 +958,7 @@ static int __do_pipe_flags(int *fd, struct file **files, int flags)
int error;
int fdw, fdr;
- if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
+ if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
return -EINVAL;
error = create_pipe_files(files, flags);
@@ -1130,42 +1206,12 @@ unsigned int round_pipe_size(unsigned long size)
}
/*
- * Allocate a new array of pipe buffers and copy the info over. Returns the
- * pipe size if successful, or return -ERROR on error.
+ * Resize the pipe ring to a number of slots.
*/
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
struct pipe_buffer *bufs;
- unsigned int size, nr_slots, head, tail, mask, n;
- unsigned long user_bufs;
- long ret = 0;
-
- size = round_pipe_size(arg);
- nr_slots = size >> PAGE_SHIFT;
-
- if (!nr_slots)
- return -EINVAL;
-
- /*
- * If trying to increase the pipe capacity, check that an
- * unprivileged user is not trying to exceed various limits
- * (soft limit check here, hard limit check just below).
- * Decreasing the pipe capacity is always permitted, even
- * if the user is currently over a limit.
- */
- if (nr_slots > pipe->ring_size &&
- size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- user_bufs = account_pipe_buffers(pipe->user, pipe->ring_size, nr_slots);
-
- if (nr_slots > pipe->ring_size &&
- (too_many_pipe_buffers_hard(user_bufs) ||
- too_many_pipe_buffers_soft(user_bufs)) &&
- is_unprivileged_user()) {
- ret = -EPERM;
- goto out_revert_acct;
- }
+ unsigned int head, tail, mask, n;
/*
* We can shrink the pipe, if arg is greater than the ring occupancy.
@@ -1177,17 +1223,13 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
head = pipe->head;
tail = pipe->tail;
n = pipe_occupancy(pipe->head, pipe->tail);
- if (nr_slots < n) {
- ret = -EBUSY;
- goto out_revert_acct;
- }
+ if (nr_slots < n)
+ return -EBUSY;
bufs = kcalloc(nr_slots, sizeof(*bufs),
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
- if (unlikely(!bufs)) {
- ret = -ENOMEM;
- goto out_revert_acct;
- }
+ if (unlikely(!bufs))
+ return -ENOMEM;
/*
* The pipe array wraps around, so just start the new one at zero
@@ -1215,16 +1257,68 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
kfree(pipe->bufs);
pipe->bufs = bufs;
pipe->ring_size = nr_slots;
- pipe->max_usage = nr_slots;
+ if (pipe->max_usage > nr_slots)
+ pipe->max_usage = nr_slots;
pipe->tail = tail;
pipe->head = head;
/* This might have made more room for writers */
wake_up_interruptible(&pipe->wr_wait);
+ return 0;
+}
+
+/*
+ * Allocate a new array of pipe buffers and copy the info over. Returns the
+ * pipe size if successful, or return -ERROR on error.
+ */
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+{
+ unsigned long user_bufs;
+ unsigned int nr_slots, size;
+ long ret = 0;
+
+#ifdef CONFIG_WATCH_QUEUE
+ if (pipe->watch_queue)
+ return -EBUSY;
+#endif
+
+ size = round_pipe_size(arg);
+ nr_slots = size >> PAGE_SHIFT;
+
+ if (!nr_slots)
+ return -EINVAL;
+
+ /*
+ * If trying to increase the pipe capacity, check that an
+ * unprivileged user is not trying to exceed various limits
+ * (soft limit check here, hard limit check just below).
+ * Decreasing the pipe capacity is always permitted, even
+ * if the user is currently over a limit.
+ */
+ if (nr_slots > pipe->max_usage &&
+ size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+
+ user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
+
+ if (nr_slots > pipe->max_usage &&
+ (too_many_pipe_buffers_hard(user_bufs) ||
+ too_many_pipe_buffers_soft(user_bufs)) &&
+ pipe_is_unprivileged_user()) {
+ ret = -EPERM;
+ goto out_revert_acct;
+ }
+
+ ret = pipe_resize_ring(pipe, nr_slots);
+ if (ret < 0)
+ goto out_revert_acct;
+
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
- (void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size);
+ (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
return ret;
}
@@ -1233,9 +1327,17 @@ out_revert_acct:
* location, so checking ->i_pipe is not enough to verify that this is a
* pipe.
*/
-struct pipe_inode_info *get_pipe_info(struct file *file)
+struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
- return file->f_op == &pipefifo_fops ? file->private_data : NULL;
+ struct pipe_inode_info *pipe = file->private_data;
+
+ if (file->f_op != &pipefifo_fops || !pipe)
+ return NULL;
+#ifdef CONFIG_WATCH_QUEUE
+ if (for_splice && pipe->watch_queue)
+ return NULL;
+#endif
+ return pipe;
}
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1243,7 +1345,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
struct pipe_inode_info *pipe;
long ret;
- pipe = get_pipe_info(file);
+ pipe = get_pipe_info(file, false);
if (!pipe)
return -EBADF;
diff --git a/fs/splice.c b/fs/splice.c
index 6b3c9a018a8e..d7c8a7c4db07 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1101,8 +1101,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
!(out->f_mode & FMODE_WRITE)))
return -EBADF;
- ipipe = get_pipe_info(in);
- opipe = get_pipe_info(out);
+ ipipe = get_p