summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-28 09:43:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-28 10:40:47 -0700
commita11e1d432b51f63ba698d044441284a661f01144 (patch)
tree9f3c5a10bf0d7f9a342d5fb39c0c35ea14170124
parentf57494321cbf5b1e7769b6135407d2995a369e28 (diff)
Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL
The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/filesystems/Locking7
-rw-r--r--Documentation/filesystems/vfs.txt13
-rw-r--r--crypto/af_alg.c13
-rw-r--r--crypto/algif_aead.c4
-rw-r--r--crypto/algif_skcipher.c4
-rw-r--r--drivers/char/random.c29
-rw-r--r--drivers/isdn/mISDN/socket.c2
-rw-r--r--drivers/net/ppp/pppoe.c2
-rw-r--r--fs/aio.c148
-rw-r--r--fs/eventfd.c19
-rw-r--r--fs/eventpoll.c15
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/select.c23
-rw-r--r--fs/timerfd.c22
-rw-r--r--include/crypto/if_alg.h3
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/poll.h12
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/iucv/af_iucv.h2
-rw-r--r--include/net/sctp/sctp.h3
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/tls.h6
-rw-r--r--include/net/udp.h2
-rw-r--r--include/uapi/linux/aio_abi.h8
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c11
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bluetooth/af_bluetooth.c7
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/caif/caif_socket.c12
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/raw.c2
-rw-r--r--net/core/datagram.c13
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/dccp/proto.c13
-rw-r--r--net/decnet/af_decnet.c6
-rw-r--r--net/ieee802154/socket.c4
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/iucv/af_iucv.c7
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/packet/af_packet.c9
-rw-r--r--net/phonet/socket.c9
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/af_rxrpc.c10
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/smc/af_smc.c12
-rw-r--r--net/socket.c48
-rw-r--r--net/tipc/socket.c14
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c19
-rw-r--r--net/unix/af_unix.c30
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xdp/xsk.c7
80 files changed, 301 insertions, 450 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2c391338c675..37bf0a9de75c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -441,8 +441,6 @@ prototypes:
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
- struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
- __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -473,7 +471,7 @@ prototypes:
};
locking rules:
- All except for ->poll_mask may block.
+ All may block.
->llseek() locking has moved from llseek to the individual llseek
implementations. If your fs is not using generic_file_llseek, you
@@ -505,9 +503,6 @@ in sys_read() and friends.
the lease within the individual filesystem to record the result of the
operation
-->poll_mask can be called with or without the waitqueue lock for the waitqueue
-returned from ->get_poll_head.
-
--------------------------- dquot_operations -------------------------------
prototypes:
int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 829a7b7857a4..f608180ad59d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -857,8 +857,6 @@ struct file_operations {
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
- struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
- __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -903,17 +901,6 @@ otherwise noted.
activity on this file and (optionally) go to sleep until there
is activity. Called by the select(2) and poll(2) system calls
- get_poll_head: Returns the struct wait_queue_head that callers can
- wait on. Callers need to check the returned events using ->poll_mask
- once woken. Can return NULL to indicate polling is not supported,
- or any error code using the ERR_PTR convention to indicate that a
- grave error occured and ->poll_mask shall not be called.
-
- poll_mask: return the mask of EPOLL* values describing the file descriptor
- state. Called either before going to sleep on the waitqueue returned by
- get_poll_head, or after it has been woken. If ->get_poll_head and
- ->poll_mask are implemented ->poll does not need to be implement.
-
unlocked_ioctl: called by the ioctl(2) system call.
compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 49fa8582138b..314c52c967e5 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1060,12 +1060,19 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
}
EXPORT_SYMBOL_GPL(af_alg_async_cb);
-__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
+/**
+ * af_alg_poll - poll system call handler
+ */
+__poll_t af_alg_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct af_alg_ctx *ctx = ask->private;
- __poll_t mask = 0;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
if (!ctx->more || ctx->used)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -1075,7 +1082,7 @@ __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-EXPORT_SYMBOL_GPL(af_alg_poll_mask);
+EXPORT_SYMBOL_GPL(af_alg_poll);
/**
* af_alg_alloc_areq - allocate struct af_alg_async_req
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 825524f27438..c40a8c7ee8ae 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = {
.sendmsg = aead_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
- .poll_mask = af_alg_poll_mask,
+ .poll = af_alg_poll,
};
static int aead_check_key(struct socket *sock)
@@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = {
.sendmsg = aead_sendmsg_nokey,
.sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
- .poll_mask = af_alg_poll_mask,
+ .poll = af_alg_poll,
};
static void *aead_bind(const char *name, u32 type, u32 mask)
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 4c04eb9888ad..cfdaab2b7d76 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -206,7 +206,7 @@ static struct proto_ops algif_skcipher_ops = {
.sendmsg = skcipher_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
- .poll_mask = af_alg_poll_mask,
+ .poll = af_alg_poll,
};
static int skcipher_check_key(struct socket *sock)
@@ -302,7 +302,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
.sendmsg = skcipher_sendmsg_nokey,
.sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
- .poll_mask = af_alg_poll_mask,
+ .poll = af_alg_poll,
};
static void *skcipher_bind(const char *name, u32 type, u32 mask)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index a8fb0020ba5c..cd888d4ee605 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -402,7 +402,8 @@ static struct poolinfo {
/*
* Static global variables
*/
-static DECLARE_WAIT_QUEUE_HEAD(random_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
static struct fasync_struct *fasync;
static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -721,8 +722,8 @@ retry:
/* should we wake readers? */
if (entropy_bits >= random_read_wakeup_bits &&
- wq_has_sleeper(&random_wait)) {
- wake_up_interruptible_poll(&random_wait, POLLIN);
+ wq_has_sleeper(&random_read_wait)) {
+ wake_up_interruptible(&random_read_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
/* If the input pool is getting full, send some
@@ -1396,7 +1397,7 @@ retry:
trace_debit_entropy(r->name, 8 * ibytes);
if (ibytes &&
(r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
- wake_up_interruptible_poll(&random_wait, POLLOUT);
+ wake_up_interruptible(&random_write_wait);
kill_fasync(&fasync, SIGIO, POLL_OUT);
}
@@ -1838,7 +1839,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes)
if (nonblock)
return -EAGAIN;
- wait_event_interruptible(random_wait,
+ wait_event_interruptible(random_read_wait,
ENTROPY_BITS(&input_pool) >=
random_read_wakeup_bits);
if (signal_pending(current))
@@ -1875,17 +1876,14 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return ret;
}
-static struct wait_queue_head *
-random_get_poll_head(struct file *file, __poll_t events)
-{
- return &random_wait;
-}
-
static __poll_t
-random_poll_mask(struct file *file, __poll_t events)
+random_poll(struct file *file, poll_table * wait)
{
- __poll_t mask = 0;
+ __poll_t mask;
+ poll_wait(file, &random_read_wait, wait);
+ poll_wait(file, &random_write_wait, wait);
+ mask = 0;
if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
mask |= EPOLLIN | EPOLLRDNORM;
if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
@@ -1992,8 +1990,7 @@ static int random_fasync(int fd, struct file *filp, int on)
const struct file_operations random_fops = {
.read = random_read,
.write = random_write,
- .get_poll_head = random_get_poll_head,
- .poll_mask = random_poll_mask,
+ .poll = random_poll,
.unlocked_ioctl = random_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
@@ -2326,7 +2323,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_interruptible(random_wait, kthread_should_stop() ||
+ wait_event_interruptible(random_write_wait, kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 98f90aadd141..18c0a1281914 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = {
.getname = data_sock_getname,
.sendmsg = mISDN_sock_sendmsg,
.recvmsg = mISDN_sock_recvmsg,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = data_sock_setsockopt,
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index de51e8f70f44..ce61231e96ea 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppoe_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..210df9da1283 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,7 +5,6 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
- * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -165,22 +164,10 @@ struct fsync_iocb {
bool datasync;
};
-struct poll_iocb {
- struct file *file;
- __poll_t events;
- struct wait_queue_head *head;
-
- union {
- struct wait_queue_entry wait;
- struct work_struct work;
- };
-};
-
struct aio_kiocb {
union {
struct kiocb rw;
struct fsync_iocb fsync;
- struct poll_iocb poll;
};
struct kioctx *ki_ctx;
@@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
return -EINVAL;
+
req->file = fget(iocb->aio_fildes);
if (unlikely(!req->file))
return -EBADF;
@@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
return 0;
}
-/* need to use list_del_init so we can check if item was present */
-static inline bool __aio_poll_remove(struct poll_iocb *req)
-{
- if (list_empty(&req->wait.entry))
- return false;
- list_del_init(&req->wait.entry);
- return true;
-}
-
-static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- fput(iocb->poll.file);
- aio_complete(iocb, mangle_poll(mask), 0);
-}
-
-static void aio_poll_work(struct work_struct *work)
-{
- struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
-
- if (!list_empty_careful(&iocb->ki_list))
- aio_remove_iocb(iocb);
- __aio_poll_complete(iocb, iocb->poll.events);
-}
-
-static int aio_poll_cancel(struct kiocb *iocb)
-{
- struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
- struct poll_iocb *req = &aiocb->poll;
- struct wait_queue_head *head = req->head;
- bool found = false;
-
- spin_lock(&head->lock);
- found = __aio_poll_remove(req);
- spin_unlock(&head->lock);
-
- if (found) {
- req->events = 0;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
- return 0;
-}
-
-static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
- void *key)
-{
- struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
- struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
- struct file *file = req->file;
- __poll_t mask = key_to_poll(key);
-
- assert_spin_locked(&req->head->lock);
-
- /* for instances that support it check for an event match first: */
- if (mask && !(mask & req->events))
- return 0;
-
- mask = file->f_op->poll_mask(file, req->events) & req->events;
- if (!mask)
- return 0;
-
- __aio_poll_remove(req);
-
- /*
- * Try completing without a context switch if we can acquire ctx_lock
- * without spinning. Otherwise we need to defer to a workqueue to
- * avoid a deadlock due to the lock order.
- */
- if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
- list_del_init(&iocb->ki_list);
- spin_unlock(&iocb->ki_ctx->ctx_lock);
-
- __aio_poll_complete(iocb, mask);
- } else {
- req->events = mask;
- INIT_WORK(&req->work, aio_poll_work);
- schedule_work(&req->work);
- }
-
- return 1;
-}
-
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
-{
- struct kioctx *ctx = aiocb->ki_ctx;
- struct poll_iocb *req = &aiocb->poll;
- __poll_t mask;
-
- /* reject any unknown events outside the normal event mask. */
- if ((u16)iocb->aio_buf != iocb->aio_buf)
- return -EINVAL;
- /* reject fields that are not defined for poll */
- if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
- return -EINVAL;
-
- req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
- if (!file_has_poll_mask(req->file))
- goto out_fail;
-
- req->head = req->file->f_op->get_poll_head(req->file, req->events);
- if (!req->head)
- goto out_fail;
- if (IS_ERR(req->head)) {
- mask = EPOLLERR;
- goto done;
- }
-
- init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- aiocb->ki_cancel = aio_poll_cancel;
-
- spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- mask = req->file->f_op->poll_mask(req->file, req->events) & req->events;
- if (!mask) {
- __add_wait_queue(req->head, &req->wait);
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- }
- spin_unlock(&req->head->lock);
- spin_unlock_irq(&ctx->ctx_lock);
-done:
- if (mask)
- __aio_poll_complete(aiocb, mask);
- return 0;
-out_fail:
- fput(req->file);
- return -EINVAL; /* same as no support for IOCB_CMD_POLL */
-}
-
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
bool compat)
{
@@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
case IOCB_CMD_FDSYNC:
ret = aio_fsync(&req->fsync, &iocb, true);
break;
- case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
- break;
default:
pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ceb1031f1cac..08d3bd602f73 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file)
return 0;
}
-static struct wait_queue_head *
-eventfd_get_poll_head(struct file *file, __poll_t events)
-{
- struct eventfd_ctx *ctx = file->private_data;
-
- return &ctx->wqh;
-}
-
-static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
+static __poll_t eventfd_poll(struct file *file, poll_table *wait)
{
struct eventfd_ctx *ctx = file->private_data;