summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugo Landau <hlandau@openssl.org>2022-11-17 14:59:18 +0000
committerHugo Landau <hlandau@openssl.org>2023-01-13 13:20:14 +0000
commit69523214ee5a718a0f24803a93bedf0795578173 (patch)
tree95d9a1cf85b92650dff317cbede2089afc836f66
parent68801bcb766806a04e95e8ef714a0b836b1d7069 (diff)
QUIC: Add QUIC reactor
Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/19703)
-rw-r--r--include/internal/quic_reactor.h162
-rw-r--r--ssl/quic/build.info1
-rw-r--r--ssl/quic/quic_reactor.c301
3 files changed, 464 insertions, 0 deletions
diff --git a/include/internal/quic_reactor.h b/include/internal/quic_reactor.h
new file mode 100644
index 0000000000..1372ffc0bb
--- /dev/null
+++ b/include/internal/quic_reactor.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#ifndef OSSL_QUIC_REACTOR_H
+# define OSSL_QUIC_REACTOR_H
+
+# include "internal/time.h"
+# include "internal/sockets.h"
+# include <openssl/bio.h>
+
+/*
+ * Core I/O Reactor Framework
+ * ==========================
+ *
+ * Manages use of async network I/O which the QUIC stack is built on. The core
+ * mechanic looks like this:
+ *
+ * - There is a pollable FD for both the read and write side respectively.
+ * Readability and writeability of these FDs respectively determines when
+ * network I/O is available.
+ *
+ * - The reactor can export these FDs to the user, as well as flags indicating
+ * whether the user should listen for readability, writeability, or neither.
+ *
+ * - The reactor can export a timeout indication to the user, indicating when
+ * the reactor should be called (via libssl APIs) regardless of whether
+ * the network socket has become ready.
+ *
+ * The reactor is based around a tick callback which is essentially the mutator
+ * function. The mutator attempts to do whatever it can, attempting to perform
+ * network I/O to the extent currently feasible. When done, the mutator returns
+ * information to the reactor indicating when it should be woken up again:
+ *
+ * - Should it be woken up when network RX is possible?
+ * - Should it be woken up when network TX is possible?
+ * - Should it be woken up no later than some deadline X?
+ *
+ * The intention is that ALL I/O-related SSL_* functions with side effects (e.g.
+ * SSL_read/SSL_write) consist of three phases:
+ *
+ * - Optionally mutate the QUIC machine's state.
+ * - Optionally tick the QUIC reactor.
+ * - Optionally mutate the QUIC machine's state.
+ *
+ * For example, SSL_write is a mutation (appending to a stream buffer) followed
+ * by an optional tick (generally expected as we may want to send the data
+ * immediately, though not strictly needed if transmission is being deferred due
+ * to Nagle's algorithm, etc.).
+ *
+ * SSL_read is also a mutation and in principle does not need to tick the
+ * reactor, but it generally will anyway to ensure that the reactor is regularly
+ * ticked by an application which is only reading and not writing.
+ *
+ * If the SSL object is being used in blocking mode, SSL_read may need to block
+ * if no data is available yet, and SSL_write may need to block if buffers
+ * are full.
+ *
+ * The internals of the QUIC I/O engine always use asynchronous I/O. If the
+ * application desires blocking semantics, we handle this by adding a blocking
+ * adaptation layer on top of our internal asynchronous I/O API as exposed by
+ * the reactor interface.
+ */
+# ifndef OPENSSL_NO_QUIC
+
+/*
+ * Output of a single tick callback invocation: what the mutator wants the
+ * reactor to wait for next. want_net_read/want_net_write are boolean flags;
+ * tick_deadline is the latest time by which the reactor should be ticked
+ * again (ossl_time_infinite() if no deadline applies).
+ */
+typedef struct quic_tick_result_st {
+ char want_net_read;
+ char want_net_write;
+ OSSL_TIME tick_deadline;
+} QUIC_TICK_RESULT;
+
+typedef struct quic_reactor_st {
+ /*
+ * BIO poll descriptors which can be polled. poll_r is a poll descriptor
+ * which becomes readable when the QUIC state machine can potentially do
+ * work, and poll_w is a poll descriptor which becomes writable when the
+ * QUIC state machine can potentially do work. Generally, either of these
+ * conditions means that SSL_tick() should be called, or another SSL
+ * function which implicitly calls SSL_tick() (e.g. SSL_read/SSL_write()).
+ */
+ BIO_POLL_DESCRIPTOR poll_r, poll_w;
+ OSSL_TIME tick_deadline; /* ossl_time_infinite() if none currently applicable */
+
+ /*
+ * Tick callback (the mutator) and its opaque argument. The callback
+ * cannot fail; it reports its wakeup requirements via QUIC_TICK_RESULT.
+ */
+ void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg);
+ void *tick_cb_arg;
+
+ /*
+ * These are true if we would like to know when we can read or write from
+ * the network respectively.
+ */
+ unsigned int want_net_read : 1;
+ unsigned int want_net_write : 1;
+} QUIC_REACTOR;
+
+/*
+ * Initialise the reactor. No poll descriptors are set initially; tick_cb is
+ * the mutator callback invoked on each tick and initial_tick_deadline seeds
+ * the tick deadline before the first tick occurs.
+ */
+void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
+ void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
+ void *tick_cb_arg,
+ OSSL_TIME initial_tick_deadline);
+
+/* Set the poll descriptor used for network read readiness (copied). */
+void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor,
+ const BIO_POLL_DESCRIPTOR *r);
+
+/* Set the poll descriptor used for network write readiness (copied). */
+void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor,
+ const BIO_POLL_DESCRIPTOR *w);
+
+/* Accessors for the stored poll descriptors (pointers into the reactor). */
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor);
+
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor);
+
+/* Whether the last tick asked to be woken on network readability. */
+int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor);
+
+/* Whether the last tick asked to be woken on network writability. */
+int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor);
+
+/* Deadline reported by the last tick; ossl_time_infinite() if none. */
+OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor);
+
+/*
+ * Do whatever work can be done, and as much work as can be done. This involves
+ * e.g. seeing if we can read anything from the network (if we want to), seeing
+ * if we can write anything to the network (if we want to), etc.
+ */
+int ossl_quic_reactor_tick(QUIC_REACTOR *rtor);
+
+/*
+ * Blocking I/O Adaptation Layer
+ * =============================
+ *
+ * The blocking I/O adaptation layer implements blocking I/O on top of our
+ * asynchronous core.
+ *
+ * The core mechanism is block_until_pred(), which does not return until pred()
+ * returns a value other than 0. The blocker uses OS I/O synchronisation
+ * primitives (e.g. poll(2)) and ticks the reactor until the predicate is
+ * satisfied. The blocker is not required to call pred() more than once between
+ * tick calls.
+ *
+ * When pred returns a non-zero value, that value is returned by this function.
+ * This can be used to allow pred() to indicate error conditions and short
+ * circuit the blocking process.
+ *
+ * A return value of -1 is reserved for network polling errors. Therefore this
+ * return value should not be used by pred() if ambiguity is not desired. Note
+ * that the predicate function can always arrange its own output mechanism, for
+ * example by passing a structure of its own as the argument.
+ *
+ * If the SKIP_FIRST_TICK flag is set, the first call to reactor_tick() before
+ * the first call to pred() is skipped. This is useful if it is known that
+ * ticking the reactor again will not be useful (e.g. because it has already
+ * been done).
+ */
+#define SKIP_FIRST_TICK (1U << 0)
+
+int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
+ int (*pred)(void *arg), void *pred_arg,
+ uint32_t flags);
+
+# endif
+
+#endif
diff --git a/ssl/quic/build.info b/ssl/quic/build.info
index 0d84df4be3..15aa53a359 100644
--- a/ssl/quic/build.info
+++ b/ssl/quic/build.info
@@ -9,3 +9,4 @@ SOURCE[$LIBSSL]=quic_cfq.c quic_txpim.c quic_fifd.c quic_txp.c
SOURCE[$LIBSSL]=quic_stream_map.c
SOURCE[$LIBSSL]=quic_sf_list.c quic_rstream.c quic_sstream.c
SOURCE[$LIBSSL]=quic_dummy_handshake.c
+SOURCE[$LIBSSL]=quic_reactor.c
diff --git a/ssl/quic/quic_reactor.c b/ssl/quic/quic_reactor.c
new file mode 100644
index 0000000000..ed5c7955db
--- /dev/null
+++ b/ssl/quic/quic_reactor.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#include "internal/quic_reactor.h"
+
+/*
+ * Core I/O Reactor Framework
+ * ==========================
+ */
+/*
+ * Initialise a reactor: no pollable descriptors, no read/write interest,
+ * tick deadline seeded from initial_tick_deadline, and the tick callback
+ * recorded for later use by ossl_quic_reactor_tick().
+ */
+void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
+ void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
+ void *tick_cb_arg,
+ OSSL_TIME initial_tick_deadline)
+{
+ rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
+ rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
+ rtor->want_net_read = 0;
+ rtor->want_net_write = 0;
+ rtor->tick_deadline = initial_tick_deadline;
+
+ rtor->tick_cb = tick_cb;
+ rtor->tick_cb_arg = tick_cb_arg;
+}
+
+/* Store a copy of the read-side poll descriptor. */
+void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
+{
+ rtor->poll_r = *r;
+}
+
+/* Store a copy of the write-side poll descriptor. */
+void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
+{
+ rtor->poll_w = *w;
+}
+
+/* Return a pointer to the stored read-side poll descriptor. */
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor)
+{
+ return &rtor->poll_r;
+}
+
+/* Return a pointer to the stored write-side poll descriptor. */
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor)
+{
+ return &rtor->poll_w;
+}
+
+/* 1 if the last tick requested wakeup on network readability, else 0. */
+int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor)
+{
+ return rtor->want_net_read;
+}
+
+/* 1 if the last tick requested wakeup on network writability, else 0. */
+int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor)
+{
+ return rtor->want_net_write;
+}
+
+/* Deadline reported by the last tick (or the initial deadline before one). */
+OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
+{
+ return rtor->tick_deadline;
+}
+
+/*
+ * Invoke the tick callback once and latch its reported wakeup requirements
+ * (read/write interest and next deadline) into the reactor. Always returns 1.
+ */
+int ossl_quic_reactor_tick(QUIC_REACTOR *rtor)
+{
+ QUIC_TICK_RESULT res = {0};
+
+ /*
+ * Note that the tick callback cannot fail; this is intentional. Arguably it
+ * does not make that much sense for ticking to 'fail' (in the sense of an
+ * explicit error indicated to the user) because ticking is by its nature
+ * best effort. If something fatal happens with a connection we can report
+ * it on the next actual application I/O call.
+ */
+ rtor->tick_cb(&res, rtor->tick_cb_arg);
+
+ rtor->want_net_read = res.want_net_read;
+ rtor->want_net_write = res.want_net_write;
+ rtor->tick_deadline = res.tick_deadline;
+ return 1;
+}
+
+/*
+ * Blocking I/O Adaptation Layer
+ * =============================
+ */
+
+/*
+ * Utility which can be used to poll on up to two FDs. This is designed to
+ * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
+ * different FDs are used for read and write).
+ *
+ * Generally use of poll(2) is preferred where available. Windows, however,
+ * hasn't traditionally offered poll(2), only select(2). WSAPoll() was
+ * introduced in Vista but has seemingly been buggy until relatively recent
+ * versions of Windows 10. Moreover we support XP so this is not a suitable
+ * target anyway. However, the traditional issues with select(2) turn out not to
+ * be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of
+ * FDs (and thus is limited in the magnitude of the FDs expressible), Windows
+ * select(2) is very different. In Windows, socket handles are not allocated
+ * contiguously from zero and thus this bitmap approach was infeasible. Thus in
+ * adapting the Berkeley sockets API to Windows a different approach was taken
+ * whereby the fd_set contains a fixed length array of socket handles and an
+ * integer indicating how many entries are valid; thus Windows select()
+ * ironically is actually much more like *NIX poll(2) than *NIX select(2). In
+ * any case, this means that the relevant limit for Windows select() is the
+ * number of FDs being polled, not the magnitude of those FDs. Since we only
+ * poll for two FDs here, this limit does not concern us.
+ *
+ * Usage: rfd and wfd may be the same or different. Either or both may also be
+ * -1. If rfd_want_read is 1, rfd is polled for readability, and if
+ * wfd_want_write is 1, wfd is polled for writability. Note that since any
+ * passed FD is always polled for error conditions, setting rfd_want_read=0 and
+ * wfd_want_write=0 is not the same as passing -1 for both FDs.
+ *
+ * deadline is a timestamp to return at. If it is ossl_time_infinite(), the call
+ * never times out.
+ *
+ * Returns 0 on error and 1 on success. Timeout expiry is considered a success
+ * condition. We don't elaborate our return values here because the way we are
+ * actually using this doesn't currently care.
+ */
+static int poll_two_fds(int rfd, int rfd_want_read,
+ int wfd, int wfd_want_write,
+ OSSL_TIME deadline)
+{
+#if defined(OSSL_SYS_WINDOWS) || !defined(POLLIN)
+ /* select(2) path: Windows, or platforms without poll(2). */
+ fd_set rfd_set, wfd_set, efd_set;
+ OSSL_TIME now, timeout;
+ struct timeval tv, *ptv;
+ int maxfd, pres;
+
+#ifndef OSSL_SYS_WINDOWS
+ /*
+ * On Windows there is no relevant limit to the magnitude of a fd value (see
+ * above). On *NIX the fd_set uses a bitmap and we must check the limit.
+ */
+ if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
+ return 0;
+#endif
+
+ FD_ZERO(&rfd_set);
+ FD_ZERO(&wfd_set);
+ FD_ZERO(&efd_set);
+
+ if (rfd != -1 && rfd_want_read)
+ openssl_fdset(rfd, &rfd_set);
+ if (wfd != -1 && wfd_want_write)
+ openssl_fdset(wfd, &wfd_set);
+
+ /* Always check for error conditions. */
+ if (rfd != -1)
+ openssl_fdset(rfd, &efd_set);
+ if (wfd != -1)
+ openssl_fdset(wfd, &efd_set);
+
+ maxfd = rfd;
+ if (wfd > maxfd)
+ maxfd = wfd;
+
+ if (rfd == -1 && wfd == -1 && ossl_time_is_infinite(deadline))
+ /* Do not block forever; should not happen. */
+ return 0;
+
+ /*
+ * NOTE(review): the EINTR retry below reuses the fd_sets without
+ * re-initialising them; this relies on select() leaving the sets
+ * unmodified on failure — confirm on all supported platforms.
+ */
+ do {
+ /*
+ * select expects a timeout, not a deadline, so do the conversion.
+ * Update for each call to ensure the correct value is used if we repeat
+ * due to EINTR.
+ */
+ if (ossl_time_is_infinite(deadline)) {
+ ptv = NULL;
+ } else {
+ now = ossl_time_now();
+ /*
+ * ossl_time_subtract saturates to zero so we don't need to check if
+ * now > deadline.
+ */
+ timeout = ossl_time_subtract(deadline, now);
+ tv = ossl_time_to_timeval(timeout);
+ ptv = &tv;
+ }
+
+ pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
+ } while (pres == -1 && get_last_socket_error_is_eintr());
+
+ return pres < 0 ? 0 : 1;
+#else
+ /* poll(2) path: preferred where available (see comment above). */
+ int pres, timeout_ms;
+ OSSL_TIME now, timeout;
+ struct pollfd pfds[2] = {0};
+ size_t npfd = 0;
+
+ /* One pollfd entry if both FDs are the same, otherwise up to two. */
+ if (rfd == wfd) {
+ pfds[npfd].fd = rfd;
+ pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
+ | (wfd_want_write ? POLLOUT : 0);
+ if (rfd >= 0 && pfds[npfd].events != 0)
+ ++npfd;
+ } else {
+ pfds[npfd].fd = rfd;
+ pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
+ if (rfd >= 0 && pfds[npfd].events != 0)
+ ++npfd;
+
+ pfds[npfd].fd = wfd;
+ pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
+ if (wfd >= 0 && pfds[npfd].events != 0)
+ ++npfd;
+ }
+
+ if (npfd == 0 && ossl_time_is_infinite(deadline))
+ /* Do not block forever; should not happen. */
+ return 0;
+
+ do {
+ if (ossl_time_is_infinite(deadline)) {
+ timeout_ms = -1;
+ } else {
+ now = ossl_time_now();
+ timeout = ossl_time_subtract(deadline, now);
+ /*
+ * NOTE(review): assumes the millisecond value fits in an int;
+ * very distant deadlines could truncate/overflow — confirm
+ * ossl_time2ms semantics and clamp if necessary.
+ */
+ timeout_ms = ossl_time2ms(timeout);
+ }
+
+ pres = poll(pfds, npfd, timeout_ms);
+ } while (pres == -1 && get_last_socket_error_is_eintr());
+
+ return pres < 0 ? 0 : 1;
+#endif
+}
+
+/*
+ * Map a BIO poll descriptor to an OS socket FD. A NULL or TYPE_NONE
+ * descriptor maps to -1 ("no FD"); any other non-socket descriptor type, or
+ * a negative stored FD, is an error. Returns 1 on success, 0 on failure.
+ */
+static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
+{
+ if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
+ *fd = -1;
+ return 1;
+ }
+
+ if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD || d->value.fd < 0)
+ return 0;
+
+ *fd = d->value.fd;
+ return 1;
+}
+
+/*
+ * Poll up to two abstract poll descriptors. Currently we only support
+ * poll descriptors which represent FDs.
+ */
+static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
+ const BIO_POLL_DESCRIPTOR *w, int w_want_write,
+ OSSL_TIME deadline)
+{
+ int rfd, wfd;
+
+ /* Convert each descriptor to an FD (-1 if absent); fail on other types. */
+ if (!poll_descriptor_to_fd(r, &rfd)
+ || !poll_descriptor_to_fd(w, &wfd))
+ return 0;
+
+ return poll_two_fds(rfd, r_want_read, wfd, w_want_write, deadline);
+}
+
+/*
+ * Tick the reactor and poll on its descriptors until pred() returns non-zero;
+ * that value is returned to the caller. Returns 0 on a polling failure. See
+ * the header comment in quic_reactor.h for the full contract, including the
+ * SKIP_FIRST_TICK flag and the reservation of -1 for poll errors.
+ */
+int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
+ int (*pred)(void *arg), void *pred_arg,
+ uint32_t flags)
+{
+ int res;
+
+ for (;;) {
+ /* Skip exactly one tick if requested; clear the flag so later
+ * iterations tick normally. */
+ if ((flags & SKIP_FIRST_TICK) != 0)
+ flags &= ~SKIP_FIRST_TICK;
+ else
+ /* best effort */
+ ossl_quic_reactor_tick(rtor);
+
+ if ((res = pred(pred_arg)) != 0)
+ return res;
+
+ if (!poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
+ ossl_quic_reactor_want_net_read(rtor),
+ ossl_quic_reactor_get_poll_w(rtor),
+ ossl_quic_reactor_want_net_write(rtor),
+ ossl_quic_reactor_get_tick_deadline(rtor)))
+ /*
+ * We don't actually care why the call succeeded (timeout, FD
+ * readiness), we just call reactor_tick and start trying to do I/O
+ * things again. If poll_two_fds returns 0, this is some other
+ * non-timeout failure and we should stop here.
+ *
+ * TODO(QUIC): In the future we could avoid unnecessary syscalls by
+ * not retrying network I/O that isn't ready based on the result of
+ * the poll call. However this might be difficult because it
+ * requires we do the call to poll(2) or equivalent syscall
+ * ourselves, whereas in the general case the application does the
+ * polling and just calls SSL_tick(). Implementing this optimisation
+ * in the future will probably therefore require API changes.
+ */
+ return 0;
+ }
+}