From 69523214ee5a718a0f24803a93bedf0795578173 Mon Sep 17 00:00:00 2001
From: Hugo Landau <hlandau@openssl.org>
Date: Thu, 17 Nov 2022 14:59:18 +0000
Subject: QUIC: Add QUIC reactor

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/19703)
---
 include/internal/quic_reactor.h | 162 +++++++++++++++++++++
 ssl/quic/build.info             |   1 +
 ssl/quic/quic_reactor.c         | 301 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 464 insertions(+)
 create mode 100644 include/internal/quic_reactor.h
 create mode 100644 ssl/quic/quic_reactor.c

diff --git a/include/internal/quic_reactor.h b/include/internal/quic_reactor.h
new file mode 100644
index 0000000000..1372ffc0bb
--- /dev/null
+++ b/include/internal/quic_reactor.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#ifndef OSSL_QUIC_REACTOR_H
+# define OSSL_QUIC_REACTOR_H
+
+# include "internal/time.h"
+# include "internal/sockets.h"
+# include <openssl/bio.h>
+
+/*
+ * Core I/O Reactor Framework
+ * ==========================
+ *
+ * Manages use of async network I/O which the QUIC stack is built on. The core
+ * mechanic looks like this:
+ *
+ *   - There is a pollable FD for both the read and write side respectively.
+ *     Readability and writeability of these FDs respectively determines when
+ *     network I/O is available.
+ *
+ *   - The reactor can export these FDs to the user, as well as flags indicating
+ *     whether the user should listen for readability, writeability, or neither.
+ *
+ *   - The reactor can export a timeout indication to the user, indicating when
+ *     the reactor should be called (via libssl APIs) regardless of whether
+ *     the network socket has become ready.
+ *
+ * The reactor is based around a tick callback which is essentially the mutator
+ * function. The mutator attempts to do whatever it can, attempting to perform
+ * network I/O to the extent currently feasible. When done, the mutator returns
+ * information to the reactor indicating when it should be woken up again:
+ *
+ *   - Should it be woken up when network RX is possible?
+ *   - Should it be woken up when network TX is possible?
+ *   - Should it be woken up no later than some deadline X?
+ *
+ * The intention is that ALL I/O-related SSL_* functions with side effects (e.g.
+ * SSL_read/SSL_write) consist of three phases:
+ *
+ *   - Optionally mutate the QUIC machine's state.
+ *   - Optionally tick the QUIC reactor.
+ *   - Optionally mutate the QUIC machine's state.
+ *
+ * For example, SSL_write is a mutation (appending to a stream buffer) followed
+ * by an optional tick (generally expected as we may want to send the data
+ * immediately, though not strictly needed if transmission is being deferred due
+ * to Nagle's algorithm, etc.).
+ *
+ * SSL_read is also a mutation and in principle does not need to tick the
+ * reactor, but it generally will anyway to ensure that the reactor is regularly
+ * ticked by an application which is only reading and not writing.
+ *
+ * If the SSL object is being used in blocking mode, SSL_read may need to block
+ * if no data is available yet, and SSL_write may need to block if buffers
+ * are full.
+ *
+ * The internals of the QUIC I/O engine always use asynchronous I/O. If the
+ * application desires blocking semantics, we handle this by adding a blocking
+ * adaptation layer on top of our internal asynchronous I/O API as exposed by
+ * the reactor interface.
+ */
+# ifndef OPENSSL_NO_QUIC
+
+typedef struct quic_tick_result_st {
+    char        want_net_read;
+    char        want_net_write;
+    OSSL_TIME   tick_deadline;
+} QUIC_TICK_RESULT;
+
+typedef struct quic_reactor_st {
+    /*
+     * BIO poll descriptors which can be polled. poll_r is a poll descriptor
+     * which becomes readable when the QUIC state machine can potentially do
+     * work, and poll_w is a poll descriptor which becomes writable when the
+     * QUIC state machine can potentially do work. Generally, either of these
+     * conditions means that SSL_tick() should be called, or another SSL
+     * function which implicitly calls SSL_tick() (e.g. SSL_read/SSL_write()).
+     */
+    BIO_POLL_DESCRIPTOR poll_r, poll_w;
+    OSSL_TIME tick_deadline; /* ossl_time_infinite() if none currently applicable */
+
+    void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg);
+    void *tick_cb_arg;
+
+    /*
+     * These are true if we would like to know when we can read or write from
+     * the network respectively.
+     */
+    unsigned int want_net_read  : 1;
+    unsigned int want_net_write : 1;
+} QUIC_REACTOR;
+
+void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
+                            void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
+                            void *tick_cb_arg,
+                            OSSL_TIME initial_tick_deadline);
+
+void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor,
+                                  const BIO_POLL_DESCRIPTOR *r);
+
+void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor,
+                                  const BIO_POLL_DESCRIPTOR *w);
+
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor);
+
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor);
+
+int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor);
+
+int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor);
+
+OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor);
+
+/*
+ * Do whatever work can be done, and as much work as can be done. This involves
+ * e.g. seeing if we can read anything from the network (if we want to), seeing
+ * if we can write anything to the network (if we want to), etc.
+ */
+int ossl_quic_reactor_tick(QUIC_REACTOR *rtor);
+
+/*
+ * Blocking I/O Adaptation Layer
+ * =============================
+ *
+ * The blocking I/O adaptation layer implements blocking I/O on top of our
+ * asynchronous core.
+ *
+ * The core mechanism is block_until_pred(), which does not return until pred()
+ * returns a value other than 0. The blocker uses OS I/O synchronisation
+ * primitives (e.g. poll(2)) and ticks the reactor until the predicate is
+ * satisfied. The blocker is not required to call pred() more than once between
+ * tick calls.
+ *
+ * When pred returns a non-zero value, that value is returned by this function.
+ * This can be used to allow pred() to indicate error conditions and short
+ * circuit the blocking process.
+ *
+ * A return value of -1 is reserved for network polling errors. Therefore this
+ * return value should not be used by pred() if ambiguity is not desired. Note
+ * that the predicate function can always arrange its own output mechanism, for
+ * example by passing a structure of its own as the argument.
+ *
+ * If the SKIP_FIRST_TICK flag is set, the first call to reactor_tick() before
+ * the first call to pred() is skipped. This is useful if it is known that
+ * ticking the reactor again will not be useful (e.g. because it has already
+ * been done).
+ */
+#define SKIP_FIRST_TICK     (1U << 0)
+
+int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
+                                       int (*pred)(void *arg), void *pred_arg,
+                                       uint32_t flags);
+
+# endif
+
+#endif
diff --git a/ssl/quic/build.info b/ssl/quic/build.info
index 0d84df4be3..15aa53a359 100644
--- a/ssl/quic/build.info
+++ b/ssl/quic/build.info
@@ -9,3 +9,4 @@ SOURCE[$LIBSSL]=quic_cfq.c quic_txpim.c quic_fifd.c quic_txp.c
 SOURCE[$LIBSSL]=quic_stream_map.c
 SOURCE[$LIBSSL]=quic_sf_list.c quic_rstream.c quic_sstream.c
 SOURCE[$LIBSSL]=quic_dummy_handshake.c
+SOURCE[$LIBSSL]=quic_reactor.c
diff --git a/ssl/quic/quic_reactor.c b/ssl/quic/quic_reactor.c
new file mode 100644
index 0000000000..ed5c7955db
--- /dev/null
+++ b/ssl/quic/quic_reactor.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+#include "internal/quic_reactor.h"
+
+/*
+ * Core I/O Reactor Framework
+ * ==========================
+ */
+void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
+                            void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
+                            void *tick_cb_arg,
+                            OSSL_TIME initial_tick_deadline)
+{
+    rtor->poll_r.type       = BIO_POLL_DESCRIPTOR_TYPE_NONE;
+    rtor->poll_w.type       = BIO_POLL_DESCRIPTOR_TYPE_NONE;
+    rtor->want_net_read     = 0;
+    rtor->want_net_write    = 0;
+    rtor->tick_deadline     = initial_tick_deadline;
+
+    rtor->tick_cb           = tick_cb;
+    rtor->tick_cb_arg       = tick_cb_arg;
+}
+
+void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
+{
+    rtor->poll_r = *r;
+}
+
+void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
+{
+    rtor->poll_w = *w;
+}
+
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor)
+{
+    return &rtor->poll_r;
+}
+
+const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor)
+{
+    return &rtor->poll_w;
+}
+
+int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor)
+{
+    return rtor->want_net_read;
+}
+
+int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor)
+{
+    return rtor->want_net_write;
+}
+
+OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
+{
+    return rtor->tick_deadline;
+}
+
+int ossl_quic_reactor_tick(QUIC_REACTOR *rtor)
+{
+    QUIC_TICK_RESULT res = {0};
+
+    /*
+     * Note that the tick callback cannot fail; this is intentional. Arguably it
+     * does not make that much sense for ticking to 'fail' (in the sense of an
+     * explicit error indicated to the user) because ticking is by its nature
+     * best effort. If something fatal happens with a connection we can report
+     * it on the next actual application I/O call.
+     */
+    rtor->tick_cb(&res, rtor->tick_cb_arg);
+
+    rtor->want_net_read     = res.want_net_read;
+    rtor->want_net_write    = res.want_net_write;
+    rtor->tick_deadline     = res.tick_deadline;
+    return 1;
+}
+
+/*
+ * Blocking I/O Adaptation Layer
+ * =============================
+ */
+
+/*
+ * Utility which can be used to poll on up to two FDs. This is designed to
+ * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
+ * different FDs are used for read and write).
+ *
+ * Generally use of poll(2) is preferred where available. Windows, however,
+ * hasn't traditionally offered poll(2), only select(2). WSAPoll() was
+ * introduced in Vista but has seemingly been buggy until relatively recent
+ * versions of Windows 10. Moreover we support XP so this is not a suitable
+ * target anyway. However, the traditional issues with select(2) turn out not to
+ * be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of
+ * FDs (and thus is limited in the magnitude of the FDs expressible), Windows
+ * select(2) is very different. In Windows, socket handles are not allocated
+ * contiguously from zero and thus this bitmap approach was infeasible. Thus in
+ * adapting the Berkeley sockets API to Windows a different approach was taken
+ * whereby the fd_set contains a fixed length array of socket handles and an
+ * integer indicating how many entries are valid; thus Windows select()
+ * ironically is actually much more like *NIX poll(2) than *NIX select(2). In
+ * any case, this means that the relevant limit for Windows select() is the
+ * number of FDs being polled, not the magnitude of those FDs. Since we only
+ * poll for two FDs here, this limit does not concern us.
+ *
+ * Usage: rfd and wfd may be the same or different. Either or both may also be
+ * -1. If rfd_want_read is 1, rfd is polled for readability, and if
+ * wfd_want_write is 1, wfd is polled for writability. Note that since any
+ * passed FD is always polled for error conditions, setting rfd_want_read=0 and
+ * wfd_want_write=0 is not the same as passing -1 for both FDs.
+ *
+ * deadline is a timestamp to return at. If it is ossl_time_infinite(), the call
+ * never times out.
+ *
+ * Returns 0 on error and 1 on success. Timeout expiry is considered a success
+ * condition. We don't elaborate our return values here because the way we are
+ * actually using this doesn't currently care.
+ */
+static int poll_two_fds(int rfd, int rfd_want_read,
+                        int wfd, int wfd_want_write,
+                        OSSL_TIME deadline)
+{
+#if defined(OSSL_SYS_WINDOWS) || !defined(POLLIN)
+    fd_set rfd_set, wfd_set, efd_set;
+    OSSL_TIME now, timeout;
+    struct timeval tv, *ptv;
+    int maxfd, pres;
+
+#ifndef OSSL_SYS_WINDOWS
+    /*
+     * On Windows there is no relevant limit to the magnitude of a fd value (see
+     * above). On *NIX the fd_set uses a bitmap and we must check the limit.
+     */
+    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
+        return 0;
+#endif
+
+    FD_ZERO(&rfd_set);
+    FD_ZERO(&wfd_set);
+    FD_ZERO(&efd_set);
+
+    if (rfd != -1 && rfd_want_read)
+        openssl_fdset(rfd, &rfd_set);
+    if (wfd != -1 && wfd_want_write)
+        openssl_fdset(wfd, &wfd_set);
+
+    /* Always check for error conditions. */
+    if (rfd != -1)
+        openssl_fdset(rfd, &efd_set);
+    if (wfd != -1)
+        openssl_fdset(wfd, &efd_set);
+
+    maxfd = rfd;
+    if (wfd > maxfd)
+        maxfd = wfd;
+
+    if (rfd == -1 && wfd == -1 && ossl_time_is_infinite(deadline))
+        /* Do not block forever; should not happen. */
+        return 0;
+
+    do {
+        /*
+         * select expects a timeout, not a deadline, so do the conversion.
+         * Update for each call to ensure the correct value is used if we repeat
+         * due to EINTR.
+         */
+        if (ossl_time_is_infinite(deadline)) {
+            ptv = NULL;
+        } else {
+            now = ossl_time_now();
+            /*
+             * ossl_time_subtract saturates to zero so we don't need to check if
+             * now > deadline.
+             */
+            timeout = ossl_time_subtract(deadline, now);
+            tv      = ossl_time_to_timeval(timeout);
+            ptv     = &tv;
+        }
+
+        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
+    } while (pres == -1 && get_last_socket_error_is_eintr());
+
+    return pres < 0 ? 0 : 1;
+#else
+    int pres, timeout_ms;
+    OSSL_TIME now, timeout;
+    struct pollfd pfds[2] = {0};
+    size_t npfd = 0;
+
+    if (rfd == wfd) {
+        pfds[npfd].fd     = rfd;
+        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
+                            | (wfd_want_write ? POLLOUT : 0);
+        if (rfd >= 0 && pfds[npfd].events != 0)
+            ++npfd;
+    } else {
+        pfds[npfd].fd     = rfd;
+        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
+        if (rfd >= 0 && pfds[npfd].events != 0)
+            ++npfd;
+
+        pfds[npfd].fd     = wfd;
+        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
+        if (wfd >= 0 && pfds[npfd].events != 0)
+            ++npfd;
+    }
+
+    if (npfd == 0 && ossl_time_is_infinite(deadline))
+        /* Do not block forever; should not happen. */
+        return 0;
+
+    do {
+        if (ossl_time_is_infinite(deadline)) {
+            timeout_ms = -1;
+        } else {
+            now        = ossl_time_now();
+            timeout    = ossl_time_subtract(deadline, now);
+            timeout_ms = ossl_time2ms(timeout);
+        }
+
+        pres = poll(pfds, npfd, timeout_ms);
+    } while (pres == -1 && get_last_socket_error_is_eintr());
+
+    return pres < 0 ? 0 : 1;
+#endif
+}
+
+static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
+{
+    if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
+        *fd = -1;
+        return 1;
+    }
+
+    if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD || d->value.fd < 0)
+        return 0;
+
+    *fd = d->value.fd;
+    return 1;
+}
+
+/*
+ * Poll up to two abstract poll descriptors. Currently we only support
+ * poll descriptors which represent FDs.
+ */
+static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
+                                const BIO_POLL_DESCRIPTOR *w, int w_want_write,
+                                OSSL_TIME deadline)
+{
+    int rfd, wfd;
+
+    if (!poll_descriptor_to_fd(r, &rfd)
+        || !poll_descriptor_to_fd(w, &wfd))
+        return 0;
+
+    return poll_two_fds(rfd, r_want_read, wfd, w_want_write, deadline);
+}
+
+int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
+                                       int (*pred)(void *arg), void *pred_arg,
+                                       uint32_t flags)
+{
+    int res;
+
+    for (;;) {
+        if ((flags & SKIP_FIRST_TICK) != 0)
+            flags &= ~SKIP_FIRST_TICK;
+        else
+            /* best effort */
+            ossl_quic_reactor_tick(rtor);
+
+        if ((res = pred(pred_arg)) != 0)
+            return res;
+
+        if (!poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
+                                  ossl_quic_reactor_want_net_read(rtor),
+                                  ossl_quic_reactor_get_poll_w(rtor),
+                                  ossl_quic_reactor_want_net_write(rtor),
+                                  ossl_quic_reactor_get_tick_deadline(rtor)))
+            /*
+             * We don't actually care why the call succeeded (timeout, FD
+             * readiness), we just call reactor_tick and start trying to do I/O
+             * things again. If poll_two_fds returns 0, this is some other
+             * non-timeout failure and we should stop here.
+             *
+             * TODO(QUIC): In the future we could avoid unnecessary syscalls by
+             * not retrying network I/O that isn't ready based on the result of
+             * the poll call. However this might be difficult because it
+             * requires we do the call to poll(2) or equivalent syscall
+             * ourselves, whereas in the general case the application does the
+             * polling and just calls SSL_tick(). Implementing this optimisation
+             * in the future will probably therefore require API changes.
+             */
+            return 0;
+    }
+}
-- 
cgit v1.2.3