1 files changed, 2018 insertions, 0 deletions
diff --git a/ssl/quic/quic_ackm.c b/ssl/quic/quic_ackm.c
new file mode 100644
index 0000000000..670b2d316e
--- /dev/null
+++ b/ssl/quic/quic_ackm.c
@@ -0,0 +1,2018 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/quic_ackm.h"
+#include "internal/common.h"
+#include <assert.h>
+
+/*
+ * TX Packet History
+ * *****************
+ *
+ * The TX Packet History object tracks information about packets which have been
+ * sent for which we later expect to receive an ACK. It is essentially a simple
+ * database keeping a list of packet information structures in packet number
+ * order which can also be looked up directly by packet number.
+ *
+ * We currently only allow packets to be appended to the list (i.e. the packet
+ * numbers of the packets appended to the list must monotonically increase), as
+ * we should not currently need more general functionality such as a sorted list
+ * insert.
+ */
+struct tx_pkt_history_st {
+    /* A linked list of all our packets. */
+    OSSL_ACKM_TX_PKT *head, *tail;
+
+    /* Number of packets in the list. */
+    size_t num_packets;
+
+    /*
+     * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
+     *
+     * Invariant: A packet is in this map if and only if it is in the linked
+     *            list.
+     */
+    LHASH_OF(OSSL_ACKM_TX_PKT) *map;
+
+    /*
+     * The lowest packet number which may currently be added to the history list
+     * (inclusive). We do not allow packet numbers to be added to the history
+     * list non-monotonically, so packet numbers must be greater than or equal
+     * to this value.
+     */
+    uint64_t watermark;
+
+    /*
+     * Packet number of the highest packet info structure we have yet appended
+     * to the list. This is usually one less than watermark, except when we have
+     * not added any packet yet.
+     */
+    uint64_t highest_sent;
+};
+
+DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);
+
+static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
+{
+    return pkt->pkt_num;
+}
+
+static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
+                               const OSSL_ACKM_TX_PKT *b)
+{
+    if (a->pkt_num < b->pkt_num)
+        return -1;
+    if (a->pkt_num > b->pkt_num)
+        return 1;
+    return 0;
+}
+
+static int
+tx_pkt_history_init(struct tx_pkt_history_st *h)
+{
+    h->head = h->tail = NULL;
+    h->num_packets  = 0;
+    h->watermark    = 0;
+    h->highest_sent = 0;
+
+    h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
+    if (h->map == NULL)
+        return 0;
+
+    return 1;
+}
+
+static void
+tx_pkt_history_destroy(struct tx_pkt_history_st *h)
+{
+    lh_OSSL_ACKM_TX_PKT_free(h->map);
+    h->map = NULL;
+    h->head = h->tail = NULL;
+}
+
+static int
+tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
+                          OSSL_ACKM_TX_PKT *pkt)
+{
+    OSSL_ACKM_TX_PKT *existing;
+
+    /*
+     * There should not be any existing packet with this number
+     * in our mapping.
+     */
+    existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
+    if (!ossl_assert(existing == NULL))
+        return 0;
+
+    /* Should not already be in a list. */
+    if (!ossl_assert(pkt->next == NULL && pkt->prev == NULL))
+        return 0;
+
+    lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);
+
+    pkt->next = NULL;
+    pkt->prev = h->tail;
+    if (h->tail != NULL)
+        h->tail->next = pkt;
+    h->tail = pkt;
+    if (h->head == NULL)
+        h->head = h->tail;
+
+    ++h->num_packets;
+    return 1;
+}
+
+/* Adds a packet information structure to the history list. */
+static int
+tx_pkt_history_add(struct tx_pkt_history_st *h,
+                   OSSL_ACKM_TX_PKT *pkt)
+{
+    if (!ossl_assert(pkt->pkt_num >= h->watermark))
+        return 0;
+
+    if (tx_pkt_history_add_actual(h, pkt) < 1)
+        return 0;
+
+    h->watermark    = pkt->pkt_num + 1;
+    h->highest_sent = pkt->pkt_num;
+    return 1;
+}
+
+/* Retrieve a packet information structure by packet number. */
+static OSSL_ACKM_TX_PKT *
+tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+    OSSL_ACKM_TX_PKT key;
+
+    key.pkt_num = pkt_num;
+
+    return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
+}
+
+/* Remove a packet information structure from the history log. */
+static int
+tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+    OSSL_ACKM_TX_PKT key, *pkt;
+    key.pkt_num = pkt_num;
+
+    pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
+    if (pkt == NULL)
+        return 0;
+
+    if (pkt->prev != NULL)
+        pkt->prev->next = pkt->next;
+    if (pkt->next != NULL)
+        pkt->next->prev = pkt->prev;
+    if (h->head == pkt)
+        h->head = pkt->next;
+    if (h->tail == pkt)
+        h->tail = pkt->prev;
+
+    pkt->prev = pkt->next = NULL;
+
+    lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
+    --h->num_packets;
+    return 1;
+}
+
+/*
+ * RX Packet Number Tracking
+ * *************************
+ *
+ * **Background.** The RX side of the ACK manager must track packets we have
+ * received for which we have to generate ACK frames. Broadly, this means we
+ * store a set of packet numbers which we have received but which we do not know
+ * for a fact that the transmitter knows we have received.
+ *
+ * This must handle various situations:
+ *
+ *   1. We receive a packet but have not sent an ACK yet, so the transmitter
+ *      does not know whether we have received it or not yet.
+ *
+ *   2. We receive a packet and send an ACK which is lost. We do not
+ *      immediately know that the ACK was lost and the transmitter does not know
+ *      that we have received the packet.
+ *
+ *   3. We receive a packet and send an ACK which is received by the
+ *      transmitter. The transmitter does not immediately respond with an ACK,
+ *      or responds with an ACK which is lost. The transmitter knows that we
+ *      have received the packet, but we do not know for sure that it knows,
+ *      because the ACK we sent could have been lost.
+ *
+ *   4. We receive a packet and send an ACK which is received by the
+ *      transmitter. The transmitter subsequently sends us an ACK which confirms
+ *      its receipt of the ACK we sent, and we successfully receive that ACK, so
+ *      we know that the transmitter knows, that we received the original
+ *      packet.
+ *
+ * Only when we reach case (4) are we relieved of any need to track a given
+ * packet number we have received, because only in this case do we know for sure
+ * that the peer knows we have received the packet. Having reached case (4) we
+ * will never again need to generate an ACK containing the PN in question, but
+ * until we reach that point, we must keep track of the PN as not having been
+ * provably ACKed, as we may have to keep generating ACKs for the given PN not
+ * just until the transmitter receives one, but until we know that it has
+ * received one. This will be referred to herein as "provably ACKed".
+ *
+ * **Duplicate handling.** The above discusses the case where we have received a
+ * packet with a given PN but are at best unsure whether the sender knows we
+ * have received it or not. However, we must also handle the case where we have
+ * yet to receive a packet with a given PN in the first place. The reason for
+ * this is because of the requirement expressed by RFC 9000 s. 12.3:
+ *
+ *   "A receiver MUST discard a newly unprotected packet unless it is certain
+ *    that it has not processed another packet with the same packet number from
+ *    the same packet number space."
+ *
+ * We must ensure we never process a duplicate PN. As such, each possible PN we
+ * can receive must exist in one of the following logical states:
+ *
+ *   - We have never processed this PN before
+ *     (so if we receive such a PN, it can be processed)
+ *
+ *   - We have processed this PN but it has not yet been provably ACKed
+ *     (and should therefore be in any future ACK frame generated;
+ *      if we receive such a PN again, it must be ignored)
+ *
+ *   - We have processed this PN and it has been provably ACKed
+ *     (if we receive such a PN again, it must be ignored)
+ *
+ * However, if we were to track this state for every PN ever used in the history
+ * of a connection, the amount of state required would increase unboundedly as
+ * the connection goes on (for example, we would have to store a set of every PN
+ * ever received.)
+ *
+ * RFC 9000 s. 12.3 continues:
+ *
+ *   "Endpoints that track all individual packets for the purposes of detecting
+ *    duplicates are at risk of accumulating excessive state. The data required
+ *    for detecting duplicates can be limited by maintaining a minimum packet
+ *    number below which all packets are immediately dropped."
+ *
+ * Moreover, RFC 9000 s. 13.2.3 states that:
+ *
+ *   "A receiver MUST retain an ACK Range unless it can ensure that it will not
+ *    subsequently accept packets with numbers in that range. Maintaining a
+ *    minimum packet number that increases as ranges are discarded is one way to
+ *    achieve this with minimal state."
+ *
+ * This touches on a subtlety of the original requirement quoted above: the
+ * receiver MUST discard a packet unless it is certain that it has not processed
+ * another packet with the same PN. However, this does not forbid the receiver
+ * from also discarding some PNs even though it has not yet processed them. In
+ * other words, implementations must be conservative and err in the direction of
+ * assuming a packet is a duplicate, but it is acceptable for this to come at
+ * the cost of falsely identifying some packets as duplicates.
+ *
+ * This allows us to bound the amount of state we must keep, and we adopt the
+ * suggested strategy quoted above to do so. We define a watermark PN below
+ * which all PNs are in the same state. This watermark is only ever increased.
+ * Thus the PNs the state for which needs to be explicitly tracked is limited to
+ * only a small number of recent PNs, and all older PNs have an assumed state.
+ *
+ * Any given PN thus falls into one of the following states:
+ *
+ *   - (A) The PN is above the watermark but we have not yet received it.
+ *
+ *         If we receive such a PN, we should process it and record the PN as
+ *         received.
+ *
+ *   - (B) The PN is above the watermark and we have received it.
+ *
+ *         The PN should be included in any future ACK frame we generate.
+ *         If we receive such a PN again, we should ignore it.
+ *
+ *   - (C) The PN is below the watermark.
+ *
+ *         We do not know whether a packet with the given PN was received or
+ *         not. To be safe, if we receive such a packet, it is not processed.
+ *
+ * Note that state (C) corresponds to both "we have processed this PN and it has
+ * been provably ACKed" logical state and a subset of the PNs in the "we have
+ * never processed this PN before" logical state (namely all PNs which were lost
+ * and never received, but which are not recent enough to be above the
+ * watermark). The reason we can merge these states and avoid tracking states
+ * for the PNs in this state is because the provably ACKed and never-received
+ * states are functionally identical in terms of how we need to handle them: we
+ * don't need to do anything for PNs in either of these states, so we don't have
+ * to care about PNs in this state nor do we have to care about distinguishing
+ * the two states for a given PN.
+ *
+ * Note that under this scheme provably ACKed PNs are by definition always below
+ * the watermark; therefore, it follows that when a PN becomes provably ACKed,
+ * the watermark must be immediately increased to exceed it (otherwise we would
+ * keep reporting it in future ACK frames).
+ *
+ * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
+ * to advance the watermark:
+ *
+ *   "When a packet containing an ACK frame is sent, the Largest Acknowledged
+ *    field in that frame can be saved. When a packet containing an ACK frame is
+ *    acknowledged, the receiver can stop acknowledging packets less than or
+ *    equal to the Largest Acknowledged field in the sent ACK frame."
+ *
+ * This is where our scheme's false positives arise. When a packet containing an
+ * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
+ * acked, and the watermark is bumped accordingly. However, the Largest
+ * Acknowledged field does not imply that all lower PNs have been received,
+ * because there may be gaps expressed in the ranges of PNs expressed by that
+ * and previous ACK frames. Thus, some unreceived PNs may be moved below the
+ * watermark, and we may subsequently reject those PNs as possibly being
+ * duplicates even though we have not actually received those PNs. Since we bump
+ * the watermark when a PN becomes provably ACKed, it follows that an unreceived
+ * PN falls below the watermark (and thus becomes a false positive for the
+ * purposes of duplicate detection) when a higher-numbered PN becomes provably
+ * ACKed.
+ *
+ * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
+ * n) will no longer be processed. Although datagrams may be reordered in the
+ * network, a PN we receive can only become provably ACKed after our own
+ * subsequently generated ACK frame is sent in a future TX packet, and then we
+ * receive another RX PN acknowleding that TX packet. This means that a given RX
+ * PN can only become provably ACKed at least 1 RTT after it is received; it is
+ * unlikely that any reordered datagrams will still be "in the network" (and not
+ * lost) by this time. If this does occur for whatever reason and a late PN is
+ * received, the packet will be discarded unprocessed and the PN is simply
+ * handled as though lost (a "written off" PN).
+ *
+ * **Data structure.** Our state for the RX handling side of the ACK manager, as
+ * discussed above, mainly comprises:
+ *
+ *   a) a logical set of PNs, and
+ *   b) a monotonically increasing PN counter (the watermark).
+ *
+ * For (a), we define a data structure which stores a logical set of PNs, which
+ * we use to keep track of which PNs we have received but which have not yet
+ * been provably ACKed, and thus will later need to generate an ACK frame for.
+ *
+ * The correspondance with the logical states discussed above is as follows. A
+ * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
+ * if it is in the logical set of PNs, and in state (A) otherwise.
+ *
+ * Note that PNs are only removed from the PN set (when they become provably
+ * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
+ * from the PN set any other way would be ambiguous as it would be
+ * indistinguishable from a PN we have not yet received and risk us processing a
+ * duplicate packet. In other words, for a given PN:
+ *
+ *   - State (A) can transition to state (B) or (C)
+ *   - State (B) can transition to state (C) only
+ *   - State (C) is the terminal state
+ *
+ * We can query the logical set data structure for PNs which have been received
+ * but which have not been provably ACKed when we want to generate ACK frames.
+ * Since ACK frames can be lost and/or we might not know that the peer has
+ * successfully received them, we might generate multiple ACK frames covering a
+ * given PN until that PN becomes provably ACKed and we finally remove it from
+ * our set (by bumping the watermark) as no longer being our concern.
+ *
+ * The data structure supports the following operations:
+ *
+ *   Insert Range: Adds an inclusive range of packet numbers [start, end]
+ *                 to the set. Equivalent to Insert for each number
+ *                 in the range. (Used when we receive a new PN.)
+ *
+ *   Remove Range: Removes an inclusive range of packet numbers [start, end]
+ *                 from the set. Not all of the range need already be in
+ *                 the set, but any part of the range in the set is removed.
+ *                 (Used when bumping the watermark.)
+ *
+ *   Query:        Is a PN in the data structure?
+ *
+ * The data structure can be iterated.
+ *
+ * For greater efficiency in tracking large numbers of contiguous PNs, we track
+ * PN ranges rather than individual PNs. The data structure manages a list of PN
+ * ranges [[start, end]...]. Internally this is implemented as a doubly linked
+ * sorted list of range structures, which are automatically split and merged as
+ * necessary.
+ *
+ * This data structure requires O(n) traversal of the list for insertion,
+ * removal and query when we are not adding/removing ranges which are near the
+ * beginning or end of the set of ranges. It is expected that the number of PN
+ * ranges needed at any given time will generally be small and that most
+ * operations will be close to the beginning or end of the range.
+ *
+ * Invariant: The data structure is always sorted in ascending order by PN.
+ *
+ * Invariant: No two adjacent ranges ever 'border' one another (have no
+ *            numerical gap between them) as the data structure always ensures
+ *            such ranges are merged.
+ *
+ * Invariant: No two ranges ever overlap.
+ *
+ * Invariant: No range [a, b] ever has a > b.
+ *
+ * Invariant: Since ranges are represented using inclusive bounds, no range
+ *            item inside the data structure can represent a span of zero PNs.
+ *
+ * **Possible duplicates.** A PN is considered a possible duplicate when either:
+ *
+ *  a) its PN is already in the PN set (i.e. has already been received), or
+ *  b) its PN is below the watermark (i.e. was provably ACKed or written off).
+ *
+ * A packet with a given PN is considered 'processable' when that PN is not
+ * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
+ *
+ * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
+ * provably ACKed. This occurs when an ACK frame is received by the TX side of
+ * the ACK manager; thus, there is necessary interaction between the TX and RX
+ * sides of the ACK manager.
+ *
+ * This is implemented as follows. When a packet is queued as sent in the TX
+ * side of the ACK manager, it may optionally have a Largest Acked value set on
+ * it. The user of the ACK manager should do this if the packet being
+ * transmitted contains an ACK frame, by setting the field to the Largest Acked
+ * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
+ * When a TX packet is eventually acknowledged which has this field set, it is
+ * used to update the state of the RX side of the ACK manager by bumping the
+ * watermark accordingly.
+ */
+struct pn_set_item_st {
+    struct pn_set_item_st *prev, *next;
+    OSSL_QUIC_ACK_RANGE    range;
+};
+
+struct pn_set_st {
+    struct pn_set_item_st *head, *tail;
+
+    /* Number of ranges (not PNs) in the set. */
+    size_t                 num_ranges;
+};
+
+static void pn_set_init(struct pn_set_st *s)
+{
+    s->head = s->tail = NULL;
+    s->num_ranges = 0;
+}
+
+static void pn_set_destroy(struct pn_set_st *s)
+{
+    struct pn_set_item_st *x, *xnext;
+
+    for (x = s->head; x != NULL; x = xnext) {
+        xnext = x->next;
+        OPENSSL_free(x);
+    }
+}
+
+/* Possible merge of x, x->prev */
+static void pn_set_merge_adjacent(struct pn_set_st *s, struct pn_set_item_st *x)
+{
+    struct pn_set_item_st *xprev = x->prev;
+
+    if (xprev == NULL)
+        return;
+
+    if (x->range.start - 1 != xprev->range.end)
+        return;
+
+    x->range.start = xprev->range.start;
+    x->prev = xprev->prev;
+    if (x->prev != NULL)
+        x->prev->next = x;
+
+    if (s->head == xprev)
+        s->head = x;
+
+    OPENSSL_free(xprev);
+    --s->num_ranges;
+}
+
+/* Returns 1 if there exists a PN x which falls within both ranges a and b. */
+static int pn_range_overlaps(const OSSL_QUIC_ACK_RANGE *a,
+                             const OSSL_QUIC_ACK_RANGE *b)
+{
+    return ossl_quic_pn_min(a->end, b->end)
+        >= ossl_quic_pn_max(a->start, b->start);
+}
+
+/*
+ * Insert a range into a PN set. Returns 0 on allocation failure, in which case
+ * the PN set is in a valid but undefined state. Otherwise, returns 1. Ranges
+ * can overlap existing ranges without limitation. If a range is a subset of
+ * an existing range in the set, this is a no-op and returns 1.
+ */
+static int pn_set_insert(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+    struct pn_set_item_st *x, *z, *xnext, *f, *fnext;
+    QUIC_PN start = range->start, end = range->end;
+
+    if (!ossl_assert(start <= end))
+        return 0;
+
+    if (s->head == NULL) {
+        /* Nothing in the set yet, so just add this range. */
+        x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+        if (x == NULL)
+            return 0;
+
+        x->range.start = start;
+        x->range.end   = end;
+        s->head = s->tail = x;
+        ++s->num_ranges;
+        return 1;
+    }
+
+    if (start > s->tail->range.end) {
+        /*
+         * Range is after the latest range in the set, so append.
+         *
+         * Note: The case where the range is before the earliest range in the
+         * set is handled as a degenerate case of the final case below. See
+         * optimization note (*) below.
+         */
+        if (s->tail->range.end + 1 == start) {
+            s->tail->range.end = end;
+            return 1;
+        }
+
+        x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+        if (x == NULL)
+            return 0;
+
+        x->range.start = start;
+        x->range.end   = end;
+        x->prev        = s->tail;
+        if (s->tail != NULL)
+            s->tail->next = x;
+        s->tail = x;
+        ++s->num_ranges;
+        return 1;
+    }
+
+    if (start <= s->head->range.start && end >= s->tail->range.end) {
+        /*
+         * New range dwarfs all ranges in our set.
+         *
+         * Free everything except the first range in the set, which we scavenge
+         * and reuse.
+         */
+        for (x = s->head->next; x != NULL; x = xnext) {
+            xnext = x->next;
+            OPENSSL_free(x);
+        }
+
+        s->head->range.start = start;
+        s->head->range.end   = end;
+        s->head->next = s->head->prev = NULL;
+        s->tail = s->head;
+        s->num_ranges = 1;
+        return 1;
+    }
+
+    /*
+     * Walk backwards since we will most often be inserting at the end. As an
+     * optimization, test the head node first and skip iterating over the
+     * entire list if we are inserting at the start. The assumption is that
+     * insertion at the start and end of the space will be the most common
+     * operations. (*)
+     */
+    z = end < s->head->range.start ? s->head : s->tail;
+
+    for (; z != NULL; z = z->prev) {
+        /* An existing range dwarfs our new range (optimisation). */
+        if (z->range.start <= start && z->range.end >= end)
+            return 1;
+
+        if (pn_range_overlaps(&z->range, range)) {
+            /*
+             * Our new range overlaps an existing range, or possibly several
+             * existing ranges.
+             */
+            struct pn_set_item_st *ovend = z;
+            OSSL_QUIC_ACK_RANGE t;
+            size_t n = 0;
+
+            t.end = ossl_quic_pn_max(end, z->range.end);
+
+            /* Get earliest overlapping range. */
+            for (; z->prev != NULL && pn_range_overlaps(&z->prev->range, range);
+                   z = z->prev);
+
+            t.start = ossl_quic_pn_min(start, z->range.start);
+
+            /* Replace sequence of nodes z..ovend with ovend only. */
+            ovend->range = t;
+            ovend->prev = z->prev;
+            if (z->prev != NULL)
+                z->prev->next = ovend;
+            if (s->head == z)
+                s->head = ovend;
+
+            /* Free now unused nodes. */
+            for (f = z; f != ovend; f = fnext, ++n) {
+                fnext = f->next;
+                OPENSSL_free(f);
+            }
+
+            s->num_ranges -= n;
+            break;
+        } else if (end < z->range.start
+                    && (z->prev == NULL || start > z->prev->range.end)) {
+            if (z->range.start == end + 1) {
+                /* We can extend the following range backwards. */
+                z->range.start = start;
+
+                /*
+                 * If this closes a gap we now need to merge
+                 * consecutive nodes.
+                 */
+                pn_set_merge_adjacent(s, z);
+            } else if (z->prev != NULL && z->prev->range.end + 1 == start) {
+                /* We can extend the preceding range forwards. */
+                z->prev->range.end = end;
+
+                /*
+                 * If this closes a gap we now need to merge
+                 * consecutive nodes.
+                 */
+                pn_set_merge_adjacent(s, z);
+            } else {
+                /*
+                 * The new interval is between intervals without overlapping or
+                 * touching them, so insert between, preserving sort.
+                 */
+                x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+                if (x == NULL)
+                    return 0;
+
+                x->range.start = start;
+                x->range.end   = end;
+
+                x->next = z;
+                x->prev = z->prev;
+                if (x->prev != NULL)
+                    x->prev->next = x;
+                z->prev = x;
+                if (s->head == z)
+                    s->head = x;
+
+                ++s->num_ranges;
+            }
+            break;
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * Remove a range from the set. Returns 0 on allocation failure, in which case
+ * the PN set is unchanged. Otherwise, returns 1. Ranges which are not already
+ * in the set can be removed without issue. If a passed range is not in the PN
+ * set at all, this is a no-op and returns 1.
+ */
+static int pn_set_remove(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+    struct pn_set_item_st *z, *zprev, *y;
+    QUIC_PN start = range->start, end = range->end;
+
+    if (!ossl_assert(start <= end))
+        return 0;
+
+    /* Walk backwards since we will most often be removing at the end. */
+    for (z = s->tail; z != NULL; z = zprev) {
+        zprev = z->prev;
+
+        if (start > z->range.end)
+            /* No overlapping ranges can exist beyond this point, so stop. */
+            break;
+
+        if (start <= z->range.start && end >= z->range.end) {
+            /*
+             * The range being removed dwarfs this range, so it should be
+             * removed.
+             */
+            if (z->next != NULL)
+                z->next->prev = z->prev;
+            if (z->prev != NULL)
+                z->prev->next = z->next;
+            if (s->head == z)
+                s->head = z->next;
+            if (s->tail == z)
+                s->tail = z->prev;
+
+            OPENSSL_free(z);
+            --s->num_ranges;
+        } else if (start <= z->range.start) {
+            /*
+             * The range being removed includes start of this range, but does
+             * not cover the entire range (as this would be caught by the case
+             * above). Shorten the range.
+             */
+            assert(end < z->range.end);
+            z->range.start = end + 1;
+        } else if (end >= z->range.end) {
+            /*
+             * The range being removed includes the end of this range, but does
+             * not cover the entire range (as this would be caught by the case
+             * above). Shorten the range. We can also stop iterating.
+             */
+            assert(start > z->range.start);
+            assert(start > 0);
+            z->range.end = start - 1;
+            break;
+        } else if (start > z->range.start && end < z->range.end) {
+            /*
+             * The range being removed falls entirely in this range, so cut it
+             * into two. Cases where a zero-length range would be created are
+             * handled by the above cases.
+             */
+            y = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+            if (y == NULL)
+                return 0;
+
+            y->range.end   = z->range.end;
+            y->range.start = end + 1;
+            y->next = z->next;
+            y->prev = z;
+            if (y->next != NULL)
+                y->next->prev = y;
+
+            z->range.end = start - 1;
+            z->next = y;
+
+            if (s->tail == z)
+                s->tail = y;
+
+            ++s->num_ranges;
+            break;
+        } else {
+            /* Assert no partial overlap; all cases should be covered above. */
+            assert(!pn_range_overlaps(&z->range, range));
+        }
+    }
+
+     return 1;
+}
+
+/* Returns 1 iff the given PN is in the PN set. */
+static int pn_set_query(const struct pn_set_st *s, QUIC_PN pn)
+{
+    struct pn_set_item_st *x;
+
+    if (s->head == NULL)
+        return 0;
+
+    for (x = s->tail; x != NULL; x = x->prev)
+        if (x->range.start <= pn && x->range.end >= pn)
+            return 1;
+        else if (x->range.end < pn)
+            return 0;
+
+    return 0;
+}
+
+struct rx_pkt_history_st {
+    struct pn_set_st set;
+
+    /*
+     * Invariant: PNs below this are not in the set.
+     * Invariant: This is monotonic and only ever increases.
+     */
+    QUIC_PN watermark;
+};
+
+static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
+                                         QUIC_PN watermark);
+
+static void rx_pkt_history_init(struct rx_pkt_history_st *h)
+{
+    pn_set_init(&h->set);
+    h->watermark = 0;
+}
+
+static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
+{
+    pn_set_destroy(&h->set);
+}
+
+/*
+ * Limit the number of ACK ranges we store to prevent resource consumption DoS
+ * attacks.
+ */
+#define MAX_RX_ACK_RANGES   32
+
+static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
+{
+    QUIC_PN highest = QUIC_PN_INVALID;
+
+    while (h->set.num_ranges > MAX_RX_ACK_RANGES) {
+        OSSL_QUIC_ACK_RANGE r = h->set.head->range;
+
+        highest = (highest == QUIC_PN_INVALID)
+            ? r.end : ossl_quic_pn_max(highest, r.end);
+
+        pn_set_remove(&h->set, &r);
+    }
+
+    /*
+     * Bump watermark to cover all PNs we removed to avoid accidential
+     * reprocessing of packets.
+     */
+    if (highest != QUIC_PN_INVALID)
+        rx_pkt_history_bump_watermark(h, highest + 1);
+}
+
+static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h,
+                                 QUIC_PN pn)
+{
+    OSSL_QUIC_ACK_RANGE r;
+
+    r.start = pn;
+    r.end   = pn;
+
+    if (pn < h->watermark)
+        return 1; /* consider this a success case */
+
+    if (pn_set_insert(&h->set, &r) != 1)
+        return 0;
+
+    rx_pkt_history_trim_range_count(h);
+    return 1;
+}
+
+static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
+                                         QUIC_PN watermark)
+{
+    OSSL_QUIC_ACK_RANGE r;
+
+    if (watermark <= h->watermark)
+        return 1;
+
+    /* Remove existing PNs below the watermark. */
+    r.start = 0;
+    r.end   = watermark - 1;
+    if (pn_set_remove(&h->set, &r) != 1)
+        return 0;
+
+    h->watermark = watermark;
+    return 1;
+}
+
+/*
+ * ACK Manager Implementation
+ * **************************
+ * Implementation of the ACK manager proper.
+ */
+
+/* Constants used by the ACK manager; see RFC 9002. */
+#define K_GRANULARITY           (1 * OSSL_TIME_MS)
+#define K_PKT_THRESHOLD         3
+#define K_TIME_THRESHOLD_NUM    9
+#define K_TIME_THRESHOLD_DEN    8
+
+/* The maximum number of times we allow PTO to be doubled. */
+#define MAX_PTO_COUNT          16
+
+struct ossl_ackm_st {
+    /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
+    struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM];
+
+    /* Our list of received PNs which are not yet provably acked. */
+    struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM];
+
+    /* Polymorphic dependencies that we consume. */
+    OSSL_TIME             (*now)(void *arg);
+    void                   *now_arg;
+    OSSL_STATM             *statm;
+    const OSSL_CC_METHOD   *cc_method;
+    OSSL_CC_DATA           *cc_data;
+
+    /* RFC 9002 variables. */
+    uint32_t        pto_count;
+    QUIC_PN         largest_acked_pkt[QUIC_PN_SPACE_NUM];
+    OSSL_TIME       time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM];
+    OSSL_TIME       loss_time[QUIC_PN_SPACE_NUM];
+    OSSL_TIME       loss_detection_deadline;
+
+    /* Lowest PN which is still not known to be ACKed. */
+    QUIC_PN         lowest_unacked_pkt[QUIC_PN_SPACE_NUM];
+
+    /* Time at which we got our first RTT sample, or 0. */
+    OSSL_TIME       first_rtt_sample;
+
+    /*
+     * A packet's num_bytes are added to this if it is inflight,
+     * and removed again once ack'd/lost/discarded.
+     */
+    uint64_t        bytes_in_flight;
+
+    /*
+     * A packet's num_bytes are added to this if it is both inflight and
+     * ack-eliciting, and removed again once ack'd/lost/discarded.
+     */
+    uint64_t        ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM];
+
+    /* Count of ECN-CE events. */
+    uint64_t        peer_ecnce[QUIC_PN_SPACE_NUM];
+
+    /* Set to 1 when the handshake is confirmed. */
+    char            handshake_confirmed;
+
+    /* Set to 1 when the peer has completed address validation. */
+    char            peer_completed_addr_validation;
+
+    /* Set to 1 when a PN space has been discarded. */
+    char            discarded[QUIC_PN_SPACE_NUM];
+
+    /* Set to 1 when we think an ACK frame should be generated. */
+    char            rx_ack_desired[QUIC_PN_SPACE_NUM];
+
+    /* Set to 1 if an ACK frame has ever been generated. */
+    char            rx_ack_generated[QUIC_PN_SPACE_NUM];
+
+    /* Probe request counts for reporting to the user. */
+    OSSL_ACKM_PROBE_INFO    pending_probe;
+
+    /* Generated ACK frames for each PN space. */
+    OSSL_QUIC_FRAME_ACK     ack[QUIC_PN_SPACE_NUM];
+    OSSL_QUIC_ACK_RANGE     ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES];
+
+    /* Other RX state. */
+    /* Largest PN we have RX'd. */
+    QUIC_PN         rx_largest_pn[QUIC_PN_SPACE_NUM];
+
+    /* Time at which the PN in rx_largest_pn was RX'd. */
+    OSSL_TIME       rx_largest_time[QUIC_PN_SPACE_NUM];
+
+    /*
+     * ECN event counters. Each time we receive a packet with a given ECN label,
+     * the corresponding ECN counter here is incremented.
+     */
+    uint64_t        rx_ect0[QUIC_PN_SPACE_NUM];
+    uint64_t        rx_ect1[QUIC_PN_SPACE_NUM];
+    uint64_t        rx_ecnce[QUIC_PN_SPACE_NUM];
+
+    /*
+     * Number of ACK-eliciting packets since last ACK. We use this to defer
+     * emitting ACK frames until a threshold number of ACK-eliciting packets
+     * have been received.
+     */
+    uint32_t        rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM];
+
+    /*
+     * The ACK frame coalescing deadline at which we should flush any unsent ACK
+     * frames.
+     */
+    OSSL_TIME       rx_ack_flush_deadline[QUIC_PN_SPACE_NUM];
+
+    /* Callbacks for deadline updates. */
+    void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg);
+    void *loss_detection_deadline_cb_arg;
+
+    void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg);
+    void *ack_deadline_cb_arg;
+};
+
+static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y)
+{
+    return x < y ? x : y