summaryrefslogtreecommitdiffstats
path: root/ssl/quic/quic_ackm.c
diff options
context:
space:
mode:
Diffstat (limited to 'ssl/quic/quic_ackm.c')
-rw-r--r--ssl/quic/quic_ackm.c2018
1 files changed, 2018 insertions, 0 deletions
diff --git a/ssl/quic/quic_ackm.c b/ssl/quic/quic_ackm.c
new file mode 100644
index 0000000000..670b2d316e
--- /dev/null
+++ b/ssl/quic/quic_ackm.c
@@ -0,0 +1,2018 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/quic_ackm.h"
+#include "internal/common.h"
+#include <assert.h>
+
+/*
+ * TX Packet History
+ * *****************
+ *
+ * The TX Packet History object tracks information about packets which have been
+ * sent for which we later expect to receive an ACK. It is essentially a simple
+ * database keeping a list of packet information structures in packet number
+ * order which can also be looked up directly by packet number.
+ *
+ * We currently only allow packets to be appended to the list (i.e. the packet
+ * numbers of the packets appended to the list must monotonically increase), as
+ * we should not currently need more general functionality such as a sorted list
+ * insert.
+ */
+struct tx_pkt_history_st {
+ /* A linked list of all our packets. */
+ OSSL_ACKM_TX_PKT *head, *tail;
+
+ /* Number of packets in the list. */
+ size_t num_packets;
+
+ /*
+ * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
+ *
+ * Invariant: A packet is in this map if and only if it is in the linked
+ * list.
+ */
+ LHASH_OF(OSSL_ACKM_TX_PKT) *map;
+
+ /*
+ * The lowest packet number which may currently be added to the history list
+ * (inclusive). We do not allow packet numbers to be added to the history
+ * list non-monotonically, so packet numbers must be greater than or equal
+ * to this value.
+ */
+ uint64_t watermark;
+
+ /*
+ * Packet number of the highest packet info structure we have yet appended
+ * to the list. This is usually one less than watermark, except when we have
+ * not added any packet yet.
+ */
+ uint64_t highest_sent;
+};
+
+DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);
+
+static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
+{
+ return pkt->pkt_num;
+}
+
+static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
+ const OSSL_ACKM_TX_PKT *b)
+{
+ if (a->pkt_num < b->pkt_num)
+ return -1;
+ if (a->pkt_num > b->pkt_num)
+ return 1;
+ return 0;
+}
+
+static int
+tx_pkt_history_init(struct tx_pkt_history_st *h)
+{
+ h->head = h->tail = NULL;
+ h->num_packets = 0;
+ h->watermark = 0;
+ h->highest_sent = 0;
+
+ h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
+ if (h->map == NULL)
+ return 0;
+
+ return 1;
+}
+
+static void
+tx_pkt_history_destroy(struct tx_pkt_history_st *h)
+{
+ lh_OSSL_ACKM_TX_PKT_free(h->map);
+ h->map = NULL;
+ h->head = h->tail = NULL;
+}
+
+static int
+tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
+ OSSL_ACKM_TX_PKT *pkt)
+{
+ OSSL_ACKM_TX_PKT *existing;
+
+ /*
+ * There should not be any existing packet with this number
+ * in our mapping.
+ */
+ existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
+ if (!ossl_assert(existing == NULL))
+ return 0;
+
+ /* Should not already be in a list. */
+ if (!ossl_assert(pkt->next == NULL && pkt->prev == NULL))
+ return 0;
+
+ lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);
+
+ pkt->next = NULL;
+ pkt->prev = h->tail;
+ if (h->tail != NULL)
+ h->tail->next = pkt;
+ h->tail = pkt;
+ if (h->head == NULL)
+ h->head = h->tail;
+
+ ++h->num_packets;
+ return 1;
+}
+
+/* Adds a packet information structure to the history list. */
+static int
+tx_pkt_history_add(struct tx_pkt_history_st *h,
+ OSSL_ACKM_TX_PKT *pkt)
+{
+ if (!ossl_assert(pkt->pkt_num >= h->watermark))
+ return 0;
+
+ if (tx_pkt_history_add_actual(h, pkt) < 1)
+ return 0;
+
+ h->watermark = pkt->pkt_num + 1;
+ h->highest_sent = pkt->pkt_num;
+ return 1;
+}
+
+/* Retrieve a packet information structure by packet number. */
+static OSSL_ACKM_TX_PKT *
+tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+ OSSL_ACKM_TX_PKT key;
+
+ key.pkt_num = pkt_num;
+
+ return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
+}
+
+/* Remove a packet information structure from the history log. */
+static int
+tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+ OSSL_ACKM_TX_PKT key, *pkt;
+ key.pkt_num = pkt_num;
+
+ pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
+ if (pkt == NULL)
+ return 0;
+
+ if (pkt->prev != NULL)
+ pkt->prev->next = pkt->next;
+ if (pkt->next != NULL)
+ pkt->next->prev = pkt->prev;
+ if (h->head == pkt)
+ h->head = pkt->next;
+ if (h->tail == pkt)
+ h->tail = pkt->prev;
+
+ pkt->prev = pkt->next = NULL;
+
+ lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
+ --h->num_packets;
+ return 1;
+}
+
+/*
+ * RX Packet Number Tracking
+ * *************************
+ *
+ * **Background.** The RX side of the ACK manager must track packets we have
+ * received for which we have to generate ACK frames. Broadly, this means we
+ * store a set of packet numbers which we have received but which we do not know
+ * for a fact that the transmitter knows we have received.
+ *
+ * This must handle various situations:
+ *
+ * 1. We receive a packet but have not sent an ACK yet, so the transmitter
+ * does not know whether we have received it or not yet.
+ *
+ * 2. We receive a packet and send an ACK which is lost. We do not
+ * immediately know that the ACK was lost and the transmitter does not know
+ * that we have received the packet.
+ *
+ * 3. We receive a packet and send an ACK which is received by the
+ * transmitter. The transmitter does not immediately respond with an ACK,
+ * or responds with an ACK which is lost. The transmitter knows that we
+ * have received the packet, but we do not know for sure that it knows,
+ * because the ACK we sent could have been lost.
+ *
+ * 4. We receive a packet and send an ACK which is received by the
+ * transmitter. The transmitter subsequently sends us an ACK which confirms
+ * its receipt of the ACK we sent, and we successfully receive that ACK, so
+ * we know that the transmitter knows, that we received the original
+ * packet.
+ *
+ * Only when we reach case (4) are we relieved of any need to track a given
+ * packet number we have received, because only in this case do we know for sure
+ * that the peer knows we have received the packet. Having reached case (4) we
+ * will never again need to generate an ACK containing the PN in question, but
+ * until we reach that point, we must keep track of the PN as not having been
+ * provably ACKed, as we may have to keep generating ACKs for the given PN not
+ * just until the transmitter receives one, but until we know that it has
+ * received one. This will be referred to herein as "provably ACKed".
+ *
+ * **Duplicate handling.** The above discusses the case where we have received a
+ * packet with a given PN but are at best unsure whether the sender knows we
+ * have received it or not. However, we must also handle the case where we have
+ * yet to receive a packet with a given PN in the first place. The reason for
+ * this is because of the requirement expressed by RFC 9000 s. 12.3:
+ *
+ * "A receiver MUST discard a newly unprotected packet unless it is certain
+ * that it has not processed another packet with the same packet number from
+ * the same packet number space."
+ *
+ * We must ensure we never process a duplicate PN. As such, each possible PN we
+ * can receive must exist in one of the following logical states:
+ *
+ * - We have never processed this PN before
+ * (so if we receive such a PN, it can be processed)
+ *
+ * - We have processed this PN but it has not yet been provably ACKed
+ * (and should therefore be in any future ACK frame generated;
+ * if we receive such a PN again, it must be ignored)
+ *
+ * - We have processed this PN and it has been provably ACKed
+ * (if we receive such a PN again, it must be ignored)
+ *
+ * However, if we were to track this state for every PN ever used in the history
+ * of a connection, the amount of state required would increase unboundedly as
+ * the connection goes on (for example, we would have to store a set of every PN
+ * ever received.)
+ *
+ * RFC 9000 s. 12.3 continues:
+ *
+ * "Endpoints that track all individual packets for the purposes of detecting
+ * duplicates are at risk of accumulating excessive state. The data required
+ * for detecting duplicates can be limited by maintaining a minimum packet
+ * number below which all packets are immediately dropped."
+ *
+ * Moreover, RFC 9000 s. 13.2.3 states that:
+ *
+ * "A receiver MUST retain an ACK Range unless it can ensure that it will not
+ * subsequently accept packets with numbers in that range. Maintaining a
+ * minimum packet number that increases as ranges are discarded is one way to
+ * achieve this with minimal state."
+ *
+ * This touches on a subtlety of the original requirement quoted above: the
+ * receiver MUST discard a packet unless it is certain that it has not processed
+ * another packet with the same PN. However, this does not forbid the receiver
+ * from also discarding some PNs even though it has not yet processed them. In
+ * other words, implementations must be conservative and err in the direction of
+ * assuming a packet is a duplicate, but it is acceptable for this to come at
+ * the cost of falsely identifying some packets as duplicates.
+ *
+ * This allows us to bound the amount of state we must keep, and we adopt the
+ * suggested strategy quoted above to do so. We define a watermark PN below
+ * which all PNs are in the same state. This watermark is only ever increased.
+ * Thus the PNs the state for which needs to be explicitly tracked is limited to
+ * only a small number of recent PNs, and all older PNs have an assumed state.
+ *
+ * Any given PN thus falls into one of the following states:
+ *
+ * - (A) The PN is above the watermark but we have not yet received it.
+ *
+ * If we receive such a PN, we should process it and record the PN as
+ * received.
+ *
+ * - (B) The PN is above the watermark and we have received it.
+ *
+ * The PN should be included in any future ACK frame we generate.
+ * If we receive such a PN again, we should ignore it.
+ *
+ * - (C) The PN is below the watermark.
+ *
+ * We do not know whether a packet with the given PN was received or
+ * not. To be safe, if we receive such a packet, it is not processed.
+ *
+ * Note that state (C) corresponds to both "we have processed this PN and it has
+ * been provably ACKed" logical state and a subset of the PNs in the "we have
+ * never processed this PN before" logical state (namely all PNs which were lost
+ * and never received, but which are not recent enough to be above the
+ * watermark). The reason we can merge these states and avoid tracking states
+ * for the PNs in this state is because the provably ACKed and never-received
+ * states are functionally identical in terms of how we need to handle them: we
+ * don't need to do anything for PNs in either of these states, so we don't have
+ * to care about PNs in this state nor do we have to care about distinguishing
+ * the two states for a given PN.
+ *
+ * Note that under this scheme provably ACKed PNs are by definition always below
+ * the watermark; therefore, it follows that when a PN becomes provably ACKed,
+ * the watermark must be immediately increased to exceed it (otherwise we would
+ * keep reporting it in future ACK frames).
+ *
+ * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
+ * to advance the watermark:
+ *
+ * "When a packet containing an ACK frame is sent, the Largest Acknowledged
+ * field in that frame can be saved. When a packet containing an ACK frame is
+ * acknowledged, the receiver can stop acknowledging packets less than or
+ * equal to the Largest Acknowledged field in the sent ACK frame."
+ *
+ * This is where our scheme's false positives arise. When a packet containing an
+ * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
+ * acked, and the watermark is bumped accordingly. However, the Largest
+ * Acknowledged field does not imply that all lower PNs have been received,
+ * because there may be gaps expressed in the ranges of PNs expressed by that
+ * and previous ACK frames. Thus, some unreceived PNs may be moved below the
+ * watermark, and we may subsequently reject those PNs as possibly being
+ * duplicates even though we have not actually received those PNs. Since we bump
+ * the watermark when a PN becomes provably ACKed, it follows that an unreceived
+ * PN falls below the watermark (and thus becomes a false positive for the
+ * purposes of duplicate detection) when a higher-numbered PN becomes provably
+ * ACKed.
+ *
+ * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
+ * n) will no longer be processed. Although datagrams may be reordered in the
+ * network, a PN we receive can only become provably ACKed after our own
+ * subsequently generated ACK frame is sent in a future TX packet, and then we
+ * receive another RX PN acknowleding that TX packet. This means that a given RX
+ * PN can only become provably ACKed at least 1 RTT after it is received; it is
+ * unlikely that any reordered datagrams will still be "in the network" (and not
+ * lost) by this time. If this does occur for whatever reason and a late PN is
+ * received, the packet will be discarded unprocessed and the PN is simply
+ * handled as though lost (a "written off" PN).
+ *
+ * **Data structure.** Our state for the RX handling side of the ACK manager, as
+ * discussed above, mainly comprises:
+ *
+ * a) a logical set of PNs, and
+ * b) a monotonically increasing PN counter (the watermark).
+ *
+ * For (a), we define a data structure which stores a logical set of PNs, which
+ * we use to keep track of which PNs we have received but which have not yet
+ * been provably ACKed, and thus will later need to generate an ACK frame for.
+ *
+ * The correspondance with the logical states discussed above is as follows. A
+ * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
+ * if it is in the logical set of PNs, and in state (A) otherwise.
+ *
+ * Note that PNs are only removed from the PN set (when they become provably
+ * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
+ * from the PN set any other way would be ambiguous as it would be
+ * indistinguishable from a PN we have not yet received and risk us processing a
+ * duplicate packet. In other words, for a given PN:
+ *
+ * - State (A) can transition to state (B) or (C)
+ * - State (B) can transition to state (C) only
+ * - State (C) is the terminal state
+ *
+ * We can query the logical set data structure for PNs which have been received
+ * but which have not been provably ACKed when we want to generate ACK frames.
+ * Since ACK frames can be lost and/or we might not know that the peer has
+ * successfully received them, we might generate multiple ACK frames covering a
+ * given PN until that PN becomes provably ACKed and we finally remove it from
+ * our set (by bumping the watermark) as no longer being our concern.
+ *
+ * The data structure supports the following operations:
+ *
+ * Insert Range: Adds an inclusive range of packet numbers [start, end]
+ * to the set. Equivalent to Insert for each number
+ * in the range. (Used when we receive a new PN.)
+ *
+ * Remove Range: Removes an inclusive range of packet numbers [start, end]
+ * from the set. Not all of the range need already be in
+ * the set, but any part of the range in the set is removed.
+ * (Used when bumping the watermark.)
+ *
+ * Query: Is a PN in the data structure?
+ *
+ * The data structure can be iterated.
+ *
+ * For greater efficiency in tracking large numbers of contiguous PNs, we track
+ * PN ranges rather than individual PNs. The data structure manages a list of PN
+ * ranges [[start, end]...]. Internally this is implemented as a doubly linked
+ * sorted list of range structures, which are automatically split and merged as
+ * necessary.
+ *
+ * This data structure requires O(n) traversal of the list for insertion,
+ * removal and query when we are not adding/removing ranges which are near the
+ * beginning or end of the set of ranges. It is expected that the number of PN
+ * ranges needed at any given time will generally be small and that most
+ * operations will be close to the beginning or end of the range.
+ *
+ * Invariant: The data structure is always sorted in ascending order by PN.
+ *
+ * Invariant: No two adjacent ranges ever 'border' one another (have no
+ * numerical gap between them) as the data structure always ensures
+ * such ranges are merged.
+ *
+ * Invariant: No two ranges ever overlap.
+ *
+ * Invariant: No range [a, b] ever has a > b.
+ *
+ * Invariant: Since ranges are represented using inclusive bounds, no range
+ * item inside the data structure can represent a span of zero PNs.
+ *
+ * **Possible duplicates.** A PN is considered a possible duplicate when either:
+ *
+ * a) its PN is already in the PN set (i.e. has already been received), or
+ * b) its PN is below the watermark (i.e. was provably ACKed or written off).
+ *
+ * A packet with a given PN is considered 'processable' when that PN is not
+ * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
+ *
+ * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
+ * provably ACKed. This occurs when an ACK frame is received by the TX side of
+ * the ACK manager; thus, there is necessary interaction between the TX and RX
+ * sides of the ACK manager.
+ *
+ * This is implemented as follows. When a packet is queued as sent in the TX
+ * side of the ACK manager, it may optionally have a Largest Acked value set on
+ * it. The user of the ACK manager should do this if the packet being
+ * transmitted contains an ACK frame, by setting the field to the Largest Acked
+ * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
+ * When a TX packet is eventually acknowledged which has this field set, it is
+ * used to update the state of the RX side of the ACK manager by bumping the
+ * watermark accordingly.
+ */
+struct pn_set_item_st {
+ struct pn_set_item_st *prev, *next;
+ OSSL_QUIC_ACK_RANGE range;
+};
+
+struct pn_set_st {
+ struct pn_set_item_st *head, *tail;
+
+ /* Number of ranges (not PNs) in the set. */
+ size_t num_ranges;
+};
+
+static void pn_set_init(struct pn_set_st *s)
+{
+ s->head = s->tail = NULL;
+ s->num_ranges = 0;
+}
+
+static void pn_set_destroy(struct pn_set_st *s)
+{
+ struct pn_set_item_st *x, *xnext;
+
+ for (x = s->head; x != NULL; x = xnext) {
+ xnext = x->next;
+ OPENSSL_free(x);
+ }
+}
+
+/* Possible merge of x, x->prev */
+static void pn_set_merge_adjacent(struct pn_set_st *s, struct pn_set_item_st *x)
+{
+ struct pn_set_item_st *xprev = x->prev;
+
+ if (xprev == NULL)
+ return;
+
+ if (x->range.start - 1 != xprev->range.end)
+ return;
+
+ x->range.start = xprev->range.start;
+ x->prev = xprev->prev;
+ if (x->prev != NULL)
+ x->prev->next = x;
+
+ if (s->head == xprev)
+ s->head = x;
+
+ OPENSSL_free(xprev);
+ --s->num_ranges;
+}
+
+/* Returns 1 if there exists a PN x which falls within both ranges a and b. */
+static int pn_range_overlaps(const OSSL_QUIC_ACK_RANGE *a,
+ const OSSL_QUIC_ACK_RANGE *b)
+{
+ return ossl_quic_pn_min(a->end, b->end)
+ >= ossl_quic_pn_max(a->start, b->start);
+}
+
+/*
+ * Insert a range into a PN set. Returns 0 on allocation failure, in which case
+ * the PN set is in a valid but undefined state. Otherwise, returns 1. Ranges
+ * can overlap existing ranges without limitation. If a range is a subset of
+ * an existing range in the set, this is a no-op and returns 1.
+ */
+static int pn_set_insert(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+ struct pn_set_item_st *x, *z, *xnext, *f, *fnext;
+ QUIC_PN start = range->start, end = range->end;
+
+ if (!ossl_assert(start <= end))
+ return 0;
+
+ if (s->head == NULL) {
+ /* Nothing in the set yet, so just add this range. */
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+ s->head = s->tail = x;
+ ++s->num_ranges;
+ return 1;
+ }
+
+ if (start > s->tail->range.end) {
+ /*
+ * Range is after the latest range in the set, so append.
+ *
+ * Note: The case where the range is before the earliest range in the
+ * set is handled as a degenerate case of the final case below. See
+ * optimization note (*) below.
+ */
+ if (s->tail->range.end + 1 == start) {
+ s->tail->range.end = end;
+ return 1;
+ }
+
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+ x->prev = s->tail;
+ if (s->tail != NULL)
+ s->tail->next = x;
+ s->tail = x;
+ ++s->num_ranges;
+ return 1;
+ }
+
+ if (start <= s->head->range.start && end >= s->tail->range.end) {
+ /*
+ * New range dwarfs all ranges in our set.
+ *
+ * Free everything except the first range in the set, which we scavenge
+ * and reuse.
+ */
+ for (x = s->head->next; x != NULL; x = xnext) {
+ xnext = x->next;
+ OPENSSL_free(x);
+ }
+
+ s->head->range.start = start;
+ s->head->range.end = end;
+ s->head->next = s->head->prev = NULL;
+ s->tail = s->head;
+ s->num_ranges = 1;
+ return 1;
+ }
+
+ /*
+ * Walk backwards since we will most often be inserting at the end. As an
+ * optimization, test the head node first and skip iterating over the
+ * entire list if we are inserting at the start. The assumption is that
+ * insertion at the start and end of the space will be the most common
+ * operations. (*)
+ */
+ z = end < s->head->range.start ? s->head : s->tail;
+
+ for (; z != NULL; z = z->prev) {
+ /* An existing range dwarfs our new range (optimisation). */
+ if (z->range.start <= start && z->range.end >= end)
+ return 1;
+
+ if (pn_range_overlaps(&z->range, range)) {
+ /*
+ * Our new range overlaps an existing range, or possibly several
+ * existing ranges.
+ */
+ struct pn_set_item_st *ovend = z;
+ OSSL_QUIC_ACK_RANGE t;
+ size_t n = 0;
+
+ t.end = ossl_quic_pn_max(end, z->range.end);
+
+ /* Get earliest overlapping range. */
+ for (; z->prev != NULL && pn_range_overlaps(&z->prev->range, range);
+ z = z->prev);
+
+ t.start = ossl_quic_pn_min(start, z->range.start);
+
+ /* Replace sequence of nodes z..ovend with ovend only. */
+ ovend->range = t;
+ ovend->prev = z->prev;
+ if (z->prev != NULL)
+ z->prev->next = ovend;
+ if (s->head == z)
+ s->head = ovend;
+
+ /* Free now unused nodes. */
+ for (f = z; f != ovend; f = fnext, ++n) {
+ fnext = f->next;
+ OPENSSL_free(f);
+ }
+
+ s->num_ranges -= n;
+ break;
+ } else if (end < z->range.start
+ && (z->prev == NULL || start > z->prev->range.end)) {
+ if (z->range.start == end + 1) {
+ /* We can extend the following range backwards. */
+ z->range.start = start;
+
+ /*
+ * If this closes a gap we now need to merge
+ * consecutive nodes.
+ */
+ pn_set_merge_adjacent(s, z);
+ } else if (z->prev != NULL && z->prev->range.end + 1 == start) {
+ /* We can extend the preceding range forwards. */
+ z->prev->range.end = end;
+
+ /*
+ * If this closes a gap we now need to merge
+ * consecutive nodes.
+ */
+ pn_set_merge_adjacent(s, z);
+ } else {
+ /*
+ * The new interval is between intervals without overlapping or
+ * touching them, so insert between, preserving sort.
+ */
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+
+ x->next = z;
+ x->prev = z->prev;
+ if (x->prev != NULL)
+ x->prev->next = x;
+ z->prev = x;
+ if (s->head == z)
+ s->head = x;
+
+ ++s->num_ranges;
+ }
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Remove a range from the set. Returns 0 on allocation failure, in which case
+ * the PN set is unchanged. Otherwise, returns 1. Ranges which are not already
+ * in the set can be removed without issue. If a passed range is not in the PN
+ * set at all, this is a no-op and returns 1.
+ */
+static int pn_set_remove(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+ struct pn_set_item_st *z, *zprev, *y;
+ QUIC_PN start = range->start, end = range->end;
+
+ if (!ossl_assert(start <= end))
+ return 0;
+
+ /* Walk backwards since we will most often be removing at the end. */
+ for (z = s->tail; z != NULL; z = zprev) {
+ zprev = z->prev;
+
+ if (start > z->range.end)
+ /* No overlapping ranges can exist beyond this point, so stop. */
+ break;
+
+ if (start <= z->range.start && end >= z->range.end) {
+ /*
+ * The range being removed dwarfs this range, so it should be
+ * removed.
+ */
+ if (z->next != NULL)
+ z->next->prev = z->prev;
+ if (z->prev != NULL)
+ z->prev->next = z->next;
+ if (s->head == z)
+ s->head = z->next;
+ if (s->tail == z)
+ s->tail = z->prev;
+
+ OPENSSL_free(z);
+ --s->num_ranges;
+ } else if (start <= z->range.start) {
+ /*
+ * The range being removed includes start of this range, but does
+ * not cover the entire range (as this would be caught by the case
+ * above). Shorten the range.
+ */
+ assert(end < z->range.end);
+ z->range.start = end + 1;
+ } else if (end >= z->range.end) {
+ /*
+ * The range being removed includes the end of this range, but does
+ * not cover the entire range (as this would be caught by the case
+ * above). Shorten the range. We can also stop iterating.
+ */
+ assert(start > z->range.start);
+ assert(start > 0);
+ z->range.end = start - 1;
+ break;
+ } else if (start > z->range.start && end < z->range.end) {
+ /*
+ * The range being removed falls entirely in this range, so cut it
+ * into two. Cases where a zero-length range would be created are
+ * handled by the above cases.
+ */
+ y = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (y == NULL)
+ return 0;
+
+ y->range.end = z->range.end;
+ y->range.start = end + 1;
+ y->next = z->next;
+ y->prev = z;
+ if (y->next != NULL)
+ y->next->prev = y;
+
+ z->range.end = start - 1;
+ z->next = y;
+
+ if (s->tail == z)
+ s->tail = y;
+
+ ++s->num_ranges;
+ break;
+ } else {
+ /* Assert no partial overlap; all cases should be covered above. */
+ assert(!pn_range_overlaps(&z->range, range));
+ }
+ }
+
+ return 1;
+}
+
+/* Returns 1 iff the given PN is in the PN set. */
+static int pn_set_query(const struct pn_set_st *s, QUIC_PN pn)
+{
+ struct pn_set_item_st *x;
+
+ if (s->head == NULL)
+ return 0;
+
+ for (x = s->tail; x != NULL; x = x->prev)
+ if (x->range.start <= pn && x->range.end >= pn)
+ return 1;
+ else if (x->range.end < pn)
+ return 0;
+
+ return 0;
+}
+
+struct rx_pkt_history_st {
+ struct pn_set_st set;
+
+ /*
+ * Invariant: PNs below this are not in the set.
+ * Invariant: This is monotonic and only ever increases.
+ */
+ QUIC_PN watermark;
+};
+
+static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
+ QUIC_PN watermark);
+
+static void rx_pkt_history_init(struct rx_pkt_history_st *h)
+{
+ pn_set_init(&h->set);
+ h->watermark = 0;
+}
+
+static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
+{
+ pn_set_destroy(&h->set);
+}
+
+/*
+ * Limit the number of ACK ranges we store to prevent resource consumption DoS
+ * attacks.
+ */
+#define MAX_RX_ACK_RANGES 32
+
+static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
+{
+ QUIC_PN highest = QUIC_PN_INVALID;
+
+ while (h->set.num_ranges > MAX_RX_ACK_RANGES) {
+ OSSL_QUIC_ACK_RANGE r = h->set.head->range;
+
+ highest = (highest == QUIC_PN_INVALID)
+ ? r.end : ossl_quic_pn_max(highest, r.end);
+
+ pn_set_remove(&h->set, &r);
+ }
+
+ /*
+ * Bump watermark to cover all PNs we removed to avoid accidential
+ * reprocessing of packets.
+ */
+ if (highest != QUIC_PN_INVALID)
+ rx_pkt_history_bump_watermark(h, highest + 1);
+}
+
+static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h,
+ QUIC_PN pn)
+{
+ OSSL_QUIC_ACK_RANGE r;
+
+ r.start = pn;
+ r.end = pn;
+
+ if (pn < h->watermark)
+ return 1; /* consider this a success case */
+
+ if (pn_set_insert(&h->set, &r) != 1)
+ return 0;
+
+ rx_pkt_history_trim_range_count(h);
+ return 1;
+}
+
+static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
+ QUIC_PN watermark)
+{
+ OSSL_QUIC_ACK_RANGE r;
+
+ if (watermark <= h->watermark)
+ return 1;
+
+ /* Remove existing PNs below the watermark. */
+ r.start = 0;
+ r.end = watermark - 1;
+ if (pn_set_remove(&h->set, &r) != 1)
+ return 0;
+
+ h->watermark = watermark;
+ return 1;
+}
+
+/*
+ * ACK Manager Implementation
+ * **************************
+ * Implementation of the ACK manager proper.
+ */
+
+/* Constants used by the ACK manager; see RFC 9002. */
+#define K_GRANULARITY (1 * OSSL_TIME_MS)
+#define K_PKT_THRESHOLD 3
+#define K_TIME_THRESHOLD_NUM 9
+#define K_TIME_THRESHOLD_DEN 8
+
+/* The maximum number of times we allow PTO to be doubled. */
+#define MAX_PTO_COUNT 16
+
+struct ossl_ackm_st {
+ /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
+ struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM];
+
+ /* Our list of received PNs which are not yet provably acked. */
+ struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM];
+
+ /* Polymorphic dependencies that we consume. */
+ OSSL_TIME (*now)(void *arg);
+ void *now_arg;
+ OSSL_STATM *statm;
+ const OSSL_CC_METHOD *cc_method;
+ OSSL_CC_DATA *cc_data;
+
+ /* RFC 9002 variables. */
+ uint32_t pto_count;
+ QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM];
+ OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM];
+ OSSL_TIME loss_time[QUIC_PN_SPACE_NUM];
+ OSSL_TIME loss_detection_deadline;
+
+ /* Lowest PN which is still not known to be ACKed. */
+ QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM];
+
+ /* Time at which we got our first RTT sample, or 0. */
+ OSSL_TIME first_rtt_sample;
+
+ /*
+ * A packet's num_bytes are added to this if it is inflight,
+ * and removed again once ack'd/lost/discarded.
+ */
+ uint64_t bytes_in_flight;
+
+ /*
+ * A packet's num_bytes are added to this if it is both inflight and
+ * ack-eliciting, and removed again once ack'd/lost/discarded.
+ */
+ uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM];
+
+ /* Count of ECN-CE events. */
+ uint64_t peer_ecnce[QUIC_PN_SPACE_NUM];
+
+ /* Set to 1 when the handshake is confirmed. */
+ char handshake_confirmed;
+
+ /* Set to 1 when the peer has completed address validation. */
+ char peer_completed_addr_validation;
+
+ /* Set to 1 when a PN space has been discarded. */
+ char discarded[QUIC_PN_SPACE_NUM];
+
+ /* Set to 1 when we think an ACK frame should be generated. */
+ char rx_ack_desired[QUIC_PN_SPACE_NUM];
+
+ /* Set to 1 if an ACK frame has ever been generated. */
+ char rx_ack_generated[QUIC_PN_SPACE_NUM];
+
+ /* Probe request counts for reporting to the user. */
+ OSSL_ACKM_PROBE_INFO pending_probe;
+
+ /* Generated ACK frames for each PN space. */
+ OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM];
+ OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES];
+
+ /* Other RX state. */
+ /* Largest PN we have RX'd. */
+ QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM];
+
+ /* Time at which the PN in rx_largest_pn was RX'd. */
+ OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM];
+
+ /*
+ * ECN event counters. Each time we receive a packet with a given ECN label,
+ * the corresponding ECN counter here is incremented.
+ */
+ uint64_t rx_ect0[QUIC_PN_SPACE_NUM];
+ uint64_t rx_ect1[QUIC_PN_SPACE_NUM];
+ uint64_t rx_ecnce[QUIC_PN_SPACE_NUM];
+
+ /*
+ * Number of ACK-eliciting packets since last ACK. We use this to defer
+ * emitting ACK frames until a threshold number of ACK-eliciting packets
+ * have been received.
+ */
+ uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM];
+
+ /*
+ * The ACK frame coalescing deadline at which we should flush any unsent ACK
+ * frames.
+ */
+ OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM];
+
+ /* Callbacks for deadline updates. */
+ void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg);
+ void *loss_detection_deadline_cb_arg;
+
+ void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg);
+ void *ack_deadline_cb_arg;
+};
+
+static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y)
+{
+ return x < y ? x : y