summaryrefslogtreecommitdiffstats
path: root/ssl
diff options
context:
space:
mode:
authorHugo Landau <hlandau@openssl.org>2022-06-28 13:52:48 +0100
committerHugo Landau <hlandau@openssl.org>2022-08-24 14:05:46 +0100
commitfa4e92a70a5f363fbbee192c0ecab697e3aa1248 (patch)
tree2452ee420bf5d69585de04252f16e8f51f9d025b /ssl
parent7af110f9f5fb9b039cc09b63768a0b989a7bf5ad (diff)
QUIC ACK Manager, Statistics Manager and Congestion Control API
This is the initial implementation of the ACK Manager for OpenSSL's QUIC support, with supporting design documentation and tests. Because the ACK Manager also depends on the Statistics Manager, it is also implemented here. The Statistics Manager is quite simple, so this does not amount to a large amount of extra code. Because the ACK Manager depends on a congestion controller, it adds a no-op congestion controller, which uses the previously workshopped congestion control API. Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Richard Levitte <levitte@openssl.org> (Merged from https://github.com/openssl/openssl/pull/18676)
Diffstat (limited to 'ssl')
-rw-r--r--ssl/quic/build.info2
-rw-r--r--ssl/quic/cc_dummy.c106
-rw-r--r--ssl/quic/quic_ackm.c2018
-rw-r--r--ssl/quic/quic_statm.c83
4 files changed, 2208 insertions, 1 deletions
diff --git a/ssl/quic/build.info b/ssl/quic/build.info
index 3e76055c62..9e411011e7 100644
--- a/ssl/quic/build.info
+++ b/ssl/quic/build.info
@@ -1,3 +1,3 @@
$LIBSSL=../../libssl
-SOURCE[$LIBSSL]=quic_method.c quic_impl.c quic_wire.c
+SOURCE[$LIBSSL]=quic_method.c quic_impl.c quic_wire.c quic_ackm.c quic_statm.c cc_dummy.c
diff --git a/ssl/quic/cc_dummy.c b/ssl/quic/cc_dummy.c
new file mode 100644
index 0000000000..a5f94d9498
--- /dev/null
+++ b/ssl/quic/cc_dummy.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/quic_cc.h"
+
+typedef struct ossl_cc_dummy_st {
+ char dummy;
+} OSSL_CC_DUMMY;
+
+static OSSL_CC_DATA *dummy_new(OSSL_PARAM *settings, OSSL_PARAM *options,
+ OSSL_PARAM *changeables)
+{
+ return OPENSSL_zalloc(sizeof(OSSL_CC_DUMMY));
+}
+
+static void dummy_free(OSSL_CC_DATA *cc)
+{
+ OPENSSL_free(cc);
+}
+
+static void dummy_reset(OSSL_CC_DATA *cc, int flags)
+{
+
+}
+
+static int dummy_set_exemption(OSSL_CC_DATA *cc, int numpackets)
+{
+ return 1;
+}
+
+static int dummy_get_exemption(OSSL_CC_DATA *cc)
+{
+ return 0;
+}
+
+static int dummy_can_send(OSSL_CC_DATA *cc)
+{
+ return 1;
+}
+
+static size_t dummy_get_send_allowance(OSSL_CC_DATA *cc,
+ OSSL_TIME time_since_last_send,
+ int time_valid)
+{
+ return SIZE_MAX;
+}
+
+static size_t dummy_get_bytes_in_flight_max(OSSL_CC_DATA *cc)
+{
+ return SIZE_MAX;
+}
+
+static int dummy_on_data_sent(OSSL_CC_DATA *cc, size_t num_retransmittable_bytes)
+{
+ return 1;
+}
+
+static int dummy_on_data_invalidated(OSSL_CC_DATA *cc,
+ size_t num_retransmittable_bytes)
+{
+ return 1;
+}
+
+static int dummy_on_data_acked(OSSL_CC_DATA *cc, OSSL_TIME time_now,
+ uint64_t last_pn_acked,
+ size_t num_retransmittable_bytes)
+{
+ return 1;
+}
+
+static void dummy_on_data_lost(OSSL_CC_DATA *cc,
+ uint64_t largest_pn_lost,
+ uint64_t largest_pn_sent,
+ size_t num_retransmittable_bytes,
+ int persistent_congestion)
+{
+
+}
+
+static int dummy_on_spurious_congestion_event(OSSL_CC_DATA *cc)
+{
+ return 1;
+}
+
+const OSSL_CC_METHOD ossl_cc_dummy_method = {
+ NULL,
+ dummy_new,
+ dummy_free,
+ dummy_reset,
+ dummy_set_exemption,
+ dummy_get_exemption,
+ dummy_can_send,
+ dummy_get_send_allowance,
+ dummy_get_bytes_in_flight_max,
+ dummy_on_data_sent,
+ dummy_on_data_invalidated,
+ dummy_on_data_acked,
+ dummy_on_data_lost,
+ dummy_on_spurious_congestion_event,
+};
diff --git a/ssl/quic/quic_ackm.c b/ssl/quic/quic_ackm.c
new file mode 100644
index 0000000000..670b2d316e
--- /dev/null
+++ b/ssl/quic/quic_ackm.c
@@ -0,0 +1,2018 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#include "internal/quic_ackm.h"
+#include "internal/common.h"
+#include <assert.h>
+
+/*
+ * TX Packet History
+ * *****************
+ *
+ * The TX Packet History object tracks information about packets which have been
+ * sent for which we later expect to receive an ACK. It is essentially a simple
+ * database keeping a list of packet information structures in packet number
+ * order which can also be looked up directly by packet number.
+ *
+ * We currently only allow packets to be appended to the list (i.e. the packet
+ * numbers of the packets appended to the list must monotonically increase), as
+ * we should not currently need more general functionality such as a sorted list
+ * insert.
+ */
+struct tx_pkt_history_st {
+ /* A linked list of all our packets. */
+ OSSL_ACKM_TX_PKT *head, *tail;
+
+ /* Number of packets in the list. */
+ size_t num_packets;
+
+ /*
+ * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
+ *
+ * Invariant: A packet is in this map if and only if it is in the linked
+ * list.
+ */
+ LHASH_OF(OSSL_ACKM_TX_PKT) *map;
+
+ /*
+ * The lowest packet number which may currently be added to the history list
+ * (inclusive). We do not allow packet numbers to be added to the history
+ * list non-monotonically, so packet numbers must be greater than or equal
+ * to this value.
+ */
+ uint64_t watermark;
+
+ /*
+ * Packet number of the highest packet info structure we have yet appended
+ * to the list. This is usually one less than watermark, except when we have
+ * not added any packet yet.
+ */
+ uint64_t highest_sent;
+};
+
+DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);
+
+static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
+{
+ return pkt->pkt_num;
+}
+
+static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
+ const OSSL_ACKM_TX_PKT *b)
+{
+ if (a->pkt_num < b->pkt_num)
+ return -1;
+ if (a->pkt_num > b->pkt_num)
+ return 1;
+ return 0;
+}
+
+static int
+tx_pkt_history_init(struct tx_pkt_history_st *h)
+{
+ h->head = h->tail = NULL;
+ h->num_packets = 0;
+ h->watermark = 0;
+ h->highest_sent = 0;
+
+ h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
+ if (h->map == NULL)
+ return 0;
+
+ return 1;
+}
+
+static void
+tx_pkt_history_destroy(struct tx_pkt_history_st *h)
+{
+ lh_OSSL_ACKM_TX_PKT_free(h->map);
+ h->map = NULL;
+ h->head = h->tail = NULL;
+}
+
+static int
+tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
+ OSSL_ACKM_TX_PKT *pkt)
+{
+ OSSL_ACKM_TX_PKT *existing;
+
+ /*
+ * There should not be any existing packet with this number
+ * in our mapping.
+ */
+ existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
+ if (!ossl_assert(existing == NULL))
+ return 0;
+
+ /* Should not already be in a list. */
+ if (!ossl_assert(pkt->next == NULL && pkt->prev == NULL))
+ return 0;
+
+ lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);
+
+ pkt->next = NULL;
+ pkt->prev = h->tail;
+ if (h->tail != NULL)
+ h->tail->next = pkt;
+ h->tail = pkt;
+ if (h->head == NULL)
+ h->head = h->tail;
+
+ ++h->num_packets;
+ return 1;
+}
+
+/* Adds a packet information structure to the history list. */
+static int
+tx_pkt_history_add(struct tx_pkt_history_st *h,
+ OSSL_ACKM_TX_PKT *pkt)
+{
+ if (!ossl_assert(pkt->pkt_num >= h->watermark))
+ return 0;
+
+ if (tx_pkt_history_add_actual(h, pkt) < 1)
+ return 0;
+
+ h->watermark = pkt->pkt_num + 1;
+ h->highest_sent = pkt->pkt_num;
+ return 1;
+}
+
+/* Retrieve a packet information structure by packet number. */
+static OSSL_ACKM_TX_PKT *
+tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+ OSSL_ACKM_TX_PKT key;
+
+ key.pkt_num = pkt_num;
+
+ return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
+}
+
+/* Remove a packet information structure from the history log. */
+static int
+tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
+{
+ OSSL_ACKM_TX_PKT key, *pkt;
+ key.pkt_num = pkt_num;
+
+ pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
+ if (pkt == NULL)
+ return 0;
+
+ if (pkt->prev != NULL)
+ pkt->prev->next = pkt->next;
+ if (pkt->next != NULL)
+ pkt->next->prev = pkt->prev;
+ if (h->head == pkt)
+ h->head = pkt->next;
+ if (h->tail == pkt)
+ h->tail = pkt->prev;
+
+ pkt->prev = pkt->next = NULL;
+
+ lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
+ --h->num_packets;
+ return 1;
+}
+
+/*
+ * RX Packet Number Tracking
+ * *************************
+ *
+ * **Background.** The RX side of the ACK manager must track packets we have
+ * received for which we have to generate ACK frames. Broadly, this means we
+ * store a set of packet numbers which we have received but which we do not know
+ * for a fact that the transmitter knows we have received.
+ *
+ * This must handle various situations:
+ *
+ * 1. We receive a packet but have not sent an ACK yet, so the transmitter
+ * does not know whether we have received it or not yet.
+ *
+ * 2. We receive a packet and send an ACK which is lost. We do not
+ * immediately know that the ACK was lost and the transmitter does not know
+ * that we have received the packet.
+ *
+ * 3. We receive a packet and send an ACK which is received by the
+ * transmitter. The transmitter does not immediately respond with an ACK,
+ * or responds with an ACK which is lost. The transmitter knows that we
+ * have received the packet, but we do not know for sure that it knows,
+ * because the ACK we sent could have been lost.
+ *
+ * 4. We receive a packet and send an ACK which is received by the
+ * transmitter. The transmitter subsequently sends us an ACK which confirms
+ * its receipt of the ACK we sent, and we successfully receive that ACK, so
+ * we know that the transmitter knows, that we received the original
+ * packet.
+ *
+ * Only when we reach case (4) are we relieved of any need to track a given
+ * packet number we have received, because only in this case do we know for sure
+ * that the peer knows we have received the packet. Having reached case (4) we
+ * will never again need to generate an ACK containing the PN in question, but
+ * until we reach that point, we must keep track of the PN as not having been
+ * provably ACKed, as we may have to keep generating ACKs for the given PN not
+ * just until the transmitter receives one, but until we know that it has
+ * received one. This will be referred to herein as "provably ACKed".
+ *
+ * **Duplicate handling.** The above discusses the case where we have received a
+ * packet with a given PN but are at best unsure whether the sender knows we
+ * have received it or not. However, we must also handle the case where we have
+ * yet to receive a packet with a given PN in the first place. The reason for
+ * this is because of the requirement expressed by RFC 9000 s. 12.3:
+ *
+ * "A receiver MUST discard a newly unprotected packet unless it is certain
+ * that it has not processed another packet with the same packet number from
+ * the same packet number space."
+ *
+ * We must ensure we never process a duplicate PN. As such, each possible PN we
+ * can receive must exist in one of the following logical states:
+ *
+ * - We have never processed this PN before
+ * (so if we receive such a PN, it can be processed)
+ *
+ * - We have processed this PN but it has not yet been provably ACKed
+ * (and should therefore be in any future ACK frame generated;
+ * if we receive such a PN again, it must be ignored)
+ *
+ * - We have processed this PN and it has been provably ACKed
+ * (if we receive such a PN again, it must be ignored)
+ *
+ * However, if we were to track this state for every PN ever used in the history
+ * of a connection, the amount of state required would increase unboundedly as
+ * the connection goes on (for example, we would have to store a set of every PN
+ * ever received.)
+ *
+ * RFC 9000 s. 12.3 continues:
+ *
+ * "Endpoints that track all individual packets for the purposes of detecting
+ * duplicates are at risk of accumulating excessive state. The data required
+ * for detecting duplicates can be limited by maintaining a minimum packet
+ * number below which all packets are immediately dropped."
+ *
+ * Moreover, RFC 9000 s. 13.2.3 states that:
+ *
+ * "A receiver MUST retain an ACK Range unless it can ensure that it will not
+ * subsequently accept packets with numbers in that range. Maintaining a
+ * minimum packet number that increases as ranges are discarded is one way to
+ * achieve this with minimal state."
+ *
+ * This touches on a subtlety of the original requirement quoted above: the
+ * receiver MUST discard a packet unless it is certain that it has not processed
+ * another packet with the same PN. However, this does not forbid the receiver
+ * from also discarding some PNs even though it has not yet processed them. In
+ * other words, implementations must be conservative and err in the direction of
+ * assuming a packet is a duplicate, but it is acceptable for this to come at
+ * the cost of falsely identifying some packets as duplicates.
+ *
+ * This allows us to bound the amount of state we must keep, and we adopt the
+ * suggested strategy quoted above to do so. We define a watermark PN below
+ * which all PNs are in the same state. This watermark is only ever increased.
+ * Thus the PNs the state for which needs to be explicitly tracked is limited to
+ * only a small number of recent PNs, and all older PNs have an assumed state.
+ *
+ * Any given PN thus falls into one of the following states:
+ *
+ * - (A) The PN is above the watermark but we have not yet received it.
+ *
+ * If we receive such a PN, we should process it and record the PN as
+ * received.
+ *
+ * - (B) The PN is above the watermark and we have received it.
+ *
+ * The PN should be included in any future ACK frame we generate.
+ * If we receive such a PN again, we should ignore it.
+ *
+ * - (C) The PN is below the watermark.
+ *
+ * We do not know whether a packet with the given PN was received or
+ * not. To be safe, if we receive such a packet, it is not processed.
+ *
+ * Note that state (C) corresponds to both "we have processed this PN and it has
+ * been provably ACKed" logical state and a subset of the PNs in the "we have
+ * never processed this PN before" logical state (namely all PNs which were lost
+ * and never received, but which are not recent enough to be above the
+ * watermark). The reason we can merge these states and avoid tracking states
+ * for the PNs in this state is because the provably ACKed and never-received
+ * states are functionally identical in terms of how we need to handle them: we
+ * don't need to do anything for PNs in either of these states, so we don't have
+ * to care about PNs in this state nor do we have to care about distinguishing
+ * the two states for a given PN.
+ *
+ * Note that under this scheme provably ACKed PNs are by definition always below
+ * the watermark; therefore, it follows that when a PN becomes provably ACKed,
+ * the watermark must be immediately increased to exceed it (otherwise we would
+ * keep reporting it in future ACK frames).
+ *
+ * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
+ * to advance the watermark:
+ *
+ * "When a packet containing an ACK frame is sent, the Largest Acknowledged
+ * field in that frame can be saved. When a packet containing an ACK frame is
+ * acknowledged, the receiver can stop acknowledging packets less than or
+ * equal to the Largest Acknowledged field in the sent ACK frame."
+ *
+ * This is where our scheme's false positives arise. When a packet containing an
+ * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
+ * acked, and the watermark is bumped accordingly. However, the Largest
+ * Acknowledged field does not imply that all lower PNs have been received,
+ * because there may be gaps expressed in the ranges of PNs expressed by that
+ * and previous ACK frames. Thus, some unreceived PNs may be moved below the
+ * watermark, and we may subsequently reject those PNs as possibly being
+ * duplicates even though we have not actually received those PNs. Since we bump
+ * the watermark when a PN becomes provably ACKed, it follows that an unreceived
+ * PN falls below the watermark (and thus becomes a false positive for the
+ * purposes of duplicate detection) when a higher-numbered PN becomes provably
+ * ACKed.
+ *
+ * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
+ * n) will no longer be processed. Although datagrams may be reordered in the
+ * network, a PN we receive can only become provably ACKed after our own
+ * subsequently generated ACK frame is sent in a future TX packet, and then we
+ * receive another RX PN acknowleding that TX packet. This means that a given RX
+ * PN can only become provably ACKed at least 1 RTT after it is received; it is
+ * unlikely that any reordered datagrams will still be "in the network" (and not
+ * lost) by this time. If this does occur for whatever reason and a late PN is
+ * received, the packet will be discarded unprocessed and the PN is simply
+ * handled as though lost (a "written off" PN).
+ *
+ * **Data structure.** Our state for the RX handling side of the ACK manager, as
+ * discussed above, mainly comprises:
+ *
+ * a) a logical set of PNs, and
+ * b) a monotonically increasing PN counter (the watermark).
+ *
+ * For (a), we define a data structure which stores a logical set of PNs, which
+ * we use to keep track of which PNs we have received but which have not yet
+ * been provably ACKed, and thus will later need to generate an ACK frame for.
+ *
+ * The correspondance with the logical states discussed above is as follows. A
+ * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
+ * if it is in the logical set of PNs, and in state (A) otherwise.
+ *
+ * Note that PNs are only removed from the PN set (when they become provably
+ * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
+ * from the PN set any other way would be ambiguous as it would be
+ * indistinguishable from a PN we have not yet received and risk us processing a
+ * duplicate packet. In other words, for a given PN:
+ *
+ * - State (A) can transition to state (B) or (C)
+ * - State (B) can transition to state (C) only
+ * - State (C) is the terminal state
+ *
+ * We can query the logical set data structure for PNs which have been received
+ * but which have not been provably ACKed when we want to generate ACK frames.
+ * Since ACK frames can be lost and/or we might not know that the peer has
+ * successfully received them, we might generate multiple ACK frames covering a
+ * given PN until that PN becomes provably ACKed and we finally remove it from
+ * our set (by bumping the watermark) as no longer being our concern.
+ *
+ * The data structure supports the following operations:
+ *
+ * Insert Range: Adds an inclusive range of packet numbers [start, end]
+ * to the set. Equivalent to Insert for each number
+ * in the range. (Used when we receive a new PN.)
+ *
+ * Remove Range: Removes an inclusive range of packet numbers [start, end]
+ * from the set. Not all of the range need already be in
+ * the set, but any part of the range in the set is removed.
+ * (Used when bumping the watermark.)
+ *
+ * Query: Is a PN in the data structure?
+ *
+ * The data structure can be iterated.
+ *
+ * For greater efficiency in tracking large numbers of contiguous PNs, we track
+ * PN ranges rather than individual PNs. The data structure manages a list of PN
+ * ranges [[start, end]...]. Internally this is implemented as a doubly linked
+ * sorted list of range structures, which are automatically split and merged as
+ * necessary.
+ *
+ * This data structure requires O(n) traversal of the list for insertion,
+ * removal and query when we are not adding/removing ranges which are near the
+ * beginning or end of the set of ranges. It is expected that the number of PN
+ * ranges needed at any given time will generally be small and that most
+ * operations will be close to the beginning or end of the range.
+ *
+ * Invariant: The data structure is always sorted in ascending order by PN.
+ *
+ * Invariant: No two adjacent ranges ever 'border' one another (have no
+ * numerical gap between them) as the data structure always ensures
+ * such ranges are merged.
+ *
+ * Invariant: No two ranges ever overlap.
+ *
+ * Invariant: No range [a, b] ever has a > b.
+ *
+ * Invariant: Since ranges are represented using inclusive bounds, no range
+ * item inside the data structure can represent a span of zero PNs.
+ *
+ * **Possible duplicates.** A PN is considered a possible duplicate when either:
+ *
+ * a) its PN is already in the PN set (i.e. has already been received), or
+ * b) its PN is below the watermark (i.e. was provably ACKed or written off).
+ *
+ * A packet with a given PN is considered 'processable' when that PN is not
+ * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
+ *
+ * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
+ * provably ACKed. This occurs when an ACK frame is received by the TX side of
+ * the ACK manager; thus, there is necessary interaction between the TX and RX
+ * sides of the ACK manager.
+ *
+ * This is implemented as follows. When a packet is queued as sent in the TX
+ * side of the ACK manager, it may optionally have a Largest Acked value set on
+ * it. The user of the ACK manager should do this if the packet being
+ * transmitted contains an ACK frame, by setting the field to the Largest Acked
+ * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
+ * When a TX packet is eventually acknowledged which has this field set, it is
+ * used to update the state of the RX side of the ACK manager by bumping the
+ * watermark accordingly.
+ */
+struct pn_set_item_st {
+ struct pn_set_item_st *prev, *next;
+ OSSL_QUIC_ACK_RANGE range;
+};
+
+struct pn_set_st {
+ struct pn_set_item_st *head, *tail;
+
+ /* Number of ranges (not PNs) in the set. */
+ size_t num_ranges;
+};
+
+static void pn_set_init(struct pn_set_st *s)
+{
+ s->head = s->tail = NULL;
+ s->num_ranges = 0;
+}
+
+static void pn_set_destroy(struct pn_set_st *s)
+{
+ struct pn_set_item_st *x, *xnext;
+
+ for (x = s->head; x != NULL; x = xnext) {
+ xnext = x->next;
+ OPENSSL_free(x);
+ }
+}
+
+/* Possible merge of x, x->prev */
+static void pn_set_merge_adjacent(struct pn_set_st *s, struct pn_set_item_st *x)
+{
+ struct pn_set_item_st *xprev = x->prev;
+
+ if (xprev == NULL)
+ return;
+
+ if (x->range.start - 1 != xprev->range.end)
+ return;
+
+ x->range.start = xprev->range.start;
+ x->prev = xprev->prev;
+ if (x->prev != NULL)
+ x->prev->next = x;
+
+ if (s->head == xprev)
+ s->head = x;
+
+ OPENSSL_free(xprev);
+ --s->num_ranges;
+}
+
+/* Returns 1 if there exists a PN x which falls within both ranges a and b. */
+static int pn_range_overlaps(const OSSL_QUIC_ACK_RANGE *a,
+ const OSSL_QUIC_ACK_RANGE *b)
+{
+ return ossl_quic_pn_min(a->end, b->end)
+ >= ossl_quic_pn_max(a->start, b->start);
+}
+
+/*
+ * Insert a range into a PN set. Returns 0 on allocation failure, in which case
+ * the PN set is in a valid but undefined state. Otherwise, returns 1. Ranges
+ * can overlap existing ranges without limitation. If a range is a subset of
+ * an existing range in the set, this is a no-op and returns 1.
+ */
+static int pn_set_insert(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+ struct pn_set_item_st *x, *z, *xnext, *f, *fnext;
+ QUIC_PN start = range->start, end = range->end;
+
+ if (!ossl_assert(start <= end))
+ return 0;
+
+ if (s->head == NULL) {
+ /* Nothing in the set yet, so just add this range. */
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+ s->head = s->tail = x;
+ ++s->num_ranges;
+ return 1;
+ }
+
+ if (start > s->tail->range.end) {
+ /*
+ * Range is after the latest range in the set, so append.
+ *
+ * Note: The case where the range is before the earliest range in the
+ * set is handled as a degenerate case of the final case below. See
+ * optimization note (*) below.
+ */
+ if (s->tail->range.end + 1 == start) {
+ s->tail->range.end = end;
+ return 1;
+ }
+
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+ x->prev = s->tail;
+ if (s->tail != NULL)
+ s->tail->next = x;
+ s->tail = x;
+ ++s->num_ranges;
+ return 1;
+ }
+
+ if (start <= s->head->range.start && end >= s->tail->range.end) {
+ /*
+ * New range dwarfs all ranges in our set.
+ *
+ * Free everything except the first range in the set, which we scavenge
+ * and reuse.
+ */
+ for (x = s->head->next; x != NULL; x = xnext) {
+ xnext = x->next;
+ OPENSSL_free(x);
+ }
+
+ s->head->range.start = start;
+ s->head->range.end = end;
+ s->head->next = s->head->prev = NULL;
+ s->tail = s->head;
+ s->num_ranges = 1;
+ return 1;
+ }
+
+ /*
+ * Walk backwards since we will most often be inserting at the end. As an
+ * optimization, test the head node first and skip iterating over the
+ * entire list if we are inserting at the start. The assumption is that
+ * insertion at the start and end of the space will be the most common
+ * operations. (*)
+ */
+ z = end < s->head->range.start ? s->head : s->tail;
+
+ for (; z != NULL; z = z->prev) {
+ /* An existing range dwarfs our new range (optimisation). */
+ if (z->range.start <= start && z->range.end >= end)
+ return 1;
+
+ if (pn_range_overlaps(&z->range, range)) {
+ /*
+ * Our new range overlaps an existing range, or possibly several
+ * existing ranges.
+ */
+ struct pn_set_item_st *ovend = z;
+ OSSL_QUIC_ACK_RANGE t;
+ size_t n = 0;
+
+ t.end = ossl_quic_pn_max(end, z->range.end);
+
+ /* Get earliest overlapping range. */
+ for (; z->prev != NULL && pn_range_overlaps(&z->prev->range, range);
+ z = z->prev);
+
+ t.start = ossl_quic_pn_min(start, z->range.start);
+
+ /* Replace sequence of nodes z..ovend with ovend only. */
+ ovend->range = t;
+ ovend->prev = z->prev;
+ if (z->prev != NULL)
+ z->prev->next = ovend;
+ if (s->head == z)
+ s->head = ovend;
+
+ /* Free now unused nodes. */
+ for (f = z; f != ovend; f = fnext, ++n) {
+ fnext = f->next;
+ OPENSSL_free(f);
+ }
+
+ s->num_ranges -= n;
+ break;
+ } else if (end < z->range.start
+ && (z->prev == NULL || start > z->prev->range.end)) {
+ if (z->range.start == end + 1) {
+ /* We can extend the following range backwards. */
+ z->range.start = start;
+
+ /*
+ * If this closes a gap we now need to merge
+ * consecutive nodes.
+ */
+ pn_set_merge_adjacent(s, z);
+ } else if (z->prev != NULL && z->prev->range.end + 1 == start) {
+ /* We can extend the preceding range forwards. */
+ z->prev->range.end = end;
+
+ /*
+ * If this closes a gap we now need to merge
+ * consecutive nodes.
+ */
+ pn_set_merge_adjacent(s, z);
+ } else {
+ /*
+ * The new interval is between intervals without overlapping or
+ * touching them, so insert between, preserving sort.
+ */
+ x = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (x == NULL)
+ return 0;
+
+ x->range.start = start;
+ x->range.end = end;
+
+ x->next = z;
+ x->prev = z->prev;
+ if (x->prev != NULL)
+ x->prev->next = x;
+ z->prev = x;
+ if (s->head == z)
+ s->head = x;
+
+ ++s->num_ranges;
+ }
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Remove a range from the set. Returns 0 on allocation failure, in which case
+ * the PN set is unchanged. Otherwise, returns 1. Ranges which are not already
+ * in the set can be removed without issue. If a passed range is not in the PN
+ * set at all, this is a no-op and returns 1.
+ */
+static int pn_set_remove(struct pn_set_st *s, const OSSL_QUIC_ACK_RANGE *range)
+{
+ struct pn_set_item_st *z, *zprev, *y;
+ QUIC_PN start = range->start, end = range->end;
+
+ if (!ossl_assert(start <= end))
+ return 0;
+
+ /* Walk backwards since we will most often be removing at the end. */
+ for (z = s->tail; z != NULL; z = zprev) {
+ zprev = z->prev;
+
+ if (start > z->range.end)
+ /* No overlapping ranges can exist beyond this point, so stop. */
+ break;
+
+ if (start <= z->range.start && end >= z->range.end) {
+ /*
+ * The range being removed dwarfs this range, so it should be
+ * removed.
+ */
+ if (z->next != NULL)
+ z->next->prev = z->prev;
+ if (z->prev != NULL)
+ z->prev->next = z->next;
+ if (s->head == z)
+ s->head = z->next;
+ if (s->tail == z)
+ s->tail = z->prev;
+
+ OPENSSL_free(z);
+ --s->num_ranges;
+ } else if (start <= z->range.start) {
+ /*
+ * The range being removed includes start of this range, but does
+ * not cover the entire range (as this would be caught by the case
+ * above). Shorten the range.
+ */
+ assert(end < z->range.end);
+ z->range.start = end + 1;
+ } else if (end >= z->range.end) {
+ /*
+ * The range being removed includes the end of this range, but does
+ * not cover the entire range (as this would be caught by the case
+ * above). Shorten the range. We can also stop iterating.
+ */
+ assert(start > z->range.start);
+ assert(start > 0);
+ z->range.end = start - 1;
+ break;
+ } else if (start > z->range.start && end < z->range.end) {
+ /*
+ * The range being removed falls entirely in this range, so cut it
+ * into two. Cases where a zero-length range would be created are
+ * handled by the above cases.
+ */
+ y = OPENSSL_zalloc(sizeof(struct pn_set_item_st));
+ if (y == NULL)
+ return 0;
+
+ y->range.end = z->range.end;
+ y->range.start = end + 1;
+ y->next = z->next;
+ y->prev = z;
+ if (y->next != NULL)
+ y->next->prev = y;
+
+ z->range.end = start - 1;
+ z->next = y;
+
+ if (s->tail == z)
+ s->tail = y;
+
+ ++s->num_ranges;
+ break;
+ } else {
+ /* Assert no partial overlap; all cases should be covered above. */
+ assert(!pn_range_overlaps(&z->range, range));
+ }
+ }
+
+ return 1;
+}
+
+/* Returns 1 iff the given PN is in the PN set. */
+static int pn_set_query(const struct pn_set_st *s, QUIC_PN pn)
+{
+ struct pn_set_item_st *x;
+
+ if (s->head == NULL)
+ return 0;
+
+ for (x = s->tail; x != NULL; x = x->prev)
+ if (x->range.start <= pn && x->range.end >= pn)
+ return 1;
+ else if (x->range.end < pn)
+ return 0;
+
+ return 0;
+}
+
+struct rx_pkt_history_st {
+ struct pn_set_st set;
+
+ /*
+ * Invariant: PNs below this are not in the set.
+ * Invariant: This is monotonic and only ever increases.
+ */
+ QUIC_PN watermark;
+};
+
+static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
+ QUIC_PN watermark);
+
+static void rx_pkt_history_init(struct rx_pkt_history_st *h)
+{
+ pn_set_init(&h->set);
+ h->watermark = 0;
+}
+
+static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
+{
+ pn_set_destroy(&h->set);
+}
+
+/*
+ * Limit the number of ACK ranges we store to prevent resource consumption DoS
+ * attacks.
+ */
+#define MAX_RX_ACK_RANGES 32
+
+static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
+{
+ QUIC_PN highest = QUIC_PN_INVALID;
+
+ while (h->set.num_ranges > MAX_RX_ACK_RANGES) {
+ OSSL_QUIC_ACK_RANGE r = h->set.hea