summaryrefslogtreecommitdiffstats
path: root/kernel/printk/printk_ringbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/printk/printk_ringbuffer.c')
-rw-r--r--kernel/printk/printk_ringbuffer.c2083
1 files changed, 2083 insertions, 0 deletions
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
new file mode 100644
index 000000000000..2493348a1631
--- /dev/null
+++ b/kernel/printk/printk_ringbuffer.c
@@ -0,0 +1,2083 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include "printk_ringbuffer.h"
+
+/**
+ * DOC: printk_ringbuffer overview
+ *
+ * Data Structure
+ * --------------
+ * The printk_ringbuffer is made up of 3 internal ringbuffers:
+ *
+ * desc_ring
+ * A ring of descriptors and their meta data (such as sequence number,
+ * timestamp, loglevel, etc.) as well as internal state information about
+ * the record and logical positions specifying where in the other
+ * ringbuffer the text strings are located.
+ *
+ * text_data_ring
+ * A ring of data blocks. A data block consists of an unsigned long
+ * integer (ID) that maps to a desc_ring index followed by the text
+ * string of the record.
+ *
+ * The internal state information of a descriptor is the key element to allow
+ * readers and writers to locklessly synchronize access to the data.
+ *
+ * Implementation
+ * --------------
+ *
+ * Descriptor Ring
+ * ~~~~~~~~~~~~~~~
+ * The descriptor ring is an array of descriptors. A descriptor contains
+ * essential meta data to track the data of a printk record using
+ * blk_lpos structs pointing to associated text data blocks (see
+ * "Data Rings" below). Each descriptor is assigned an ID that maps
+ * directly to index values of the descriptor array and has a state. The ID
+ * and the state are bitwise combined into a single descriptor field named
+ * @state_var, allowing ID and state to be synchronously and atomically
+ * updated.
+ *
+ * Descriptors have four states:
+ *
+ * reserved
+ * A writer is modifying the record.
+ *
+ * committed
+ * The record and all its data are written. A writer can reopen the
+ * descriptor (transitioning it back to reserved), but in the committed
+ * state the data is consistent.
+ *
+ * finalized
+ * The record and all its data are complete and available for reading. A
+ * writer cannot reopen the descriptor.
+ *
+ * reusable
+ * The record exists, but its text and/or meta data may no longer be
+ * available.
+ *
+ * Querying the @state_var of a record requires providing the ID of the
+ * descriptor to query. This can yield a possible fifth (pseudo) state:
+ *
+ * miss
+ * The descriptor being queried has an unexpected ID.
+ *
+ * The descriptor ring has a @tail_id that contains the ID of the oldest
+ * descriptor and @head_id that contains the ID of the newest descriptor.
+ *
+ * When a new descriptor should be created (and the ring is full), the tail
+ * descriptor is invalidated by first transitioning to the reusable state and
+ * then invalidating all tail data blocks up to and including the data blocks
+ * associated with the tail descriptor (for the text ring). Then
+ * @tail_id is advanced, followed by advancing @head_id. And finally the
+ * @state_var of the new descriptor is initialized to the new ID and reserved
+ * state.
+ *
+ * The @tail_id can only be advanced if the new @tail_id would be in the
+ * committed or reusable queried state. This makes it possible that a valid
+ * sequence number of the tail is always available.
+ *
+ * Descriptor Finalization
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ * When a writer calls the commit function prb_commit(), record data is
+ * fully stored and is consistent within the ringbuffer. However, a writer can
+ * reopen that record, claiming exclusive access (as with prb_reserve()), and
+ * modify that record. When finished, the writer must again commit the record.
+ *
+ * In order for a record to be made available to readers (and also become
+ * recyclable for writers), it must be finalized. A finalized record cannot be
+ * reopened and can never become "unfinalized". Record finalization can occur
+ * in three different scenarios:
+ *
+ * 1) A writer can simultaneously commit and finalize its record by calling
+ * prb_final_commit() instead of prb_commit().
+ *
+ * 2) When a new record is reserved and the previous record has been
+ * committed via prb_commit(), that previous record is automatically
+ * finalized.
+ *
+ * 3) When a record is committed via prb_commit() and a newer record
+ * already exists, the record being committed is automatically finalized.
+ *
+ * Data Ring
+ * ~~~~~~~~~
+ * The text data ring is a byte array composed of data blocks. Data blocks are
+ * referenced by blk_lpos structs that point to the logical position of the
+ * beginning of a data block and the beginning of the next adjacent data
+ * block. Logical positions are mapped directly to index values of the byte
+ * array ringbuffer.
+ *
+ * Each data block consists of an ID followed by the writer data. The ID is
+ * the identifier of a descriptor that is associated with the data block. A
+ * given data block is considered valid if all of the following conditions
+ * are met:
+ *
+ * 1) The descriptor associated with the data block is in the committed
+ * or finalized queried state.
+ *
+ * 2) The blk_lpos struct within the descriptor associated with the data
+ * block references back to the same data block.
+ *
+ * 3) The data block is within the head/tail logical position range.
+ *
+ * If the writer data of a data block would extend beyond the end of the
+ * byte array, only the ID of the data block is stored at the logical
+ * position and the full data block (ID and writer data) is stored at the
+ * beginning of the byte array. The referencing blk_lpos will point to the
+ * ID before the wrap and the next data block will be at the logical
+ * position adjacent the full data block after the wrap.
+ *
+ * Data rings have a @tail_lpos that points to the beginning of the oldest
+ * data block and a @head_lpos that points to the logical position of the
+ * next (not yet existing) data block.
+ *
+ * When a new data block should be created (and the ring is full), tail data
+ * blocks will first be invalidated by putting their associated descriptors
+ * into the reusable state and then pushing the @tail_lpos forward beyond
+ * them. Then the @head_lpos is pushed forward and is associated with a new
+ * descriptor. If a data block is not valid, the @tail_lpos cannot be
+ * advanced beyond it.
+ *
+ * Info Array
+ * ~~~~~~~~~~
+ * The general meta data of printk records are stored in printk_info structs,
+ * stored in an array with the same number of elements as the descriptor ring.
+ * Each info corresponds to the descriptor of the same index in the
+ * descriptor ring. Info validity is confirmed by evaluating the corresponding
+ * descriptor before and after loading the info.
+ *
+ * Usage
+ * -----
+ * Here are some simple examples demonstrating writers and readers. For the
+ * examples a global ringbuffer (test_rb) is available (which is not the
+ * actual ringbuffer used by printk)::
+ *
+ * DEFINE_PRINTKRB(test_rb, 15, 5);
+ *
+ * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of
+ * 1 MiB (2 ^ (15 + 5)) for text data.
+ *
+ * Sample writer code::
+ *
+ * const char *textstr = "message text";
+ * struct prb_reserved_entry e;
+ * struct printk_record r;
+ *
+ * // specify how much to allocate
+ * prb_rec_init_wr(&r, strlen(textstr) + 1);
+ *
+ * if (prb_reserve(&e, &test_rb, &r)) {
+ * snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
+ *
+ * r.info->text_len = strlen(textstr);
+ * r.info->ts_nsec = local_clock();
+ * r.info->caller_id = printk_caller_id();
+ *
+ * // commit and finalize the record
+ * prb_final_commit(&e);
+ * }
+ *
+ * Note that additional writer functions are available to extend a record
+ * after it has been committed but not yet finalized. This can be done as
+ * long as no new records have been reserved and the caller is the same.
+ *
+ * Sample writer code (record extending)::
+ *
+ * // alternate rest of previous example
+ *
+ * r.info->text_len = strlen(textstr);
+ * r.info->ts_nsec = local_clock();
+ * r.info->caller_id = printk_caller_id();
+ *
+ * // commit the record (but do not finalize yet)
+ * prb_commit(&e);
+ * }
+ *
+ * ...
+ *
+ * // specify additional 5 bytes text space to extend
+ * prb_rec_init_wr(&r, 5);
+ *
+ * // try to extend, but only if it does not exceed 32 bytes
+ * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id()), 32) {
+ * snprintf(&r.text_buf[r.info->text_len],
+ * r.text_buf_size - r.info->text_len, "hello");
+ *
+ * r.info->text_len += 5;
+ *
+ * // commit and finalize the record
+ * prb_final_commit(&e);
+ * }
+ *
+ * Sample reader code::
+ *
+ * struct printk_info info;
+ * struct printk_record r;
+ * char text_buf[32];
+ * u64 seq;
+ *
+ * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
+ *
+ * prb_for_each_record(0, &test_rb, &seq, &r) {
+ * if (info.seq != seq)
+ * pr_warn("lost %llu records\n", info.seq - seq);
+ *
+ * if (info.text_len > r.text_buf_size) {
+ * pr_warn("record %llu text truncated\n", info.seq);
+ * text_buf[r.text_buf_size - 1] = 0;
+ * }
+ *
+ * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec,
+ * &text_buf[0]);
+ * }
+ *
+ * Note that additional less convenient reader functions are available to
+ * allow complex record access.
+ *
+ * ABA Issues
+ * ~~~~~~~~~~
+ * To help avoid ABA issues, descriptors are referenced by IDs (array index
+ * values combined with tagged bits counting array wraps) and data blocks are
+ * referenced by logical positions (array index values combined with tagged
+ * bits counting array wraps). However, on 32-bit systems the number of
+ * tagged bits is relatively small such that an ABA incident is (at least
+ * theoretically) possible. For example, if 4 million maximally sized (1KiB)
+ * printk messages were to occur in NMI context on a 32-bit system, the
+ * interrupted context would not be able to recognize that the 32-bit integer
+ * completely wrapped and thus represents a different data block than the one
+ * the interrupted context expects.
+ *
+ * To help combat this possibility, additional state checking is performed
+ * (such as using cmpxchg() even though set() would suffice). These extra
+ * checks are commented as such and will hopefully catch any ABA issue that
+ * a 32-bit system might experience.
+ *
+ * Memory Barriers
+ * ~~~~~~~~~~~~~~~
+ * Multiple memory barriers are used. To simplify proving correctness and
+ * generating litmus tests, lines of code related to memory barriers
+ * (loads, stores, and the associated memory barriers) are labeled::
+ *
+ * LMM(function:letter)
+ *
+ * Comments reference the labels using only the "function:letter" part.
+ *
+ * The memory barrier pairs and their ordering are:
+ *
+ * desc_reserve:D / desc_reserve:B
+ * push descriptor tail (id), then push descriptor head (id)
+ *
+ * desc_reserve:D / data_push_tail:B
+ * push data tail (lpos), then set new descriptor reserved (state)
+ *
+ * desc_reserve:D / desc_push_tail:C
+ * push descriptor tail (id), then set new descriptor reserved (state)
+ *
+ * desc_reserve:D / prb_first_seq:C
+ * push descriptor tail (id), then set new descriptor reserved (state)
+ *
+ * desc_reserve:F / desc_read:D
+ * set new descriptor id and reserved (state), then allow writer changes
+ *
+ * data_alloc:A (or data_realloc:A) / desc_read:D
+ * set old descriptor reusable (state), then modify new data block area
+ *
+ * data_alloc:A (or data_realloc:A) / data_push_tail:B
+ * push data tail (lpos), then modify new data block area
+ *
+ * _prb_commit:B / desc_read:B
+ * store writer changes, then set new descriptor committed (state)
+ *
+ * desc_reopen_last:A / _prb_commit:B
+ * set descriptor reserved (state), then read descriptor data
+ *
+ * _prb_commit:B / desc_reserve:D
+ * set new descriptor committed (state), then check descriptor head (id)
+ *
+ * data_push_tail:D / data_push_tail:A
+ * set descriptor reusable (state), then push data tail (lpos)
+ *
+ * desc_push_tail:B / desc_reserve:D
+ * set descriptor reusable (state), then push descriptor tail (id)
+ */
+
+#define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits)
+#define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1)
+
+#define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits)
+#define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1)
+
+/* Determine the data array index from a logical position. */
+#define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring))
+
+/* Determine the desc array index from an ID or sequence number. */
+#define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring))
+
+/* Determine how many times the data array has wrapped. */
+#define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits)
+
+/* Determine if a logical position refers to a data-less block. */
+#define LPOS_DATALESS(lpos) ((lpos) & 1UL)
+#define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \
+ LPOS_DATALESS((blk)->next))
+
+/* Get the logical position at index 0 of the current wrap. */
+#define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
+((lpos) & ~DATA_SIZE_MASK(data_ring))
+
+/* Get the ID for the same index of the previous wrap as the given ID. */
+#define DESC_ID_PREV_WRAP(desc_ring, id) \
+DESC_ID((id) - DESCS_COUNT(desc_ring))
+
+/*
+ * A data block: mapped directly to the beginning of the data block area
+ * specified as a logical position within the data ring.
+ *
+ * @id: the ID of the associated descriptor
+ * @data: the writer data
+ *
+ * Note that the size of a data block is only known by its associated
+ * descriptor.
+ */
+struct prb_data_block {
+ unsigned long id;
+ char data[0];
+};
+
+/*
+ * Return the descriptor associated with @n. @n can be either a
+ * descriptor ID or a sequence number.
+ */
+static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n)
+{
+ return &desc_ring->descs[DESC_INDEX(desc_ring, n)];
+}
+
+/*
+ * Return the printk_info associated with @n. @n can be either a
+ * descriptor ID or a sequence number.
+ */
+static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n)
+{
+ return &desc_ring->infos[DESC_INDEX(desc_ring, n)];
+}
+
+static struct prb_data_block *to_block(struct prb_data_ring *data_ring,
+ unsigned long begin_lpos)
+{
+ return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)];
+}
+
+/*
+ * Increase the data size to account for data block meta data plus any
+ * padding so that the adjacent data block is aligned on the ID size.
+ */
+static unsigned int to_blk_size(unsigned int size)
+{
+ struct prb_data_block *db = NULL;
+
+ size += sizeof(*db);
+ size = ALIGN(size, sizeof(db->id));
+ return size;
+}
+
+/*
+ * Sanity checker for reserve size. The ringbuffer code assumes that a data
+ * block does not exceed the maximum possible size that could fit within the
+ * ringbuffer. This function provides that basic size check so that the
+ * assumption is safe.
+ */
+static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
+{
+ struct prb_data_block *db = NULL;
+
+ if (size == 0)
+ return true;
+
+ /*
+ * Ensure the alignment padded size could possibly fit in the data
+ * array. The largest possible data block must still leave room for
+ * at least the ID of the next block.
+ */
+ size = to_blk_size(size);
+ if (size > DATA_SIZE(data_ring) - sizeof(db->id))
+ return false;
+
+ return true;
+}
+
+/* Query the state of a descriptor. */
+static enum desc_state get_desc_state(unsigned long id,
+ unsigned long state_val)
+{
+ if (id != DESC_ID(state_val))
+ return desc_miss;
+
+ return DESC_STATE(state_val);
+}
+
+/*
+ * Get a copy of a specified descriptor and return its queried state. If the
+ * descriptor is in an inconsistent state (miss or reserved), the caller can
+ * only expect the descriptor's @state_var field to be valid.
+ *
+ * The sequence number and caller_id can be optionally retrieved. Like all
+ * non-state_var data, they are only valid if the descriptor is in a
+ * consistent state.
+ */
+static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
+ unsigned long id, struct prb_desc *desc_out,
+ u64 *seq_out, u32 *caller_id_out)
+{
+ struct printk_info *info = to_info(desc_ring, id);
+ struct prb_desc *desc = to_desc(desc_ring, id);
+ atomic_long_t *state_var = &desc->state_var;
+ enum desc_state d_state;
+ unsigned long state_val;
+
+ /* Check the descriptor state. */
+ state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */
+ d_state = get_desc_state(id, state_val);
+ if (d_state == desc_miss || d_state == desc_reserved) {
+ /*
+ * The descriptor is in an inconsistent state. Set at least
+ * @state_var so that the caller can see the details of
+ * the inconsistent state.
+ */
+ goto out;
+ }
+
+ /*
+ * Guarantee the state is loaded before copying the descriptor
+ * content. This avoids copying obsolete descriptor content that might
+ * not apply to the descriptor state. This pairs with _prb_commit:B.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_read:A reads from _prb_commit:B, then desc_read:C reads
+ * from _prb_commit:A.
+ *
+ * Relies on:
+ *
+ * WMB from _prb_commit:A to _prb_commit:B
+ * matching
+ * RMB from desc_read:A to desc_read:C
+ */
+ smp_rmb(); /* LMM(desc_read:B) */
+
+ /*
+ * Copy the descriptor data. The data is not valid until the
+ * state has been re-checked. A memcpy() for all of @desc
+ * cannot be used because of the atomic_t @state_var field.
+ */
+ memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
+ sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
+ if (seq_out)
+ *seq_out = info->seq; /* also part of desc_read:C */
+ if (caller_id_out)
+ *caller_id_out = info->caller_id; /* also part of desc_read:C */
+
+ /*
+ * 1. Guarantee the descriptor content is loaded before re-checking
+ * the state. This avoids reading an obsolete descriptor state
+ * that may not apply to the copied content. This pairs with
+ * desc_reserve:F.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_read:C reads from desc_reserve:G, then desc_read:E
+ * reads from desc_reserve:F.
+ *
+ * Relies on:
+ *
+ * WMB from desc_reserve:F to desc_reserve:G
+ * matching
+ * RMB from desc_read:C to desc_read:E
+ *
+ * 2. Guarantee the record data is loaded before re-checking the
+ * state. This avoids reading an obsolete descriptor state that may
+ * not apply to the copied data. This pairs with data_alloc:A and
+ * data_realloc:A.
+ *
+ * Memory barrier involvement:
+ *
+ * If copy_data:A reads from data_alloc:B, then desc_read:E
+ * reads from desc_make_reusable:A.
+ *
+ * Relies on:
+ *
+ * MB from desc_make_reusable:A to data_alloc:B
+ * matching
+ * RMB from desc_read:C to desc_read:E
+ *
+ * Note: desc_make_reusable:A and data_alloc:B can be different
+ * CPUs. However, the data_alloc:B CPU (which performs the
+ * full memory barrier) must have previously seen
+ * desc_make_reusable:A.
+ */
+ smp_rmb(); /* LMM(desc_read:D) */
+
+ /*
+ * The data has been copied. Return the current descriptor state,
+ * which may have changed since the load above.
+ */
+ state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
+ d_state = get_desc_state(id, state_val);
+out:
+ atomic_long_set(&desc_out->state_var, state_val);
+ return d_state;
+}
+
+/*
+ * Take a specified descriptor out of the finalized state by attempting
+ * the transition from finalized to reusable. Either this context or some
+ * other context will have been successful.
+ */
+static void desc_make_reusable(struct prb_desc_ring *desc_ring,
+ unsigned long id)
+{
+ unsigned long val_finalized = DESC_SV(id, desc_finalized);
+ unsigned long val_reusable = DESC_SV(id, desc_reusable);
+ struct prb_desc *desc = to_desc(desc_ring, id);
+ atomic_long_t *state_var = &desc->state_var;
+
+ atomic_long_cmpxchg_relaxed(state_var, val_finalized,
+ val_reusable); /* LMM(desc_make_reusable:A) */
+}
+
+/*
+ * Given the text data ring, put the associated descriptor of each
+ * data block from @lpos_begin until @lpos_end into the reusable state.
+ *
+ * If there is any problem making the associated descriptor reusable, either
+ * the descriptor has not yet been finalized or another writer context has
+ * already pushed the tail lpos past the problematic data block. Regardless,
+ * on error the caller can re-load the tail lpos to determine the situation.
+ */
+static bool data_make_reusable(struct printk_ringbuffer *rb,
+ struct prb_data_ring *data_ring,
+ unsigned long lpos_begin,
+ unsigned long lpos_end,
+ unsigned long *lpos_out)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ struct prb_data_block *blk;
+ enum desc_state d_state;
+ struct prb_desc desc;
+ struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos;
+ unsigned long id;
+
+ /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */
+ while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
+ blk = to_block(data_ring, lpos_begin);
+
+ /*
+ * Load the block ID from the data block. This is a data race
+ * against a writer that may have newly reserved this data
+ * area. If the loaded value matches a valid descriptor ID,
+ * the blk_lpos of that descriptor will be checked to make
+ * sure it points back to this data block. If the check fails,
+ * the data area has been recycled by another writer.
+ */
+ id = blk->id; /* LMM(data_make_reusable:A) */
+
+ d_state = desc_read(desc_ring, id, &desc,
+ NULL, NULL); /* LMM(data_make_reusable:B) */
+
+ switch (d_state) {
+ case desc_miss:
+ case desc_reserved:
+ case desc_committed:
+ return false;
+ case desc_finalized:
+ /*
+ * This data block is invalid if the descriptor
+ * does not point back to it.
+ */
+ if (blk_lpos->begin != lpos_begin)
+ return false;
+ desc_make_reusable(desc_ring, id);
+ break;
+ case desc_reusable:
+ /*
+ * This data block is invalid if the descriptor
+ * does not point back to it.
+ */
+ if (blk_lpos->begin != lpos_begin)
+ return false;
+ break;
+ }
+
+ /* Advance @lpos_begin to the next data block. */
+ lpos_begin = blk_lpos->next;
+ }
+
+ *lpos_out = lpos_begin;
+ return true;
+}
+
+/*
+ * Advance the data ring tail to at least @lpos. This function puts
+ * descriptors into the reusable state if the tail is pushed beyond
+ * their associated data block.
+ */
+static bool data_push_tail(struct printk_ringbuffer *rb,
+ struct prb_data_ring *data_ring,
+ unsigned long lpos)
+{
+ unsigned long tail_lpos_new;
+ unsigned long tail_lpos;
+ unsigned long next_lpos;
+
+ /* If @lpos is from a data-less block, there is nothing to do. */
+ if (LPOS_DATALESS(lpos))
+ return true;
+
+ /*
+ * Any descriptor states that have transitioned to reusable due to the
+ * data tail being pushed to this loaded value will be visible to this
+ * CPU. This pairs with data_push_tail:D.
+ *
+ * Memory barrier involvement:
+ *
+ * If data_push_tail:A reads from data_push_tail:D, then this CPU can
+ * see desc_make_reusable:A.
+ *
+ * Relies on:
+ *
+ * MB from desc_make_reusable:A to data_push_tail:D
+ * matches
+ * READFROM from data_push_tail:D to data_push_tail:A
+ * thus
+ * READFROM from desc_make_reusable:A to this CPU
+ */
+ tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */
+
+ /*
+ * Loop until the tail lpos is at or beyond @lpos. This condition
+ * may already be satisfied, resulting in no full memory barrier
+ * from data_push_tail:D being performed. However, since this CPU
+ * sees the new tail lpos, any descriptor states that transitioned to
+ * the reusable state must already be visible.
+ */
+ while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
+ /*
+ * Make all descriptors reusable that are associated with
+ * data blocks before @lpos.
+ */
+ if (!data_make_reusable(rb, data_ring, tail_lpos, lpos,
+ &next_lpos)) {
+ /*
+ * 1. Guarantee the block ID loaded in
+ * data_make_reusable() is performed before
+ * reloading the tail lpos. The failed
+ * data_make_reusable() may be due to a newly
+ * recycled data area causing the tail lpos to
+ * have been previously pushed. This pairs with
+ * data_alloc:A and data_realloc:A.
+ *
+ * Memory barrier involvement:
+ *
+ * If data_make_reusable:A reads from data_alloc:B,
+ * then data_push_tail:C reads from
+ * data_push_tail:D.
+ *
+ * Relies on:
+ *
+ * MB from data_push_tail:D to data_alloc:B
+ * matching
+ * RMB from data_make_reusable:A to
+ * data_push_tail:C
+ *
+ * Note: data_push_tail:D and data_alloc:B can be
+ * different CPUs. However, the data_alloc:B
+ * CPU (which performs the full memory
+ * barrier) must have previously seen
+ * data_push_tail:D.
+ *
+ * 2. Guarantee the descriptor state loaded in
+ * data_make_reusable() is performed before
+ * reloading the tail lpos. The failed
+ * data_make_reusable() may be due to a newly
+ * recycled descriptor causing the tail lpos to
+ * have been previously pushed. This pairs with
+ * desc_reserve:D.
+ *
+ * Memory barrier involvement:
+ *
+ * If data_make_reusable:B reads from
+ * desc_reserve:F, then data_push_tail:C reads
+ * from data_push_tail:D.
+ *
+ * Relies on:
+ *
+ * MB from data_push_tail:D to desc_reserve:F
+ * matching
+ * RMB from data_make_reusable:B to
+ * data_push_tail:C
+ *
+ * Note: data_push_tail:D and desc_reserve:F can
+ * be different CPUs. However, the
+ * desc_reserve:F CPU (which performs the
+ * full memory barrier) must have previously
+ * seen data_push_tail:D.
+ */
+ smp_rmb(); /* LMM(data_push_tail:B) */
+
+ tail_lpos_new = atomic_long_read(&data_ring->tail_lpos
+ ); /* LMM(data_push_tail:C) */
+ if (tail_lpos_new == tail_lpos)
+ return false;
+
+ /* Another CPU pushed the tail. Try again. */
+ tail_lpos = tail_lpos_new;
+ continue;
+ }
+
+ /*
+ * Guarantee any descriptor states that have transitioned to
+ * reusable are stored before pushing the tail lpos. A full
+ * memory barrier is needed since other CPUs may have made
+ * the descriptor states reusable. This pairs with
+ * data_push_tail:A.
+ */
+ if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos,
+ next_lpos)) { /* LMM(data_push_tail:D) */
+ break;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Advance the desc ring tail. This function advances the tail by one
+ * descriptor, thus invalidating the oldest descriptor. Before advancing
+ * the tail, the tail descriptor is made reusable and all data blocks up to
+ * and including the descriptor's data block are invalidated (i.e. the data
+ * ring tail is pushed past the data block of the descriptor being made
+ * reusable).
+ */
+static bool desc_push_tail(struct printk_ringbuffer *rb,
+ unsigned long tail_id)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ enum desc_state d_state;
+ struct prb_desc desc;
+
+ d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL);
+
+ switch (d_state) {
+ case desc_miss:
+ /*
+ * If the ID is exactly 1 wrap behind the expected, it is
+ * in the process of being reserved by another writer and
+ * must be considered reserved.
+ */
+ if (DESC_ID(atomic_long_read(&desc.state_var)) ==
+ DESC_ID_PREV_WRAP(desc_ring, tail_id)) {
+ return false;
+ }
+
+ /*
+ * The ID has changed. Another writer must have pushed the
+ * tail and recycled the descriptor already. Success is
+ * returned because the caller is only interested in the
+ * specified tail being pushed, which it was.
+ */
+ return true;
+ case desc_reserved:
+ case desc_committed:
+ return false;
+ case desc_finalized:
+ desc_make_reusable(desc_ring, tail_id);
+ break;
+ case desc_reusable:
+ break;
+ }
+
+ /*
+ * Data blocks must be invalidated before their associated
+ * descriptor can be made available for recycling. Invalidating
+ * them later is not possible because there is no way to trust
+ * data blocks once their associated descriptor is gone.
+ */
+
+ if (!data_push_tail(rb, &rb->text_data_ring, desc.text_blk_lpos.next))
+ return false;
+
+ /*
+ * Check the next descriptor after @tail_id before pushing the tail
+ * to it because the tail must always be in a finalized or reusable
+ * state. The implementation of prb_first_seq() relies on this.
+ *
+ * A successful read implies that the next descriptor is less than or
+ * equal to @head_id so there is no risk of pushing the tail past the
+ * head.
+ */
+ d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc,
+ NULL, NULL); /* LMM(desc_push_tail:A) */
+
+ if (d_state == desc_finalized || d_state == desc_reusable) {
+ /*
+ * Guarantee any descriptor states that have transitioned to
+ * reusable are stored before pushing the tail ID. This allows
+ * verifying the recycled descriptor state. A full memory
+ * barrier is needed since other CPUs may have made the
+ * descriptor states reusable. This pairs with desc_reserve:D.
+ */
+ atomic_long_cmpxchg(&desc_ring->tail_id, tail_id,
+ DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */
+ } else {
+ /*
+ * Guarantee the last state load from desc_read() is before
+ * reloading @tail_id in order to see a new tail ID in the
+ * case that the descriptor has been recycled. This pairs
+ * with desc_reserve:D.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_push_tail:A reads from desc_reserve:F, then
+ * desc_push_tail:D reads from desc_push_tail:B.
+ *
+ * Relies on:
+ *
+ * MB from desc_push_tail:B to desc_reserve:F
+ * matching
+ * RMB from desc_push_tail:A to desc_push_tail:D
+ *
+ * Note: desc_push_tail:B and desc_reserve:F can be different
+ * CPUs. However, the desc_reserve:F CPU (which performs
+ * the full memory barrier) must have previously seen
+ * desc_push_tail:B.
+ */
+ smp_rmb(); /* LMM(desc_push_tail:C) */
+
+ /*
+ * Re-check the tail ID. The descriptor following @tail_id is
+ * not in an allowed tail state. But if the tail has since
+ * been moved by another CPU, then it does not matter.
+ */
+ if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */
+ return false;
+ }
+
+ return true;
+}
+
+/* Reserve a new descriptor, invalidating the oldest if necessary. */
+static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
+{
+ struct prb_desc_ring *desc_ring = &rb->desc_ring;
+ unsigned long prev_state_val;
+ unsigned long id_prev_wrap;
+ struct prb_desc *desc;
+ unsigned long head_id;
+ unsigned long id;
+
+ head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */
+
+ do {
+ desc = to_desc(desc_ring, head_id);
+
+ id = DESC_ID(head_id + 1);
+ id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);
+
+ /*
+ * Guarantee the head ID is read before reading the tail ID.
+ * Since the tail ID is updated before the head ID, this
+ * guarantees that @id_prev_wrap is never ahead of the tail
+ * ID. This pairs with desc_reserve:D.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_reserve:A reads from desc_reserve:D, then
+ * desc_reserve:C reads from desc_push_tail:B.
+ *
+ * Relies on:
+ *
+ * MB from desc_push_tail:B to desc_reserve:D
+ * matching
+ * RMB from desc_reserve:A to desc_reserve:C
+ *
+ * Note: desc_push_tail:B and desc_reserve:D can be different
+ * CPUs. However, the desc_reserve:D CPU (which performs
+ * the full memory barrier) must have previously seen
+ * desc_push_tail:B.
+ */
+ smp_rmb(); /* LMM(desc_reserve:B) */
+
+ if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id
+ )) { /* LMM(desc_reserve:C) */
+ /*
+ * Make space for the new descriptor by
+ * advancing the tail.
+ */
+ if (!desc_push_tail(rb, id_prev_wrap))
+ return false;
+ }
+
+ /*
+ * 1. Guarantee the tail ID is read before validating the
+ * recycled descriptor state. A read memory barrier is
+ * sufficient for this. This pairs with desc_push_tail:B.
+ *
+ * Memory barrier involvement:
+ *
+ * If desc_reserve:C reads from desc_push_tail:B, then
+ * desc_reserve:E reads from desc_make_reusable:A.
+ *
+ * Relies on:
+ *
+ * MB from desc_make_reusable:A to desc_push_tail:B
+ * matching
+ * RMB from desc_reserve:C to desc_reserve:E
+ *
+ * Note: desc_make_reusable:A and desc_push_tail:B can be
+ * different CPUs. However, the desc_push_tail:B CPU
+ * (which performs the full memory barrier) must have
+ * previously seen desc_make_reusable:A.
+ *
+ * 2. Guarantee the tail ID is stored before storing the head
+ * ID. This pairs with desc_reserve:B.
+ *
+ * 3. Guarantee any data ring tail changes are stored before
+ * recycling the descriptor. Data ring tail changes can
+ * happen via desc_push_tail()->data_push_tail(). A full
+ * memory barrier is needed since another CPU may have
+ * pushed the data ring tails. This pairs with
+ * data_push_tail:B.
+ *
+ * 4. Guarantee a new tail ID is stored before recycling the
+ * descriptor. A full memory barrier is needed since
+ * another CPU may have pushed the tail ID. This pairs
+ * with desc_push_tail:C and this also pairs with
+ * prb_first_seq:C.
+ *
+ * 5. Guarantee the head ID is stored before trying to
+ * finalize the previous descriptor. This pairs with
+ * _prb_commit:B.
+ */
+ } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id,
+ id)); /* LMM(desc_reserve:D) */
+
+ desc = to_desc(desc_ring, id);
+
+ /*
+ * If the descriptor has been recycled, verify the old state val.
+ * See "ABA Issues" about why this verification is performed.
+ */
+ prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */
+ if (prev_state_val &&
+ get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /*
+ * Assign the descriptor a new ID and set its state to reserved.
+ * See "ABA Issues" about why cmpxchg() instead of set() is used.
+ *
+ * Guarantee the new descriptor ID and state is stored before making
+ * any other changes. A write memory barrier is sufficient for this.
+ * This pairs with desc_read:D.
+ */
+ if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val,
+ DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /* Now data in @desc can be modified: LMM(desc_reserve:G) */
+
+ *id_out = id;
+ return true;
+}
+
+/* Determine the end of a data block. */
+static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
+ unsigned long lpos, unsigned int size)
+{
+ unsigned long begin_lpos;
+ unsigned long next_lpos;
+
+ begin_lpos = lpos;
+ next_lpos = lpos + size;
+
+ /* First check if the data block does not wrap. */
+ if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
+ return next_lpos;
+
+ /* Wrapping data blocks store their data at the beginning. */
+ return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size);
+}
+
+/*
+ * Allocate a new data block, invalidating the oldest data block(s)
+ * if necessary. This function also associates the data block with
+ * a specified descriptor.
+ */
+static char *data_alloc(struct printk_ringbuffer *rb,
+ struct prb_data_ring *data_ring, unsigned int size,
+ struct prb_data_blk_lpos *blk_lpos, unsigned long id)
+{
+ struct prb_data_block *blk;
+ unsigned long begin_lpos;
+ unsigned long next_lpos;
+
+ if (size == 0) {
+ /* Specify a data-less block. */
+ blk_lpos->begin = NO_LPOS;
+ blk_lpos->next = NO_LPOS;
+ return NULL;
+ }
+
+ size = to_blk_size(size);
+
+ begin_lpos = atomic_long_read(&data_ring->head_lpos);
+
+ do {
+ next_lpos = get_next_lpos(data_ring, begin_lpos, size);<