summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorJani Nikula <jani.nikula@intel.com>2018-11-20 13:14:08 +0200
committerJani Nikula <jani.nikula@intel.com>2018-11-20 13:14:08 +0200
commit2ac5e38ea4203852d6e99edd3cf11f044b0a409f (patch)
tree1ef02da98d56309368ad2b6a4e492bafe5bb4faf /lib
parentf48cc647f3e196a3179d695d3c2d56c13e9dec98 (diff)
parent9235dd441af43599b9cdcce599a3da4083fcad3c (diff)
Merge drm/drm-next into drm-intel-next-queued
Pull in v4.20-rc3 via drm-next. Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig5
-rw-r--r--lib/Kconfig.debug20
-rw-r--r--lib/Kconfig.kasan9
-rw-r--r--lib/Makefile9
-rw-r--r--lib/bch.c17
-rw-r--r--lib/bitmap.c22
-rw-r--r--lib/chacha20.c6
-rw-r--r--lib/cpumask.c4
-rw-r--r--lib/crc-t10dif.c57
-rw-r--r--lib/crc32.c11
-rw-r--r--lib/debug_locks.c6
-rw-r--r--lib/idr.c401
-rw-r--r--lib/iov_iter.c125
-rw-r--r--lib/kstrtox.c16
-rw-r--r--lib/lz4/lz4_decompress.c481
-rw-r--r--lib/lz4/lz4defs.h9
-rw-r--r--lib/memcat_p.c34
-rw-r--r--lib/nlattr.c269
-rw-r--r--lib/parser.c16
-rw-r--r--lib/percpu-refcount.c28
-rw-r--r--lib/radix-tree.c834
-rw-r--r--lib/raid6/test/Makefile4
-rw-r--r--lib/sg_pool.c7
-rw-r--r--lib/string.c1
-rw-r--r--lib/test_bpf.c1
-rw-r--r--lib/test_ida.c4
-rw-r--r--lib/test_kasan.c70
-rw-r--r--lib/test_memcat_p.c115
-rw-r--r--lib/test_xarray.c1238
-rw-r--r--lib/ubsan.c3
-rw-r--r--lib/vsprintf.c245
-rw-r--r--lib/xarray.c2036
-rw-r--r--lib/xz/xz_crc32.c1
-rw-r--r--lib/xz/xz_private.h4
-rw-r--r--lib/zlib_inflate/inflate.c12
35 files changed, 4755 insertions, 1365 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index a3928d4438b5..a9965f4af4dd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -399,8 +399,11 @@ config INTERVAL_TREE
for more information.
-config RADIX_TREE_MULTIORDER
+config XARRAY_MULTI
bool
+ help
+ Support entries which occupy multiple consecutive indices in the
+ XArray.
config ASSOCIATIVE_ARRAY
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4966c4fbe7f7..1af29b8224fd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1179,7 +1179,7 @@ config LOCKDEP
bool
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !ARM && !S390 && !MICROBLAZE && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
@@ -1292,7 +1292,7 @@ config DEBUG_KOBJECT
depends on DEBUG_KERNEL
help
If you say Y here, some extra kobject debugging messages will be sent
- to the syslog.
+ to the syslog.
config DEBUG_KOBJECT_RELEASE
bool "kobject release debugging"
@@ -1590,7 +1590,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
depends on !X86_64
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
help
Provide stacktrace filter for fault-injection capabilities
@@ -1599,7 +1599,7 @@ config LATENCYTOP
depends on DEBUG_KERNEL
depends on STACKTRACE_SUPPORT
depends on PROC_FS
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
@@ -1813,6 +1813,9 @@ config TEST_BITFIELD
config TEST_UUID
tristate "Test functions located in the uuid module at runtime"
+config TEST_XARRAY
+ tristate "Test the XArray code at runtime"
+
config TEST_OVERFLOW
tristate "Test check_*_overflow() functions at runtime"
@@ -1965,11 +1968,18 @@ config TEST_DEBUG_VIRTUAL
If unsure, say N.
+config TEST_MEMCAT_P
+ tristate "Test memcat_p() helper function"
+ help
+ Test the memcat_p() helper for correctly merging two
+ pointer arrays together.
+
+ If unsure, say N.
+
endif # RUNTIME_TESTING_MENU
config MEMTEST
bool "Memtest"
- depends on HAVE_MEMBLOCK
---help---
This option adds a kernel parameter 'memtest', which allows memtest
to be set.
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index befb127507c0..d0bad1bd9a2b 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -57,6 +57,15 @@ config KASAN_INLINE
endchoice
+config KASAN_S390_4_LEVEL_PAGING
+ bool "KASan: use 4-level paging"
+ depends on KASAN && S390
+ help
+ Compiling the kernel with KASan disables automatic 3-level vs
+ 4-level paging selection. 3-level paging is used by default (up
+ to 3TB of RAM with KASan enabled). This options allows to force
+ 4-level paging instead.
+
config TEST_KASAN
tristate "Module for testing kasan for bug detection"
depends on m && KASAN
diff --git a/lib/Makefile b/lib/Makefile
index ca3f7ebb900d..db06d1237898 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,13 +18,13 @@ KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o timerqueue.o\
+ rbtree.o radix-tree.o timerqueue.o xarray.o \
idr.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
- nmi_backtrace.o nodemask.o win_minmax.o
+ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
@@ -53,7 +53,9 @@ obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
obj-$(CONFIG_TEST_IDA) += test_ida.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
CFLAGS_test_kasan.o += -fno-builtin
+CFLAGS_test_kasan.o += $(call cc-disable-warning, vla)
obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
+CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
UBSAN_SANITIZE_test_ubsan.o := y
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
@@ -68,9 +70,11 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
+obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
@@ -119,7 +123,6 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
obj-$(CONFIG_BCH) += bch.o
-CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500)
obj-$(CONFIG_LZO_COMPRESS) += lzo/
obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
obj-$(CONFIG_LZ4_COMPRESS) += lz4/
diff --git a/lib/bch.c b/lib/bch.c
index 7b0f2006698b..5db6d3a4c8a6 100644
--- a/lib/bch.c
+++ b/lib/bch.c
@@ -79,20 +79,19 @@
#define GF_T(_p) (CONFIG_BCH_CONST_T)
#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
#define BCH_MAX_M (CONFIG_BCH_CONST_M)
+#define BCH_MAX_T (CONFIG_BCH_CONST_T)
#else
#define GF_M(_p) ((_p)->m)
#define GF_T(_p) ((_p)->t)
#define GF_N(_p) ((_p)->n)
-#define BCH_MAX_M 15
+#define BCH_MAX_M 15 /* 2KB */
+#define BCH_MAX_T 64 /* 64 bit correction */
#endif
-#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M)
-
#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
-#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8)
#ifndef dbg
#define dbg(_fmt, args...) do {} while (0)
@@ -202,6 +201,9 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
const uint32_t * const tab3 = tab2 + 256*(l+1);
const uint32_t *pdata, *p0, *p1, *p2, *p3;
+ if (WARN_ON(r_bytes > sizeof(r)))
+ return;
+
if (ecc) {
/* load ecc parity bytes into internal 32-bit buffer */
load_ecc8(bch, bch->ecc_buf, ecc);
@@ -1285,6 +1287,13 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
*/
goto fail;
+ if (t > BCH_MAX_T)
+ /*
+ * we can support larger than 64 bits if necessary, at the
+ * cost of higher stack usage.
+ */
+ goto fail;
+
/* sanity checks */
if ((t < 1) || (m*t >= ((1 << m)-1)))
/* invalid t value */
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 2fd07f6df0b8..eead55aa7170 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -13,6 +13,7 @@
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/kernel.h>
+#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -36,11 +37,6 @@
* carefully filter out these unused bits from impacting their
* results.
*
- * These operations actually hold to a slightly stronger rule:
- * if you don't input any bitmaps to these ops that have some
- * unused bits set, then they won't output any set unused bits
- * in output bitmaps.
- *
* The byte ordering of bitmaps is more natural on little
* endian architectures. See the big-endian headers
* include/asm-ppc64/bitops.h and include/asm-s390/bitops.h
@@ -466,20 +462,18 @@ EXPORT_SYMBOL(bitmap_parse_user);
* ranges if list is specified or hex digits grouped into comma-separated
* sets of 8 digits/set. Returns the number of characters written to buf.
*
- * It is assumed that @buf is a pointer into a PAGE_SIZE area and that
- * sufficient storage remains at @buf to accommodate the
- * bitmap_print_to_pagebuf() output.
+ * It is assumed that @buf is a pointer into a PAGE_SIZE, page-aligned
+ * area and that sufficient storage remains at @buf to accommodate the
+ * bitmap_print_to_pagebuf() output. Returns the number of characters
+ * actually printed to @buf, excluding terminating '\0'.
*/
int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp,
int nmaskbits)
{
- ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
- int n = 0;
+ ptrdiff_t len = PAGE_SIZE - offset_in_page(buf);
- if (len > 1)
- n = list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) :
- scnprintf(buf, len, "%*pb\n", nmaskbits, maskp);
- return n;
+ return list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) :
+ scnprintf(buf, len, "%*pb\n", nmaskbits, maskp);
}
EXPORT_SYMBOL(bitmap_print_to_pagebuf);
diff --git a/lib/chacha20.c b/lib/chacha20.c
index c1cc50fb68c9..d907fec6a9ed 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -16,9 +16,9 @@
#include <asm/unaligned.h>
#include <crypto/chacha20.h>
-void chacha20_block(u32 *state, u32 *stream)
+void chacha20_block(u32 *state, u8 *stream)
{
- u32 x[16], *out = stream;
+ u32 x[16];
int i;
for (i = 0; i < ARRAY_SIZE(x); i++)
@@ -67,7 +67,7 @@ void chacha20_block(u32 *state, u32 *stream)
}
for (i = 0; i < ARRAY_SIZE(x); i++)
- out[i] = cpu_to_le32(x[i] + state[i]);
+ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
state[12]++;
}
diff --git a/lib/cpumask.c b/lib/cpumask.c
index beca6244671a..8d666ab84b5c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -4,7 +4,7 @@
#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
/**
* cpumask_next - get the next cpu in a cpumask
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var);
*/
void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
{
- *mask = memblock_virt_alloc(cpumask_size(), 0);
+ *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES);
}
/**
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index 1ad33e555805..4d0d47c1ffbd 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -14,10 +14,47 @@
#include <linux/err.h>
#include <linux/init.h>
#include <crypto/hash.h>
+#include <crypto/algapi.h>
#include <linux/static_key.h>
+#include <linux/notifier.h>
-static struct crypto_shash *crct10dif_tfm;
+static struct crypto_shash __rcu *crct10dif_tfm;
static struct static_key crct10dif_fallback __read_mostly;
+static DEFINE_MUTEX(crc_t10dif_mutex);
+
+static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct crypto_alg *alg = data;
+ struct crypto_shash *new, *old;
+
+ if (val != CRYPTO_MSG_ALG_LOADED ||
+ static_key_false(&crct10dif_fallback) ||
+ strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING)))
+ return 0;
+
+ mutex_lock(&crc_t10dif_mutex);
+ old = rcu_dereference_protected(crct10dif_tfm,
+ lockdep_is_held(&crc_t10dif_mutex));
+ if (!old) {
+ mutex_unlock(&crc_t10dif_mutex);
+ return 0;
+ }
+ new = crypto_alloc_shash("crct10dif", 0, 0);
+ if (IS_ERR(new)) {
+ mutex_unlock(&crc_t10dif_mutex);
+ return 0;
+ }
+ rcu_assign_pointer(crct10dif_tfm, new);
+ mutex_unlock(&crc_t10dif_mutex);
+
+ synchronize_rcu();
+ crypto_free_shash(old);
+ return 0;
+}
+
+static struct notifier_block crc_t10dif_nb = {
+ .notifier_call = crc_t10dif_rehash,
+};
__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
{
@@ -30,11 +67,14 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
if (static_key_false(&crct10dif_fallback))
return crc_t10dif_generic(crc, buffer, len);
- desc.shash.tfm = crct10dif_tfm;
+ rcu_read_lock();
+ desc.shash.tfm = rcu_dereference(crct10dif_tfm);
desc.shash.flags = 0;
*(__u16 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, buffer, len);
+ rcu_read_unlock();
+
BUG_ON(err);
return *(__u16 *)desc.ctx;
@@ -49,6 +89,7 @@ EXPORT_SYMBOL(crc_t10dif);
static int __init crc_t10dif_mod_init(void)
{
+ crypto_register_notifier(&crc_t10dif_nb);
crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
if (IS_ERR(crct10dif_tfm)) {
static_key_slow_inc(&crct10dif_fallback);
@@ -59,12 +100,24 @@ static int __init crc_t10dif_mod_init(void)
static void __exit crc_t10dif_mod_fini(void)
{
+ crypto_unregister_notifier(&crc_t10dif_nb);
crypto_free_shash(crct10dif_tfm);
}
module_init(crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_fini);
+static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp)
+{
+ if (static_key_false(&crct10dif_fallback))
+ return sprintf(buffer, "fallback\n");
+
+ return sprintf(buffer, "%s\n",
+ crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm)));
+}
+
+module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644);
+
MODULE_DESCRIPTION("T10 DIF CRC calculation");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crct10dif");
diff --git a/lib/crc32.c b/lib/crc32.c
index a6c9afafc8c8..45b1d67a1767 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
}
#if CRC_LE_BITS == 1
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
@@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);
+u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
+u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+
/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 96c4c633d95e..ce51749cc145 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -21,7 +21,7 @@
* that would just muddy the log. So we report the first one and
* shut up after that.
*/
-int debug_locks = 1;
+int debug_locks __read_mostly = 1;
EXPORT_SYMBOL_GPL(debug_locks);
/*
@@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(debug_locks);
* 'silent failure': nothing is printed to the console when
* a locking bug is detected.
*/
-int debug_locks_silent;
+int debug_locks_silent __read_mostly;
EXPORT_SYMBOL_GPL(debug_locks_silent);
/*
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
*/
int debug_locks_off(void)
{
- if (__debug_locks_off()) {
+ if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
console_verbose();
return 1;
diff --git a/lib/idr.c b/lib/idr.c
index fab2fd5bc326..cb1db9b8d3f6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,8 +6,6 @@
#include <linux/spinlock.h>
#include <linux/xarray.h>
-DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-
/**
* idr_alloc_u32() - Allocate an ID.
* @idr: IDR handle.
@@ -39,10 +37,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
unsigned int base = idr->idr_base;
unsigned int id = *nextid;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return -EINVAL;
- if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
- idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
+ if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
+ idr->idr_rt.xa_flags |= IDR_RT_MARKER;
id = (id < base) ? 0 : id - base;
radix_tree_iter_init(&iter, id);
@@ -295,15 +291,13 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
void __rcu **slot = NULL;
void *entry;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return ERR_PTR(-EINVAL);
id -= idr->idr_base;
entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
return ERR_PTR(-ENOENT);
- __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
+ __radix_tree_replace(&idr->idr_rt, node, slot, ptr);
return entry;
}
@@ -324,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
* free the individual IDs in it. You can use ida_is_empty() to find
* out whether the IDA has any IDs currently allocated.
*
+ * The IDA handles its own locking. It is safe to call any of the IDA
+ * functions without synchronisation in your code.
+ *
* IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
* limitation, it should be quite straightforward to raise the maximum.
*/
@@ -331,161 +328,197 @@ EXPORT_SYMBOL(idr_replace);
/*
* Developer's notes:
*
- * The IDA uses the functionality provided by the IDR & radix tree to store
- * bitmaps in each entry. The IDR_FREE tag means there is at least one bit
- * free, unlike the IDR where it means at least one entry is free.
+ * The IDA uses the functionality provided by the XArray to store bitmaps in
+ * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
+ * have been set.
*
- * I considered telling the radix tree that each slot is an order-10 node
- * and storing the bit numbers in the radix tree, but the radix tree can't
- * allow a single multiorder entry at index 0, which would significantly
- * increase memory consumption for the IDA. So instead we divide the index
- * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ * I considered telling the XArray that each slot is an order-10 node
+ * and indexing by bit number, but the XArray can't allow a single multi-index
+ * entry in the head, which would significantly increase memory consumption
+ * for the IDA. So instead we divide the index by the number of bits in the
+ * leaf bitmap before doing a radix tree lookup.
*
* As an optimisation, if there are only a few low bits set in any given
- * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
- * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
- * directly in the entry. By being really tricksy, we could store
- * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
- * for 0-3 allocated IDs.
- *
- * We allow the radix tree 'exceptional' count to get out of date. Nothing
- * in the IDA nor the radix tree code checks it. If it becomes important
- * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
- * calls to radix_tree_iter_replace() which will correct the exceptional
- * count.
- *
- * The IDA always requires a lock to alloc/free. If we add a 'test_bit'
+ * leaf, instead of allocating a 128-byte bitmap, we store the bits
+ * as a value entry. Value entries never have the XA_FREE_MARK cleared
+ * because we can always convert them into a bitmap entry.
+ *
+ * It would be possible to optimise further; once we've run out of a
+ * single 128-byte bitmap, we currently switch to a 576-byte node, put
+ * the 128-byte bitmap in the first entry and then start allocating extra
+ * 128-byte entries. We could instead use the 512 bytes of the node's
+ * data as a bitmap before moving to that scheme. I do not believe this
+ * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
+ * users of the IDA and almost none of them use more than 1024 entries.
+ * Those that do use more than the 8192 IDs that the 512 bytes would
+ * provide.
+ *
+ * The IDA always uses a lock to alloc/free. If we add a 'test_bit'
* equivalent, it will still need locking. Going to RCU lookup would require
* using RCU to free bitmaps, and that's not trivial without embedding an
* RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
* bitmap, which is excessive.
*/
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
-
-static int ida_get_new_above(struct ida *ida, int start)
+/**
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and @max, inclusive. The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+ gfp_t gfp)
{
- struct radix_tree_root *root = &ida->ida_rt;
- void __rcu **slot;
- struct radix_tree_iter iter;
- struct ida_bitmap *bitmap;
- unsigned long index;
- unsigned bit, ebit;
- int new;
-
- index = start / IDA_BITMAP_BITS;
- bit = start % IDA_BITMAP_BITS;
- ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
-
- slot = radix_tree_iter_init(&iter, index);
- for (;;) {
- if (slot)
- slot = radix_tree_next_slot(slot, &iter,
- RADIX_TREE_ITER_TAGGED);
- if (!slot) {
- slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
- if (IS_ERR(slot)) {
- if (slot == ERR_PTR(-ENOMEM))
- return -EAGAIN;
- return PTR_ERR(slot);
+ XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
+ unsigned bit = min % IDA_BITMAP_BITS;
+ unsigned long flags;
+ struct ida_bitmap *bitmap, *alloc = NULL;
+
+ if ((int)min < 0)
+ return -ENOSPC;
+
+ if ((int)max < 0)
+ max = INT_MAX;
+
+retry:
+ xas_lock_irqsave(&xas, flags);
+next:
+ bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
+ if (xas.xa_index > min / IDA_BITMAP_BITS)
+ bit = 0;
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+
+ if (xa_is_value(bitmap)) {
+ unsigned long tmp = xa_to_value(bitmap);
+
+ if (bit < BITS_PER_XA_VALUE) {
+ bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit < BITS_PER_XA_VALUE) {
+ tmp |= 1UL << bit;
+ xas_store(&xas, xa_mk_value(tmp));
+ goto out;
}
}
- if (iter.index > index) {
- bit = 0;
- ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- new = iter.index * IDA_BITMAP_BITS;
- bitmap = rcu_dereference_raw(*slot);
- if (radix_tree_exception(bitmap)) {
- unsigned long tmp = (unsigned long)bitmap;
- ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
- if (ebit < BITS_PER_LONG) {
- tmp |= 1UL << ebit;
- rcu_assign_pointer(*slot, (void *)tmp);
- return new + ebit -
- RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
- if (!bitmap)
- return -EAGAIN;
- bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
- rcu_assign_pointer(*slot, bitmap);
+ bitmap = alloc;
+ if (!bitmap)
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
+ bitmap->bitmap[0] = tmp;
+ xas_store(&xas, bitmap);
+ if (xas_error(&xas)) {
+ bitmap->bitmap[0] = 0;
+ goto out;
}
+ }
- if (bitmap) {
- bit = find_next_zero_bit(bitmap->bitmap,
- IDA_BITMAP_BITS, bit);
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (bit == IDA_BITMAP_BITS)
- continue;
+ if (bitmap) {
+ bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit == IDA_BITMAP_BITS)
+ goto next;
- __set_bit(bit, bitmap->bitmap);
- if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
- radix_tree_iter_tag_clear(root, &iter,
- IDR_FREE);
+ __set_bit(bit, bitmap->bitmap);
+ if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } else {
+ if (bit < BITS_PER_XA_VALUE) {
+ bitmap = xa_mk_value(1UL << bit);
} else {
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (ebit < BITS_PER_LONG) {
- bitmap = (void *)((1UL << ebit) |
- RADIX_TREE_EXCEPTIONAL_ENTRY);
- radix_tree_iter_replace(root, &iter, slot,
- bitmap);
- return new;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
+ bitmap = alloc;
if (!bitmap)
- return -EAGAIN;
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
__set_bit(bit, bitmap->bitmap);
- radix_tree_iter_replace(root, &iter, slot, bitmap);
}
-
- return new;
+ xas_store(&xas, bitmap);
+ }
+out:
+ xas_unlock_irqrestore(&xas, flags);
+ if (xas_nomem(&xas, gfp)) {
+ xas.xa_index = min / IDA_BITMAP_BITS;
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
}
+ if (bitmap != alloc)
+ kfree(alloc);
+ if (xas_error(&xas))
+ return xas_error(&xas);
+ return xas.xa_index * IDA_BITMAP_BITS + bit;
+alloc:
+ xas_unlock_irqrestore(&xas, flags);
+ alloc = kzalloc(sizeof(*bitmap), gfp);
+ if (!alloc)
+ return -ENOMEM;
+ xas_set(&xas, min / IDA_BITMAP_BITS);
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
+nospc:
+ xas_unlock_irqrestore(&xas, flags);
+ return -ENOSPC;
}
+EXPORT_SYMBOL(ida_alloc_range);
-static void ida_remove(struct ida *ida, int id)
+/**
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
+ *
+ * Context: Any context.
+ */
+void ida_free(struct ida *ida, unsigned int id)
{
- unsigned long index = id / IDA_BITMAP_BITS;
- unsigned offset = id % IDA_BITMAP_BITS;
+ XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
+ unsigned bit = id % IDA_BITMAP_BITS;
struct ida_bitmap *bitmap;
- unsigned long *btmp;
- struct radix_tree_iter iter;