diff options
author | Jani Nikula <jani.nikula@intel.com> | 2018-11-20 13:14:08 +0200 |
---|---|---|
committer | Jani Nikula <jani.nikula@intel.com> | 2018-11-20 13:14:08 +0200 |
commit | 2ac5e38ea4203852d6e99edd3cf11f044b0a409f (patch) | |
tree | 1ef02da98d56309368ad2b6a4e492bafe5bb4faf /lib | |
parent | f48cc647f3e196a3179d695d3c2d56c13e9dec98 (diff) | |
parent | 9235dd441af43599b9cdcce599a3da4083fcad3c (diff) |
Merge drm/drm-next into drm-intel-next-queued
Pull in v4.20-rc3 via drm-next.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 5 | ||||
-rw-r--r-- | lib/Kconfig.debug | 20 | ||||
-rw-r--r-- | lib/Kconfig.kasan | 9 | ||||
-rw-r--r-- | lib/Makefile | 9 | ||||
-rw-r--r-- | lib/bch.c | 17 | ||||
-rw-r--r-- | lib/bitmap.c | 22 | ||||
-rw-r--r-- | lib/chacha20.c | 6 | ||||
-rw-r--r-- | lib/cpumask.c | 4 | ||||
-rw-r--r-- | lib/crc-t10dif.c | 57 | ||||
-rw-r--r-- | lib/crc32.c | 11 | ||||
-rw-r--r-- | lib/debug_locks.c | 6 | ||||
-rw-r--r-- | lib/idr.c | 401 | ||||
-rw-r--r-- | lib/iov_iter.c | 125 | ||||
-rw-r--r-- | lib/kstrtox.c | 16 | ||||
-rw-r--r-- | lib/lz4/lz4_decompress.c | 481 | ||||
-rw-r--r-- | lib/lz4/lz4defs.h | 9 | ||||
-rw-r--r-- | lib/memcat_p.c | 34 | ||||
-rw-r--r-- | lib/nlattr.c | 269 | ||||
-rw-r--r-- | lib/parser.c | 16 | ||||
-rw-r--r-- | lib/percpu-refcount.c | 28 | ||||
-rw-r--r-- | lib/radix-tree.c | 834 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 4 | ||||
-rw-r--r-- | lib/sg_pool.c | 7 | ||||
-rw-r--r-- | lib/string.c | 1 | ||||
-rw-r--r-- | lib/test_bpf.c | 1 | ||||
-rw-r--r-- | lib/test_ida.c | 4 | ||||
-rw-r--r-- | lib/test_kasan.c | 70 | ||||
-rw-r--r-- | lib/test_memcat_p.c | 115 | ||||
-rw-r--r-- | lib/test_xarray.c | 1238 | ||||
-rw-r--r-- | lib/ubsan.c | 3 | ||||
-rw-r--r-- | lib/vsprintf.c | 245 | ||||
-rw-r--r-- | lib/xarray.c | 2036 | ||||
-rw-r--r-- | lib/xz/xz_crc32.c | 1 | ||||
-rw-r--r-- | lib/xz/xz_private.h | 4 | ||||
-rw-r--r-- | lib/zlib_inflate/inflate.c | 12 |
35 files changed, 4755 insertions, 1365 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index a3928d4438b5..a9965f4af4dd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -399,8 +399,11 @@ config INTERVAL_TREE for more information. -config RADIX_TREE_MULTIORDER +config XARRAY_MULTI bool + help + Support entries which occupy multiple consecutive indices in the + XArray. config ASSOCIATIVE_ARRAY bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4966c4fbe7f7..1af29b8224fd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1179,7 +1179,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86 + select FRAME_POINTER if !MIPS && !PPC && !ARM && !S390 && !MICROBLAZE && !ARC && !X86 select KALLSYMS select KALLSYMS_ALL @@ -1292,7 +1292,7 @@ config DEBUG_KOBJECT depends on DEBUG_KERNEL help If you say Y here, some extra kobject debugging messages will be sent - to the syslog. + to the syslog. config DEBUG_KOBJECT_RELEASE bool "kobject release debugging" @@ -1590,7 +1590,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86 help Provide stacktrace filter for fault-injection capabilities @@ -1599,7 +1599,7 @@ config LATENCYTOP depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT depends on PROC_FS - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86 select KALLSYMS select KALLSYMS_ALL select STACKTRACE @@ -1813,6 +1813,9 @@ config TEST_BITFIELD config TEST_UUID tristate "Test functions located in the uuid module at runtime" +config TEST_XARRAY + tristate "Test the XArray code at runtime" + config TEST_OVERFLOW tristate "Test check_*_overflow() functions at runtime" @@ -1965,11 +1968,18 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. +config TEST_MEMCAT_P + tristate "Test memcat_p() helper function" + help + Test the memcat_p() helper for correctly merging two + pointer arrays together. + + If unsure, say N. + endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" - depends on HAVE_MEMBLOCK ---help--- This option adds a kernel parameter 'memtest', which allows memtest to be set. diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index befb127507c0..d0bad1bd9a2b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -57,6 +57,15 @@ config KASAN_INLINE endchoice +config KASAN_S390_4_LEVEL_PAGING + bool "KASan: use 4-level paging" + depends on KASAN && S390 + help + Compiling the kernel with KASan disables automatic 3-level vs + 4-level paging selection. 3-level paging is used by default (up + to 3TB of RAM with KASan enabled). This options allows to force + 4-level paging instead. + config TEST_KASAN tristate "Module for testing kasan for bug detection" depends on m && KASAN diff --git a/lib/Makefile b/lib/Makefile index ca3f7ebb900d..db06d1237898 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,13 +18,13 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o timerqueue.o\ + rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ - nmi_backtrace.o nodemask.o win_minmax.o + nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o @@ -53,7 +53,9 @@ obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_IDA) += test_ida.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o CFLAGS_test_kasan.o += -fno-builtin +CFLAGS_test_kasan.o += $(call cc-disable-warning, vla) obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o +CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla) UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o @@ -68,9 +70,11 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o +obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -119,7 +123,6 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o -CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ diff --git a/lib/bch.c b/lib/bch.c index 7b0f2006698b..5db6d3a4c8a6 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -79,20 +79,19 @@ #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) #define BCH_MAX_M (CONFIG_BCH_CONST_M) +#define BCH_MAX_T (CONFIG_BCH_CONST_T) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) -#define BCH_MAX_M 15 +#define BCH_MAX_M 15 /* 2KB */ +#define BCH_MAX_T 64 /* 64 bit correction */ #endif -#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) - #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) #define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) -#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) #ifndef dbg #define dbg(_fmt, args...) do {} while (0) @@ -202,6 +201,9 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const uint32_t * const tab3 = tab2 + 256*(l+1); const uint32_t *pdata, *p0, *p1, *p2, *p3; + if (WARN_ON(r_bytes > sizeof(r))) + return; + if (ecc) { /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); @@ -1285,6 +1287,13 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) */ goto fail; + if (t > BCH_MAX_T) + /* + * we can support larger than 64 bits if necessary, at the + * cost of higher stack usage. + */ + goto fail; + /* sanity checks */ if ((t < 1) || (m*t >= ((1 << m)-1))) /* invalid t value */ diff --git a/lib/bitmap.c b/lib/bitmap.c index 2fd07f6df0b8..eead55aa7170 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -13,6 +13,7 @@ #include <linux/bitops.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/uaccess.h> @@ -36,11 +37,6 @@ * carefully filter out these unused bits from impacting their * results. * - * These operations actually hold to a slightly stronger rule: - * if you don't input any bitmaps to these ops that have some - * unused bits set, then they won't output any set unused bits - * in output bitmaps. - * * The byte ordering of bitmaps is more natural on little * endian architectures. See the big-endian headers * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h @@ -466,20 +462,18 @@ EXPORT_SYMBOL(bitmap_parse_user); * ranges if list is specified or hex digits grouped into comma-separated * sets of 8 digits/set. Returns the number of characters written to buf. * - * It is assumed that @buf is a pointer into a PAGE_SIZE area and that - * sufficient storage remains at @buf to accommodate the - * bitmap_print_to_pagebuf() output. + * It is assumed that @buf is a pointer into a PAGE_SIZE, page-aligned + * area and that sufficient storage remains at @buf to accommodate the + * bitmap_print_to_pagebuf() output. Returns the number of characters + * actually printed to @buf, excluding terminating '\0'. */ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits) { - ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; - int n = 0; + ptrdiff_t len = PAGE_SIZE - offset_in_page(buf); - if (len > 1) - n = list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : - scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); - return n; + return list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); } EXPORT_SYMBOL(bitmap_print_to_pagebuf); diff --git a/lib/chacha20.c b/lib/chacha20.c index c1cc50fb68c9..d907fec6a9ed 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,9 +16,9 @@ #include <asm/unaligned.h> #include <crypto/chacha20.h> -void chacha20_block(u32 *state, u32 *stream) +void chacha20_block(u32 *state, u8 *stream) { - u32 x[16], *out = stream; + u32 x[16]; int i; for (i = 0; i < ARRAY_SIZE(x); i++) @@ -67,7 +67,7 @@ void chacha20_block(u32 *state, u32 *stream) } for (i = 0; i < ARRAY_SIZE(x); i++) - out[i] = cpu_to_le32(x[i] + state[i]); + put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]); state[12]++; } diff --git a/lib/cpumask.c b/lib/cpumask.c index beca6244671a..8d666ab84b5c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -4,7 +4,7 @@ #include <linux/bitops.h> #include <linux/cpumask.h> #include <linux/export.h> -#include <linux/bootmem.h> +#include <linux/memblock.h> /** * cpumask_next - get the next cpu in a cpumask @@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_virt_alloc(cpumask_size(), 0); + *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); } /** diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 1ad33e555805..4d0d47c1ffbd 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -14,10 +14,47 @@ #include <linux/err.h> #include <linux/init.h> #include <crypto/hash.h> +#include <crypto/algapi.h> #include <linux/static_key.h> +#include <linux/notifier.h> -static struct crypto_shash *crct10dif_tfm; +static struct crypto_shash __rcu *crct10dif_tfm; static struct static_key crct10dif_fallback __read_mostly; +static DEFINE_MUTEX(crc_t10dif_mutex); + +static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data) +{ + struct crypto_alg *alg = data; + struct crypto_shash *new, *old; + + if (val != CRYPTO_MSG_ALG_LOADED || + static_key_false(&crct10dif_fallback) || + strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING))) + return 0; + + mutex_lock(&crc_t10dif_mutex); + old = rcu_dereference_protected(crct10dif_tfm, + lockdep_is_held(&crc_t10dif_mutex)); + if (!old) { + mutex_unlock(&crc_t10dif_mutex); + return 0; + } + new = crypto_alloc_shash("crct10dif", 0, 0); + if (IS_ERR(new)) { + mutex_unlock(&crc_t10dif_mutex); + return 0; + } + rcu_assign_pointer(crct10dif_tfm, new); + mutex_unlock(&crc_t10dif_mutex); + + synchronize_rcu(); + crypto_free_shash(old); + return 0; +} + +static struct notifier_block crc_t10dif_nb = { + .notifier_call = crc_t10dif_rehash, +}; __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) { @@ -30,11 +67,14 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) if (static_key_false(&crct10dif_fallback)) return crc_t10dif_generic(crc, buffer, len); - desc.shash.tfm = crct10dif_tfm; + rcu_read_lock(); + desc.shash.tfm = rcu_dereference(crct10dif_tfm); desc.shash.flags = 0; *(__u16 *)desc.ctx = crc; err = crypto_shash_update(&desc.shash, buffer, len); + rcu_read_unlock(); + BUG_ON(err); return *(__u16 *)desc.ctx; @@ -49,6 +89,7 @@ EXPORT_SYMBOL(crc_t10dif); static int __init crc_t10dif_mod_init(void) { + crypto_register_notifier(&crc_t10dif_nb); crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); if (IS_ERR(crct10dif_tfm)) { static_key_slow_inc(&crct10dif_fallback); @@ -59,12 +100,24 @@ static int __init crc_t10dif_mod_init(void) static void __exit crc_t10dif_mod_fini(void) { + crypto_unregister_notifier(&crc_t10dif_nb); crypto_free_shash(crct10dif_tfm); } module_init(crc_t10dif_mod_init); module_exit(crc_t10dif_mod_fini); +static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp) +{ + if (static_key_false(&crct10dif_fallback)) + return sprintf(buffer, "fallback\n"); + + return sprintf(buffer, "%s\n", + crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm))); +} + +module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644); + MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); MODULE_SOFTDEP("pre: crct10dif"); diff --git a/lib/crc32.c b/lib/crc32.c index a6c9afafc8c8..45b1d67a1767 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); @@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); +u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); + /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 96c4c633d95e..ce51749cc145 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -21,7 +21,7 @@ * that would just muddy the log. So we report the first one and * shut up after that. */ -int debug_locks = 1; +int debug_locks __read_mostly = 1; EXPORT_SYMBOL_GPL(debug_locks); /* @@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(debug_locks); * 'silent failure': nothing is printed to the console when * a locking bug is detected. */ -int debug_locks_silent; +int debug_locks_silent __read_mostly; EXPORT_SYMBOL_GPL(debug_locks_silent); /* @@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); */ int debug_locks_off(void) { - if (__debug_locks_off()) { + if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) { console_verbose(); return 1; diff --git a/lib/idr.c b/lib/idr.c index fab2fd5bc326..cb1db9b8d3f6 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -6,8 +6,6 @@ #include <linux/spinlock.h> #include <linux/xarray.h> -DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); - /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. @@ -39,10 +37,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned int base = idr->idr_base; unsigned int id = *nextid; - if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) - return -EINVAL; - if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) - idr->idr_rt.gfp_mask |= IDR_RT_MARKER; + if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) + idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 0 : id - base; radix_tree_iter_init(&iter, id); @@ -295,15 +291,13 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) void __rcu **slot = NULL; void *entry; - if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) - return ERR_PTR(-EINVAL); id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); - __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL); + __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } @@ -324,6 +318,9 @@ EXPORT_SYMBOL(idr_replace); * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * + * The IDA handles its own locking. It is safe to call any of the IDA + * functions without synchronisation in your code. + * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ @@ -331,161 +328,197 @@ EXPORT_SYMBOL(idr_replace); /* * Developer's notes: * - * The IDA uses the functionality provided by the IDR & radix tree to store - * bitmaps in each entry. The IDR_FREE tag means there is at least one bit - * free, unlike the IDR where it means at least one entry is free. + * The IDA uses the functionality provided by the XArray to store bitmaps in + * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap + * have been set. * - * I considered telling the radix tree that each slot is an order-10 node - * and storing the bit numbers in the radix tree, but the radix tree can't - * allow a single multiorder entry at index 0, which would significantly - * increase memory consumption for the IDA. So instead we divide the index - * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * I considered telling the XArray that each slot is an order-10 node + * and indexing by bit number, but the XArray can't allow a single multi-index + * entry in the head, which would significantly increase memory consumption + * for the IDA. So instead we divide the index by the number of bits in the + * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given - * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional - * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits - * directly in the entry. By being really tricksy, we could store - * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising - * for 0-3 allocated IDs. - * - * We allow the radix tree 'exceptional' count to get out of date. Nothing - * in the IDA nor the radix tree code checks it. If it becomes important - * to maintain an accurate exceptional count, switch the rcu_assign_pointer() - * calls to radix_tree_iter_replace() which will correct the exceptional - * count. - * - * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * leaf, instead of allocating a 128-byte bitmap, we store the bits + * as a value entry. Value entries never have the XA_FREE_MARK cleared + * because we can always convert them into a bitmap entry. + * + * It would be possible to optimise further; once we've run out of a + * single 128-byte bitmap, we currently switch to a 576-byte node, put + * the 128-byte bitmap in the first entry and then start allocating extra + * 128-byte entries. We could instead use the 512 bytes of the node's + * data as a bitmap before moving to that scheme. I do not believe this + * is a worthwhile optimisation; Rasmus Villemoes surveyed the current + * users of the IDA and almost none of them use more than 1024 entries. + * Those that do use more than the 8192 IDs that the 512 bytes would + * provide. + * + * The IDA always uses a lock to alloc/free. If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) - -static int ida_get_new_above(struct ida *ida, int start) +/** + * ida_alloc_range() - Allocate an unused ID. + * @ida: IDA handle. + * @min: Lowest ID to allocate. + * @max: Highest ID to allocate. + * @gfp: Memory allocation flags. + * + * Allocate an ID between @min and @max, inclusive. The allocated ID will + * not exceed %INT_MAX, even if @max is larger. + * + * Context: Any context. + * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, + * or %-ENOSPC if there are no free IDs. + */ +int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, + gfp_t gfp) { - struct radix_tree_root *root = &ida->ida_rt; - void __rcu **slot; - struct radix_tree_iter iter; - struct ida_bitmap *bitmap; - unsigned long index; - unsigned bit, ebit; - int new; - - index = start / IDA_BITMAP_BITS; - bit = start % IDA_BITMAP_BITS; - ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; - - slot = radix_tree_iter_init(&iter, index); - for (;;) { - if (slot) - slot = radix_tree_next_slot(slot, &iter, - RADIX_TREE_ITER_TAGGED); - if (!slot) { - slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - return -EAGAIN; - return PTR_ERR(slot); + XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); + unsigned bit = min % IDA_BITMAP_BITS; + unsigned long flags; + struct ida_bitmap *bitmap, *alloc = NULL; + + if ((int)min < 0) + return -ENOSPC; + + if ((int)max < 0) + max = INT_MAX; + +retry: + xas_lock_irqsave(&xas, flags); +next: + bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); + if (xas.xa_index > min / IDA_BITMAP_BITS) + bit = 0; + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + + if (bit < BITS_PER_XA_VALUE) { + bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit < BITS_PER_XA_VALUE) { + tmp |= 1UL << bit; + xas_store(&xas, xa_mk_value(tmp)); + goto out; } } - if (iter.index > index) { - bit = 0; - ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; - } - new = iter.index * IDA_BITMAP_BITS; - bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { - unsigned long tmp = (unsigned long)bitmap; - ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); - if (ebit < BITS_PER_LONG) { - tmp |= 1UL << ebit; - rcu_assign_pointer(*slot, (void *)tmp); - return new + ebit - - RADIX_TREE_EXCEPTIONAL_SHIFT; - } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; - rcu_assign_pointer(*slot, bitmap); + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + bitmap->bitmap[0] = tmp; + xas_store(&xas, bitmap); + if (xas_error(&xas)) { + bitmap->bitmap[0] = 0; + goto out; } + } - if (bitmap) { - bit = find_next_zero_bit(bitmap->bitmap, - IDA_BITMAP_BITS, bit); - new += bit; - if (new < 0) - return -ENOSPC; - if (bit == IDA_BITMAP_BITS) - continue; + if (bitmap) { + bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit == IDA_BITMAP_BITS) + goto next; - __set_bit(bit, bitmap->bitmap); - if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) - radix_tree_iter_tag_clear(root, &iter, - IDR_FREE); + __set_bit(bit, bitmap->bitmap); + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); } else { - new += bit; - if (new < 0) - return -ENOSPC; - if (ebit < BITS_PER_LONG) { - bitmap = (void *)((1UL << ebit) | - RADIX_TREE_EXCEPTIONAL_ENTRY); - radix_tree_iter_replace(root, &iter, slot, - bitmap); - return new; - } - bitmap = this_cpu_xchg(ida_bitmap, NULL); + bitmap = alloc; if (!bitmap) - return -EAGAIN; + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; __set_bit(bit, bitmap->bitmap); - radix_tree_iter_replace(root, &iter, slot, bitmap); } - - return new; + xas_store(&xas, bitmap); + } +out: + xas_unlock_irqrestore(&xas, flags); + if (xas_nomem(&xas, gfp)) { + xas.xa_index = min / IDA_BITMAP_BITS; + bit = min % IDA_BITMAP_BITS; + goto retry; } + if (bitmap != alloc) + kfree(alloc); + if (xas_error(&xas)) + return xas_error(&xas); + return xas.xa_index * IDA_BITMAP_BITS + bit; +alloc: + xas_unlock_irqrestore(&xas, flags); + alloc = kzalloc(sizeof(*bitmap), gfp); + if (!alloc) + return -ENOMEM; + xas_set(&xas, min / IDA_BITMAP_BITS); + bit = min % IDA_BITMAP_BITS; + goto retry; +nospc: + xas_unlock_irqrestore(&xas, flags); + return -ENOSPC; } +EXPORT_SYMBOL(ida_alloc_range); -static void ida_remove(struct ida *ida, int id) +/** + * ida_free() - Release an allocated ID. + * @ida: IDA handle. + * @id: Previously allocated ID. + * + * Context: Any context. + */ +void ida_free(struct ida *ida, unsigned int id) { - unsigned long index = id / IDA_BITMAP_BITS; - unsigned offset = id % IDA_BITMAP_BITS; + XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); + unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; - unsigned long *btmp; - struct radix_tree_iter iter; |