author     Linus Torvalds <torvalds@linux-foundation.org>  2020-08-07 11:39:33 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-08-07 11:39:33 -0700
commit     81e11336d97e7a4c25a65c302ef2bf9cd9808ed4 (patch)
tree       44e069e0bb26068137a1bda7ca5125c5b068b4ab /include
parent     5631c5e0eb9035d92ceb20fcd9cdb7779a3f5cc7 (diff)
parent     912c05720f00d039103d356a59c37dc7c3995e01 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:

 - a few MM hotfixes

 - kthread, tools, scripts, ntfs and ocfs2

 - some of MM

Subsystems affected by this patch series: kthread, tools, scripts, ntfs,
ocfs2 and mm (hotfixes, pagealloc, slab-generic, slab, slub, kcsan, debug,
pagecache, gup, swap, shmem, memcg, pagemap, mremap, mincore, sparsemem,
vmalloc, kasan, pagealloc, hugetlb and vmscan).

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (162 commits)
  mm: vmscan: consistent update to pgrefill
  mm/vmscan.c: fix typo
  khugepaged: khugepaged_test_exit() check mmget_still_valid()
  khugepaged: retract_page_tables() remember to test exit
  khugepaged: collapse_pte_mapped_thp() protect the pmd lock
  khugepaged: collapse_pte_mapped_thp() flush the right range
  mm/hugetlb: fix calculation of adjust_range_if_pmd_sharing_possible
  mm: thp: replace HTTP links with HTTPS ones
  mm/page_alloc: fix memalloc_nocma_{save/restore} APIs
  mm/page_alloc.c: skip setting nodemask when we are in interrupt
  mm/page_alloc: fallbacks at most has 3 elements
  mm/page_alloc: silence a KASAN false positive
  mm/page_alloc.c: remove unnecessary end_bitidx for [set|get]_pfnblock_flags_mask()
  mm/page_alloc.c: simplify pageblock bitmap access
  mm/page_alloc.c: extract the common part in pfn_to_bitidx()
  mm/page_alloc.c: replace the definition of NR_MIGRATETYPE_BITS with PB_migratetype_bits
  mm/shuffle: remove dynamic reconfiguration
  mm/memory_hotplug: document why shuffle_zone() is relevant
  mm/page_alloc: remove nr_free_pagecache_pages()
  mm: remove vm_total_pages
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/pgalloc.h         80
-rw-r--r--  include/asm-generic/tlb.h              1
-rw-r--r--  include/crypto/aead.h                  2
-rw-r--r--  include/crypto/akcipher.h              2
-rw-r--r--  include/crypto/gf128mul.h              2
-rw-r--r--  include/crypto/hash.h                  2
-rw-r--r--  include/crypto/internal/acompress.h    2
-rw-r--r--  include/crypto/kpp.h                   2
-rw-r--r--  include/crypto/skcipher.h              2
-rw-r--r--  include/linux/efi.h                    4
-rw-r--r--  include/linux/fs.h                    17
-rw-r--r--  include/linux/huge_mm.h                2
-rw-r--r--  include/linux/kasan.h                  4
-rw-r--r--  include/linux/memcontrol.h           203
-rw-r--r--  include/linux/mm.h                    86
-rw-r--r--  include/linux/mm_types.h               5
-rw-r--r--  include/linux/mman.h                   4
-rw-r--r--  include/linux/mmu_notifier.h          13
-rw-r--r--  include/linux/mmzone.h                52
-rw-r--r--  include/linux/pageblock-flags.h       24
-rw-r--r--  include/linux/percpu_counter.h         4
-rw-r--r--  include/linux/sched/mm.h               8
-rw-r--r--  include/linux/shmem_fs.h               3
-rw-r--r--  include/linux/slab.h                   9
-rw-r--r--  include/linux/slab_def.h               9
-rw-r--r--  include/linux/slub_def.h              31
-rw-r--r--  include/linux/swap.h                   2
-rw-r--r--  include/linux/vmstat.h                14
28 files changed, 419 insertions, 170 deletions
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 73f7421413cb..6f44810921aa 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -102,6 +102,86 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
__free_page(pte_page);
}
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+#ifndef __HAVE_ARCH_PMD_ALLOC_ONE
+/**
+ * pmd_alloc_one - allocate a page for PMD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page and runs the pgtable_pmd_page_ctor().
+ * Allocations use %GFP_PGTABLE_USER in user context and
+ * %GFP_PGTABLE_KERNEL in kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ struct page *page;
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ page = alloc_pages(gfp, 0);
+ if (!page)
+ return NULL;
+ if (!pgtable_pmd_page_ctor(page)) {
+ __free_pages(page, 0);
+ return NULL;
+ }
+ return (pmd_t *)page_address(page);
+}
+#endif
+
+#ifndef __HAVE_ARCH_PMD_FREE
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+ pgtable_pmd_page_dtor(virt_to_page(pmd));
+ free_page((unsigned long)pmd);
+}
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#ifndef __HAVE_ARCH_PUD_FREE
+/**
+ * pud_alloc_one - allocate a page for PUD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page using %GFP_PGTABLE_USER for user context and
+ * %GFP_PGTABLE_KERNEL for kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (pud_t *)get_zeroed_page(gfp);
+}
+#endif
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+ free_page((unsigned long)pud);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
+#ifndef __HAVE_ARCH_PGD_FREE
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ free_page((unsigned long)pgd);
+}
+#endif
+
#endif /* CONFIG_MMU */
#endif /* __ASM_GENERIC_PGALLOC_H */
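
The new pmd/pud/pgd helpers above follow the usual asm-generic opt-out convention: an architecture keeps its own implementation by defining the corresponding __HAVE_ARCH_* macro (e.g. __HAVE_ARCH_PMD_ALLOC_ONE) before including <asm-generic/pgalloc.h>, and inherits the generic version otherwise. A minimal, hypothetical arch header sketching that pattern follows; the arch name and the choice to override only pmd_alloc_one are illustrative and not part of this patch, and includes are elided as in real arch headers:

/* arch/foo/include/asm/pgalloc.h -- hypothetical, for illustration only */
#ifndef _ASM_FOO_PGALLOC_H
#define _ASM_FOO_PGALLOC_H

#define __HAVE_ARCH_PMD_ALLOC_ONE
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	struct page *page;

	/* say this arch always wants zeroed, kernel-context pages for PMDs */
	page = alloc_pages(GFP_PGTABLE_KERNEL, 0);
	if (!page)
		return NULL;
	if (!pgtable_pmd_page_ctor(page)) {
		__free_pages(page, 0);
		return NULL;
	}
	return (pmd_t *)page_address(page);
}

/* everything not overridden above comes from the generic header */
#include <asm-generic/pgalloc.h>

#endif /* _ASM_FOO_PGALLOC_H */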
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index ef75ec86f865..6661ee1cff47 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -14,7 +14,6 @@
#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <linux/hugetlb_inline.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 62c68550aab6..c32a6f5664e9 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -425,7 +425,7 @@ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm,
*/
static inline void aead_request_free(struct aead_request *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
/**
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index 6924b091adec..1d3aa252caba 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -207,7 +207,7 @@ static inline struct akcipher_request *akcipher_request_alloc(
*/
static inline void akcipher_request_free(struct akcipher_request *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
/**
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h
index fa0a63d298dc..81330c6446f6 100644
--- a/include/crypto/gf128mul.h
+++ b/include/crypto/gf128mul.h
@@ -230,7 +230,7 @@ void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t);
void gf128mul_x8_ble(le128 *r, const le128 *x);
static inline void gf128mul_free_4k(struct gf128mul_4k *t)
{
- kzfree(t);
+ kfree_sensitive(t);
}
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 19ce91f2359f..0d1b403888c9 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -606,7 +606,7 @@ static inline struct ahash_request *ahash_request_alloc(
*/
static inline void ahash_request_free(struct ahash_request *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
static inline void ahash_request_zero(struct ahash_request *req)
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index cf478681b53e..cfc47e18820f 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -46,7 +46,7 @@ static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm)
static inline void __acomp_request_free(struct acomp_req *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
/**
diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index cd9a9b500624..88b591215d5c 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -187,7 +187,7 @@ static inline struct kpp_request *kpp_request_alloc(struct crypto_kpp *tfm,
*/
static inline void kpp_request_free(struct kpp_request *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
/**
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 5663f71198b3..6a733b171a5d 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -508,7 +508,7 @@ static inline struct skcipher_request *skcipher_request_alloc(
*/
static inline void skcipher_request_free(struct skcipher_request *req)
{
- kzfree(req);
+ kfree_sensitive(req);
}
static inline void skcipher_request_zero(struct skcipher_request *req)
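
All of the crypto request free helpers in this series switch from kzfree() to kfree_sensitive(); the behaviour is unchanged, the rename only makes the "clear before free" intent explicit. A conceptual sketch of what the helper does (not the verbatim mm/slab_common.c implementation, shown only to clarify the rename):

void kfree_sensitive(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	ks = ksize(mem);			/* full allocated size, may exceed the requested size */
	if (ks)
		memzero_explicit(mem, ks);	/* wipe in a way the compiler cannot elide */
	kfree(mem);
}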
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 05c47f857383..73db1ae04cef 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -606,7 +606,11 @@ extern void *efi_get_pal_addr (void);
extern void efi_map_pal_code (void);
extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
extern void efi_gettimeofday (struct timespec64 *ts);
+#ifdef CONFIG_EFI
extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */
+#else
+static inline void efi_enter_virtual_mode (void) {}
+#endif
#ifdef CONFIG_X86
extern efi_status_t efi_query_variable_store(u32 attributes,
unsigned long size,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6d1976916635..2df72def1f59 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -528,7 +528,7 @@ static inline int mapping_mapped(struct address_space *mapping)
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
+ * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
@@ -2950,6 +2950,21 @@ extern void discard_new_inode(struct inode *);
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
+/*
+ * Userspace may rely on the inode number being non-zero. For example, glibc
+ * simply ignores files with zero i_ino in unlink() and other places.
+ *
+ * As an additional complication, if userspace was compiled with
+ * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
+ * lower 32 bits, so we need to check that those aren't zero explicitly. With
+ * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
+ * better safe than sorry.
+ */
+static inline bool is_zero_ino(ino_t ino)
+{
+ return (u32)ino == 0;
+}
+
extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
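
is_zero_ino() gives filesystems one place to encode the "never hand out an inode number whose low 32 bits are zero" rule described above. A hypothetical caller, purely for illustration (the foo_* names are not from this patch): a filesystem minting inode numbers from a counter can simply skip the values the helper rejects.

static atomic64_t foo_ino_counter = ATOMIC64_INIT(0);	/* hypothetical per-fs counter */

static ino_t foo_next_ino(void)
{
	ino_t ino;

	do {
		ino = (ino_t)atomic64_inc_return(&foo_ino_counter);
	} while (is_zero_ino(ino));	/* skip 0, 2^32, 2*2^32, ... */

	return ino;
}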
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 71f20776b06c..17c4c4975145 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -42,7 +42,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, unsigned long old_end,
+ unsigned long new_addr,
pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 82522e996c76..087fba34b209 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -38,7 +38,6 @@ extern void kasan_disable_current(void);
void kasan_unpoison_shadow(const void *address, size_t size);
void kasan_unpoison_task_stack(struct task_struct *task);
-void kasan_unpoison_stack_above_sp_to(const void *watermark);
void kasan_alloc_pages(struct page *page, unsigned int order);
void kasan_free_pages(struct page *page, unsigned int order);
@@ -101,7 +100,6 @@ void kasan_restore_multi_shot(bool enabled);
static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
-static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {}
static inline void kasan_enable_current(void) {}
static inline void kasan_disable_current(void) {}
@@ -174,11 +172,13 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
void kasan_cache_shrink(struct kmem_cache *cache);
void kasan_cache_shutdown(struct kmem_cache *cache);
+void kasan_record_aux_stack(void *ptr);
#else /* CONFIG_KASAN_GENERIC */
static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
+static inline void kasan_record_aux_stack(void *ptr) {}
#endif /* CONFIG_KASAN_GENERIC */
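
kasan_record_aux_stack() lets a subsystem stash the current stack trace in an object's KASAN metadata, so a later report about that object (for example a use-after-free) can also show where it was last handed off; without CONFIG_KASAN_GENERIC it compiles to a no-op. A hypothetical caller for illustration (the foo_* names are not part of this patch):

#include <linux/kasan.h>
#include <linux/llist.h>

struct foo_item {			/* hypothetical deferred-work item */
	struct llist_node node;
};

static LLIST_HEAD(foo_pending);		/* hypothetical pending queue */

static void foo_queue_item(struct foo_item *item)
{
	/* remember who queued the item; no-op unless CONFIG_KASAN_GENERIC */
	kasan_record_aux_stack(item);
	llist_add(&item->node, &foo_pending);
}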
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e77197a62809..1bb49b600310 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -23,6 +23,7 @@
#include <linux/page-flags.h>
struct mem_cgroup;
+struct obj_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
@@ -31,8 +32,6 @@ struct kmem_cache;
enum memcg_stat_item {
MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
MEMCG_SOCK,
- /* XXX: why are these zone and not node counters? */
- MEMCG_KERNEL_STACK_KB,
MEMCG_NR_STAT,
};
@@ -48,12 +47,6 @@ enum memcg_memory_event {
MEMCG_NR_MEMORY_EVENTS,
};
-enum mem_cgroup_protection {
- MEMCG_PROT_NONE,
- MEMCG_PROT_LOW,
- MEMCG_PROT_MIN,
-};
-
struct mem_cgroup_reclaim_cookie {
pg_data_t *pgdat;
unsigned int generation;
@@ -193,6 +186,22 @@ struct memcg_cgwb_frn {
};
/*
+ * Bucket for arbitrarily byte-sized objects charged to a memory
+ * cgroup. The bucket can be reparented in one piece when the cgroup
+ * is destroyed, without having to round up the individual references
+ * of all live memory objects in the wild.
+ */
+struct obj_cgroup {
+ struct percpu_ref refcnt;
+ struct mem_cgroup *memcg;
+ atomic_t nr_charged_bytes;
+ union {
+ struct list_head list;
+ struct rcu_head rcu;
+ };
+};
+
+/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
* statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -300,7 +309,8 @@ struct mem_cgroup {
/* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
- struct list_head kmem_caches;
+ struct obj_cgroup __rcu *objcg;
+ struct list_head objcg_list; /* list of inherited objcgs */
#endif
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -339,12 +349,49 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
bool in_low_reclaim)
{
if (mem_cgroup_disabled())
return 0;
+ /*
+ * There is no reclaim protection applied to a targeted reclaim.
+ * We are special casing this specific case here because
+ * mem_cgroup_protected calculation is not robust enough to keep
+ * the protection invariant for calculated effective values for
+ * parallel reclaimers with different reclaim target. This is
+ * especially a problem for tail memcgs (as they have pages on LRU)
+ * which would want to have effective values 0 for targeted reclaim
+ * but a different value for external reclaim.
+ *
+ * Example
+ * Let's have global and A's reclaim in parallel:
+ * |
+ * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G)
+ * |\
+ * | C (low = 1G, usage = 2.5G)
+ * B (low = 1G, usage = 0.5G)
+ *
+ * For the global reclaim
+ * A.elow = A.low
+ * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow
+ * C.elow = min(C.usage, C.low)
+ *
+ * With the effective values resetting we have A reclaim
+ * A.elow = 0
+ * B.elow = B.low
+ * C.elow = C.low
+ *
+ * If the global reclaim races with A's reclaim then
+ * B.elow = C.elow = 0 because children_low_usage > A.elow)
+ * is possible and reclaiming B would be violating the protection.
+ *
+ */
+ if (root == memcg)
+ return 0;
+
if (in_low_reclaim)
return READ_ONCE(memcg->memory.emin);
@@ -352,8 +399,36 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
READ_ONCE(memcg->memory.elow));
}
-enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
- struct mem_cgroup *memcg);
+void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg);
+
+static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
+{
+ /*
+ * The root memcg doesn't account charges, and doesn't support
+ * protection.
+ */
+ return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
+
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_supports_protection(memcg))
+ return false;
+
+ return READ_ONCE(memcg->memory.elow) >=
+ page_counter_read(&memcg->memory);
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+{
+ if (!mem_cgroup_supports_protection(memcg))
+ return false;
+
+ return READ_ONCE(memcg->memory.emin) >=
+ page_counter_read(&memcg->memory);
+}
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
@@ -416,6 +491,33 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
}
+static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
+{
+ return percpu_ref_tryget(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+{
+ percpu_ref_get(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_put(struct obj_cgroup *objcg)
+{
+ percpu_ref_put(&objcg->refcnt);
+}
+
+/*
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
+ *
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
+ */
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
+{
+ return READ_ONCE(objcg->memcg);
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
if (memcg)
@@ -679,11 +781,34 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return x;
}
+void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+
void mod_memcg_obj_state(void *p, int idx, int val);
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+ int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_slab_state(p, idx, val);
+ local_irq_restore(flags);
+}
+
+static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_memcg_lruvec_state(lruvec, idx, val);
+ local_irq_restore(flags);
+}
+
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
@@ -825,16 +950,26 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg,
bool in_low_reclaim)
{
return 0;
}
-static inline enum mem_cgroup_protection mem_cgroup_protected(
- struct mem_cgroup *root, struct mem_cgroup *memcg)
+static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+ struct mem_cgroup *memcg)
+{
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
{
- return MEMCG_PROT_NONE;
+ return false;
}
static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
@@ -1057,6 +1192,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
return node_page_state(lruvec_pgdat(lruvec), idx);
}
+static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+}
+
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
@@ -1089,6 +1229,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+ int val)
+{
+ struct page *page = virt_to_head_page(p);
+
+ mod_node_page_state(page_pgdat(page), idx, val);
+}
+
static inline void mod_memcg_obj_state(void *p, int idx, int val)
{
}
@@ -1341,9 +1489,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
}
#endif
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
-void memcg_kmem_put_cache(struct kmem_cache *cachep);
-
#ifdef CONFIG_MEMCG_KMEM
int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
unsigned int nr_pages);
@@ -1351,8 +1496,12 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);
+struct obj_cgroup *get_obj_cgroup_from_current(void);
+
+int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
+void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
+
extern struct static_key_false memcg_kmem_enabled_key;
-extern struct workqueue_struct *memcg_kmem_cache_wq;
extern int memcg_nr_cache_ids;
void memcg_get_cache_ids(void);
@@ -1368,7 +1517,19 @@ void memcg_put_cache_ids(void);
static inline bool memcg_kmem_enabled(void)
{
- return static_branch_unlikely(&memcg_kmem_enabled_key);
+ return static_branch_likely(&memcg_kmem_enabled_key);
+}
+
+static inline bool memcg_kmem_bypass(void)
+{
+ if (in_interrupt())
+ return true;
+
+ /* Allow remote memcg charging in kthread contexts. */
+ if ((!current->mm || (current->flags & PF_KTHREAD)) &&
+ !current->active_memcg)
+ return true;
+ return false;
}
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
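
Taken together, the obj_cgroup additions give kernel allocators a way to charge arbitrary byte-sized objects to the current cgroup through a reference that can be reparented when the cgroup dies. A hypothetical allocation path sketching the intended charge/uncharge pattern (struct foo_obj, foo_alloc() and foo_free() are illustrative only; error handling is simplified):

#include <linux/memcontrol.h>
#include <linux/slab.h>

struct foo_obj {				/* hypothetical small object */
	struct obj_cgroup *objcg;
	char payload[64];
};

static struct foo_obj *foo_alloc(gfp_t gfp)
{
	struct obj_cgroup *objcg = NULL;
	struct foo_obj *obj;

	if (memcg_kmem_enabled() && !memcg_kmem_bypass())
		objcg = get_obj_cgroup_from_current();	/* may return NULL */

	if (objcg && obj_cgroup_charge(objcg, gfp, sizeof(*obj))) {
		obj_cgroup_put(objcg);
		return NULL;
	}

	obj = kmalloc(sizeof(*obj), gfp);
	if (!obj) {
		if (objcg) {
			obj_cgroup_uncharge(objcg, sizeof(*obj));
			obj_cgroup_put(objcg);
		}
		return NULL;
	}

	obj->objcg = objcg;			/* keep the reference until free */
	return obj;
}

static void foo_free(struct foo_obj *obj)
{
	if (obj->objcg) {
		obj_cgroup_uncharge(obj->objcg, sizeof(*obj));
		obj_cgroup_put(obj->objcg);
	}
	kfree(obj);
}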
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6c8333d6c991..f6a82f9bccd7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -206,6 +206,8 @@ int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
loff_t *);
+int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
@@ -777,6 +779,11 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
extern void kvfree(const void *addr);
extern void kvfree_sensitive(const void *addr, size_t len);
+static inline int head_mapcount(struct page *head)
+{
+ return atomic_read(compound_mapcount_ptr(head)) + 1;
+}
+
/*
* Mapcount of compound page as a whole, does not include mapped sub-pages.
*
@@ -786,7 +793,7 @@ static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
page = compound_head(page);
- return atomic_read(compound_mapcount_ptr(page)) + 1;
+ return head_mapcount(page);
}
/*
@@ -899,11 +906,16 @@ static inline bool hpage_pincount_available(struct page *page)
return PageCompound(page) && compound_order(page) > 1;
}
+static inline int head_pincount(struct page *head)
+{
+ return atomic_read(compound_pincount_ptr(head));
+}
+
static inline int compound_pincount(struct page *page)
{
VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
page = compound_head(page);
- return atomic_read(compound_pincount_ptr(page));
+ return head_pincount(page);
}
static inline void set_compound_order(struct page *page, unsigned int order)
@@ -2091,51 +2103,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
NULL : pud_offset(p4d, address);
}
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
-
-{
- if (unlikely(pgd_none(*pgd))) {
- if (__p4d_alloc(mm, pgd, address))
- return NULL;
- *mod_mask |= PGTBL_PGD_MODIFIED;
- }
-
- return p4d_offset(pgd, address);
-}
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
-{
- if (unlikely(p4d_none(*p4d))) {
- if (__pud_alloc(mm, p4d, address))
- return NULL;
- *mod_mask |= PGTBL_P4D_MODIFIED;
- }
-
- return pud_offset(p4d, address);
-}
-
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
-
-static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
- unsigned long address,
- pgtbl_mod_mask *mod_mask)
-{
- if (unlikely(pud_none(*pud))) {
- if (__pmd_alloc(mm, pud, address))
- return NULL;
- *mod_mask |= PGTBL_PUD_MODIFIED;
- }
-
- return pmd_offset(pud, address);
-}
#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
@@ -2251,11 +2223,6 @@ static inline void pgtable_pte_page_dtor(struct page *page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
-#define pte_alloc_kernel_track(pmd, address, mask) \
- ((unlikely(pmd_none(*(pmd))) && \
- (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
- NULL: pte_offset_kernel(pmd, address))
-
#if USE_SPLIT_PMD_PTLOCKS
static struct page *pmd_to_page(pmd_t *pmd)
@@ -2413,9 +2380,6 @@ static inline unsigned long get_num_physpages(void)
* for_each_valid_physical_page_range()
* memblock_add_node(base, size, nid)
* free_area_init(max_zone_pfns);
- *
- * sparse_memory_present_with_active_regions() calls memory_present() for
- * each range when SPARSEMEM is enabled.
*/
void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
@@ -2426,7 +2390,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
-extern void sparse_memory_present_with_active_regions(int nid);
#ifndef CONFIG_NEED_MULTIPLE_NODES
static inline int early_pfn_to_nid(unsigned long pfn)
@@ -2577,23 +2540,13 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
struct list_head *uf);
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
- struct list_head *uf);
+ unsigned long pgoff, unsigned long *populate, struct list_head *uf);
extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(unsigned long start, size_t len_in, int behavior);
-static inline unsigned long
-do_mmap_pgoff(struct file *file, unsigned long addr,
- unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate,
- struct list_head *uf)
-{
- return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf);
-}
-
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
@@ -3009,14 +2962,15 @@ pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
-pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
+ struct vmem_altmap *altmap);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
-void *vmemmap_alloc_block_buf(unsigned long size, int node);
-void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
+void *vmemmap_alloc_block_buf(unsigned long size, int node,
+ struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
- int node);
+ int node, struct vmem_altmap *altmap);
int vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
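
Among the mm.h changes above, do_mmap() loses its vm_flags parameter and the do_mmap_pgoff() wrapper (which only forwarded to do_mmap() with vm_flags = 0, as the removed hunk shows) goes away, so call sites now use do_mmap() directly. Caller-side before/after, with hypothetical local variable names:

	/* before */
	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff, &populate, &uf);

	/* after */
	addr = do_mmap(file, addr, len, prot, flags, pgoff, &populate, &uf);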
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 64ede5f150dc..0277fbab7c93 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -198,7 +198,10 @@ struct page {
atomic_t _refcount;