From 3cd14fcd3f128d5eba8575491cb4e1999ee1bad2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Sep 2013 15:13:58 -0700 Subject: thp: account anon transparent huge pages into NR_ANON_PAGES We use NR_ANON_PAGES as base for reporting AnonPages to user. There's not much sense in not accounting transparent huge pages there, but add them on printing to user. Let's account transparent huge pages in NR_ANON_PAGES in the first place. Signed-off-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc: Andrea Arcangeli Cc: Al Viro Cc: Hugh Dickins Cc: Wu Fengguang Cc: Jan Kara Cc: Mel Gorman Cc: Andi Kleen Cc: Matthew Wilcox Cc: Hillf Danton Cc: Ning Qu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/huge_memory.c') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 963e14c0486f..7cef0c098630 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1661,7 +1661,6 @@ static void __split_huge_page_refcount(struct page *page, BUG_ON(atomic_read(&page->_count) <= 0); __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); - __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); compound_unlock(page); -- cgit v1.2.3 From 3122359a64829afd231bad6ed899b557f46280e9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Sep 2013 15:14:01 -0700 Subject: thp: move maybe_pmd_mkwrite() out of mk_huge_pmd() It's confusing that mk_huge_pmd() has semantics different from mk_pte() or mk_pmd(). I spent some time on debugging issue caused by this inconsistency. Let's move maybe_pmd_mkwrite() out of mk_huge_pmd() and adjust prototype to match mk_pte(). Signed-off-by: Kirill A. 
Shutemov Acked-by: Dave Hansen Cc: Andrea Arcangeli Cc: Al Viro Cc: Hugh Dickins Cc: Wu Fengguang Cc: Jan Kara Cc: Mel Gorman Cc: Andi Kleen Cc: Matthew Wilcox Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'mm/huge_memory.c') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7cef0c098630..60836870c6f7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -690,11 +690,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma) +static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot) { pmd_t entry; - entry = mk_pmd(page, vma->vm_page_prot); - entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + entry = mk_pmd(page, prot); entry = pmd_mkhuge(entry); return entry; } @@ -727,7 +726,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pte_free(mm, pgtable); } else { pmd_t entry; - entry = mk_huge_pmd(page, vma); + entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); @@ -1210,7 +1210,8 @@ alloc: goto out_mn; } else { pmd_t entry; - entry = mk_huge_pmd(new_page, vma); + entry = mk_huge_pmd(new_page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); pmdp_clear_flush(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); @@ -2358,7 +2359,8 @@ static void collapse_huge_page(struct mm_struct *mm, __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); - _pmd = mk_huge_pmd(new_page, vma); + _pmd = mk_huge_pmd(new_page, vma->vm_page_prot); + _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); /* * spin_lock() below is not the equivalent of smp_wmb(), so -- cgit v1.2.3 From 
128ec037bafe5905b2e6f2796f426a1d247d0066 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Sep 2013 15:14:03 -0700 Subject: thp: do_huge_pmd_anonymous_page() cleanup Minor cleanup: unindent most code of the function by inverting one condition. It's preparation for the next patch. No functional changes. Signed-off-by: Kirill A. Shutemov Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: Al Viro Cc: Hugh Dickins Cc: Wu Fengguang Cc: Jan Kara Cc: Mel Gorman Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 83 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 42 deletions(-) (limited to 'mm/huge_memory.c') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 60836870c6f7..6551dd06dd64 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -785,55 +785,54 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr = address & HPAGE_PMD_MASK; pte_t *pte; - if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) { - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - if (unlikely(khugepaged_enter(vma))) + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) + goto out; + if (unlikely(anon_vma_prepare(vma))) + return VM_FAULT_OOM; + if (unlikely(khugepaged_enter(vma))) + return VM_FAULT_OOM; + if (!(flags & FAULT_FLAG_WRITE) && + transparent_hugepage_use_zero_page()) { + pgtable_t pgtable; + struct page *zero_page; + bool set; + pgtable = pte_alloc_one(mm, haddr); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE) && - transparent_hugepage_use_zero_page()) { - pgtable_t pgtable; - struct page *zero_page; - bool set; - pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) - return VM_FAULT_OOM; - zero_page = get_huge_zero_page(); - if (unlikely(!zero_page)) { - pte_free(mm, pgtable); - count_vm_event(THP_FAULT_FALLBACK); 
- goto out; - } - spin_lock(&mm->page_table_lock); - set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_page); - spin_unlock(&mm->page_table_lock); - if (!set) { - pte_free(mm, pgtable); - put_huge_zero_page(); - } - return 0; - } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - if (unlikely(!page)) { + zero_page = get_huge_zero_page(); + if (unlikely(!zero_page)) { + pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); goto out; } - count_vm_event(THP_FAULT_ALLOC); - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { - put_page(page); - goto out; - } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, - page))) { - mem_cgroup_uncharge_page(page); - put_page(page); - goto out; + spin_lock(&mm->page_table_lock); + set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, + zero_page); + spin_unlock(&mm->page_table_lock); + if (!set) { + pte_free(mm, pgtable); + put_huge_zero_page(); } - return 0; } + page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), + vma, haddr, numa_node_id(), 0); + if (unlikely(!page)) { + count_vm_event(THP_FAULT_FALLBACK); + goto out; + } + count_vm_event(THP_FAULT_ALLOC); + if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + put_page(page); + goto out; + } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + goto out; + } + + return 0; out: /* * Use __pte_alloc instead of pte_alloc_map, because we can't -- cgit v1.2.3 From c02925540ca7019465a43c00f8a3c0186ddace2b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Sep 2013 15:14:05 -0700 Subject: thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page() do_huge_pmd_anonymous_page() has copy-pasted piece of handle_mm_fault() to handle fallback path. Let's consolidate code back by introducing VM_FAULT_FALLBACK return code. Signed-off-by: Kirill A. 
Shutemov Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: Al Viro Cc: Hugh Dickins Cc: Wu Fengguang Cc: Jan Kara Cc: Mel Gorman Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) (limited to 'mm/huge_memory.c') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6551dd06dd64..243f4cc75777 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -783,10 +783,9 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; - pte_t *pte; if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end) - goto out; + return VM_FAULT_FALLBACK; if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_OOM; if (unlikely(khugepaged_enter(vma))) @@ -803,7 +802,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!zero_page)) { pte_free(mm, pgtable); count_vm_event(THP_FAULT_FALLBACK); - goto out; + return VM_FAULT_FALLBACK; } spin_lock(&mm->page_table_lock); set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, @@ -819,40 +818,20 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, vma, haddr, numa_node_id(), 0); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); - goto out; + return VM_FAULT_FALLBACK; } count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { put_page(page); - goto out; + return VM_FAULT_FALLBACK; } if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { mem_cgroup_uncharge_page(page); put_page(page); - goto out; + return VM_FAULT_FALLBACK; } return 0; -out: - /* - * Use __pte_alloc instead of pte_alloc_map, because we can't - * run pte_offset_map on the pmd, if an huge pmd could - * materialize from under us from a different thread. 
- */ - if (unlikely(pmd_none(*pmd)) && - unlikely(__pte_alloc(mm, vma, pmd, address))) - return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) - return 0; - /* - * A regular pmd is established and it can't morph into a huge pmd - * from under us anymore at this point because we hold the mmap_sem - * read mode and khugepaged takes it in write mode. So now it's - * safe to run pte_offset_map(). - */ - pte = pte_offset_map(pmd, address); - return handle_pte_fault(mm, vma, address, pte, pmd, flags); } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, -- cgit v1.2.3 From 17766dde364813568e4f876517c72bab70838646 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 12 Sep 2013 15:14:06 -0700 Subject: mm, thp: count thp_fault_fallback anytime thp fault fails Currently, thp_fault_fallback in vmstat only gets incremented if a hugepage allocation fails. If current's memcg hits its limit or the page fault handler returns an error, it is incorrectly accounted as a successful thp_fault_alloc. Count thp_fault_fallback anytime the page fault handler falls back to using regular pages and only count thp_fault_alloc when a hugepage has actually been faulted. Signed-off-by: David Rientjes Cc: Mel Gorman Cc: Andrea Arcangeli Cc: "Kirill A. 
Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'mm/huge_memory.c') diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 243f4cc75777..f60c4ebaa30c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -820,17 +820,19 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { put_page(page); + count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { mem_cgroup_uncharge_page(page); put_page(page); + count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } + count_vm_event(THP_FAULT_ALLOC); return 0; } @@ -1143,7 +1145,6 @@ alloc: new_page = NULL; if (unlikely(!new_page)) { - count_vm_event(THP_FAULT_FALLBACK); if (is_huge_zero_pmd(orig_pmd)) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); @@ -1154,9 +1155,9 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); goto out; } - count_vm_event(THP_FAULT_ALLOC); if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); @@ -1164,10 +1165,13 @@ alloc: split_huge_page(page); put_page(page); } + count_vm_event(THP_FAULT_FALLBACK); ret |= VM_FAULT_OOM; goto out; } + count_vm_event(THP_FAULT_ALLOC); + if (is_huge_zero_pmd(orig_pmd)) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else -- cgit v1.2.3