From cfd9d70a855edf6adb37d0ed88be9e35274dbe49 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Thu, 13 Nov 2014 19:27:24 +0530
Subject: ARCv2: mm: TLB Miss optim: SMP builds can cache pgd pointer in mmu
 scratch reg

ARC700 exception (and interrupt) handling didn't have auto stack
switching, and thus had to rely on stashing a reg temporarily (to free
it up) at a known place in memory, allowing the low level stack
switching to be coded up.

This however was not re-entrant on SMP, which thus had to repurpose the
per-cpu MMU SCRATCH DATA register otherwise used to "cache" the task
pgd pointer (vs. reading it from the mm struct).

The newer HS cores do have auto stack switching, so even SMP builds can
use the MMU SCRATCH reg as originally intended. This patch limits the
workaround to ARC700 SMP builds only.

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlb.c   | 2 +-
 arch/arc/mm/tlbex.S | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 10025e199353..417f05ac4397 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -868,7 +868,7 @@ void arc_mmu_init(void)
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
 	/* In smp we use this reg for interrupt 1 scratch */
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 	/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index c55d95dd2f39..d6fbdeda400a 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -193,7 +193,7 @@ ex_saved_reg1:
 
 	lr  r2, [efa]
 
-#ifndef CONFIG_SMP
+#ifdef ARC_USE_SCRATCH_REG
 	lr  r1, [ARC_REG_SCRATCH_DATA0]	; current pgd
 #else
 	GET_CURR_TASK_ON_CPU  r1
--
cgit v1.2.3
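
Both hunks above key off a single ARC_USE_SCRATCH_REG gate instead of
open-coding the CONFIG_SMP test. As a rough sketch of the idea, the
condition below is inferred from the commit message, not copied from the
upstream header where the actual definition lives:

    /*
     * Sketch of an assumed definition - not quoted from the upstream
     * header. Only ARC700 (non-ARCv2) SMP still needs SCRATCH_DATA0
     * for low level stack switching; every other configuration can
     * keep caching the task pgd pointer in it.
     */
    #if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
    #define ARC_USE_SCRATCH_REG
    #endif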

From 0fb1f35ed9cc2115a88cc73a02e56d288bf2aa8f Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Wed, 11 Feb 2015 18:37:43 +0530
Subject: ARCv2: mm: TLB Miss optim: Use double word load/stores LDD/STD

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlbex.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index d6fbdeda400a..110c72536e8b 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -122,17 +122,27 @@ ex_saved_reg1:
 #else	/* ARCv2 */
 
 .macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_ARC_HAS_LL64
+	std   r0, [sp, -16]
+	std   r2, [sp, -8]
+#else
 	PUSH  r0
 	PUSH  r1
 	PUSH  r2
 	PUSH  r3
+#endif
 .endm
 
 .macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_ARC_HAS_LL64
+	ldd   r0, [sp, -16]
+	ldd   r2, [sp, -8]
+#else
 	POP   r3
 	POP   r2
 	POP   r1
 	POP   r0
+#endif
 .endm
 
 #endif
--
cgit v1.2.3


From f4e2f7cc6999943e0a649cbc4618428181aad58f Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Thu, 29 Oct 2015 15:47:57 +0530
Subject: ARC: mm: TLB Miss optim: avoid re-reading ECR

For setting the PTE Dirty bit, reuse the prior test for ST miss. There
is no need to reload ECR and re-test for the ST cause code, as the
previous condition codes are still valid (unclobbered).

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlbex.S | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 110c72536e8b..4c88148d4cd1 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -380,9 +380,7 @@ ENTRY(EV_TLBMissD)
 
 	;----------------------------------------------------------------
 	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
-	lr   r3, [ecr]
 	or   r0, r0, _PAGE_ACCESSED        ; Accessed bit always
-	btst_s r3,  ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ?
 	or.nz r0, r0, _PAGE_DIRTY          ; if Write, set Dirty bit as well
 
 	st_s  r0, [r1]                     ; Write back PTE
--
cgit v1.2.3


From ad4c40e937f6d6a08a579c4a78206039618426b7 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Tue, 17 Nov 2015 10:10:29 +0530
Subject: ARC: mm: tlb flush optim: Make TLBWriteNI fall back to TLBWrite if
 not available

TLBWriteNI was introduced in MMUv2 (to not invalidate uTLBs in the Fast
Path TLB Refill Handler). To avoid #ifdef'ery, make it fall back to
TLBWrite, which is available on all MMUs. This will also help with the
next change.

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlbex.S | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 4c88148d4cd1..2efaf6ca0c06 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -292,11 +292,7 @@ ex_saved_reg1:
 	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
 
 	/* Commit the Write */
-#if (CONFIG_ARC_MMU_VER >= 2)	/* introduced in v2 */
 	sr  TLBWriteNI, [ARC_REG_TLBCOMMAND]
-#else
-	sr  TLBWrite, [ARC_REG_TLBCOMMAND]
-#endif
 
 #else
 	sr  TLBInsertEntry, [ARC_REG_TLBCOMMAND]
--
cgit v1.2.3
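
The fallback the message describes amounts to a one-line alias, so the
fast-path handler can issue TLBWriteNI unconditionally. A hedged sketch
of what such a definition could look like; its exact form and its home
among the TLB command encodings are assumptions:

    /*
     * Sketch of an assumed fallback - not quoted from the upstream
     * header. Pre-v2 MMUs lack TLBWriteNI, so alias it to TLBWrite:
     * same JTLB write, plus a (harmless) extra uTLB invalidate.
     */
    #if (CONFIG_ARC_MMU_VER < 2)
    #define TLBWriteNI	TLBWrite
    #endif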

From 1355ea2e603d76af6b1381873e37b1aec22a18a0 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Sat, 17 Oct 2015 16:54:14 +0530
Subject: ARC: mm: tlb flush optim: elide repeated uTLB invalidate in loop

The unconditional full TLB flush (on, say, ASID rollover) iterates over
each entry and uses TLBWrite to zero it out. TLBWrite by design also
invalidates the uTLBs, so we end up invalidating them as many times as
the number of entries (512 or 1K).

Optimize this by using the weaker TLBWriteNI cmd in the loop, which
doesn't tinker with the uTLBs, plus an explicit one-time IVUTLB outside
the loop to invalidate them all at once.

And given this optimization, the IVUTLB is now needed on MMUv4 too,
where the uTLBs and JTLBs are otherwise kept coherent by the
TLBInsertEntry / TLBDeleteEntry commands.

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlb.c | 74 +++++++++++++++++++++++++++++---------------------------------------------
 1 file changed, 29 insertions(+), 45 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 417f05ac4397..210d807983dd 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -118,6 +118,33 @@ static inline void __tlb_entry_erase(void)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
+static void utlb_invalidate(void)
+{
+#if (CONFIG_ARC_MMU_VER >= 2)
+
+#if (CONFIG_ARC_MMU_VER == 2)
+	/* MMU v2 introduced the uTLB Flush command.
+	 * There was however an obscure hardware bug, where uTLB flush would
+	 * fail when a prior probe for J-TLB (both totally unrelated) would
+	 * return lkup err - because the entry didn't exist in MMU.
+	 * The Workround was to set Index reg with some valid value, prior to
+	 * flush. This was fixed in MMU v3 hence not needed any more
+	 */
+	unsigned int idx;
+
+	/* make sure INDEX Reg is valid */
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/* If not write some dummy val */
+	if (unlikely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
+#endif
+
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
+#endif
+
+}
+
 #if (CONFIG_ARC_MMU_VER < 4)
 
 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
@@ -149,44 +176,6 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	}
 }
 
-/****************************************************************************
- * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs)
- *
- * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB
- *
- * utlb_invalidate ( )
- *  -For v2 MMU calls Flush uTLB Cmd
- *  -For v1 MMU does nothing (except for Metal Fix v1 MMU)
- *     This is because in v1 TLBWrite itself invalidate uTLBs
- ***************************************************************************/
-
-static void utlb_invalidate(void)
-{
-#if (CONFIG_ARC_MMU_VER >= 2)
-
-#if (CONFIG_ARC_MMU_VER == 2)
-	/* MMU v2 introduced the uTLB Flush command.
-	 * There was however an obscure hardware bug, where uTLB flush would
-	 * fail when a prior probe for J-TLB (both totally unrelated) would
-	 * return lkup err - because the entry didn't exist in MMU.
-	 * The Workround was to set Index reg with some valid value, prior to
-	 * flush. This was fixed in MMU v3 hence not needed any more
-	 */
-	unsigned int idx;
-
-	/* make sure INDEX Reg is valid */
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
-
-	/* If not write some dummy val */
-	if (unlikely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
-#endif
-
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
-#endif
-
-}
-
 static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
@@ -219,11 +208,6 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 
 #else	/* CONFIG_ARC_MMU_VER >= 4) */
 
-static void utlb_invalidate(void)
-{
-	/* No need since uTLB is always in sync with JTLB */
-}
-
 static void tlb_entry_erase(unsigned int vaddr_n_asid)
 {
 	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
@@ -267,7 +251,7 @@ noinline void local_flush_tlb_all(void)
 	for (entry = 0; entry < num_tlb; entry++) {
 		/* write this entry to the TLB */
 		write_aux_reg(ARC_REG_TLBINDEX, entry);
-		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
 	}
 
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
@@ -278,7 +262,7 @@ noinline void local_flush_tlb_all(void)
 
 		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
 			write_aux_reg(ARC_REG_TLBINDEX, entry);
-			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
 		}
 	}
 
--
cgit v1.2.3
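
The hunks above switch the per-entry command to TLBWriteNI, while the
one-time IVUTLB the message describes lands outside the visible diff
context. A condensed sketch of the resulting local_flush_tlb_all()
shape, reconstructed under that assumption, with the THP super-page
loop elided and num_tlb hard-coded for illustration:

    noinline void local_flush_tlb_all(void)
    {
    	unsigned long flags;
    	unsigned int entry, num_tlb = 512;	/* JTLB entries: 512 or 1K */

    	local_irq_save(flags);

    	/* Load an invalid entry in PD0/PD1 so every indexed write erases */
    	write_aux_reg(ARC_REG_TLBPD1, 0);
    	write_aux_reg(ARC_REG_TLBPD0, 0);

    	for (entry = 0; entry < num_tlb; entry++) {
    		write_aux_reg(ARC_REG_TLBINDEX, entry);
    		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);	/* uTLBs untouched */
    	}

    	/* one explicit IVUTLB for the whole batch, not one per entry */
    	utlb_invalidate();

    	local_irq_restore(flags);
    }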

From 2f4ecf68a048de44d72157d637bf9cbbbdb357b0 Mon Sep 17 00:00:00 2001
From: Vineet Gupta
Date: Tue, 10 Sep 2019 15:38:10 -0700
Subject: ARC: mm: tlb flush optim: elide redundant uTLB invalidates for MMUv3

For MMUv3 (and prior), the flush_tlb_{range,mm,page} APIs use the MMU
TLBWrite cmd, which already nukes the entire uTLB, so there is NO need
for the additional IVUTLB cmd from utlb_invalidate() - hence this patch.

local_flush_tlb_all() is special since it uses the weaker TLBWriteNI
cmd (previous commit) to shoot down the JTLB, hence we retain its
explicit uTLB flush.

Signed-off-by: Vineet Gupta
---
 arch/arc/mm/tlb.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/arc/mm')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 210d807983dd..c340acd989a0 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -339,8 +339,6 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 
-	utlb_invalidate();
-
 	local_irq_restore(flags);
 }
 
@@ -369,8 +367,6 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		start += PAGE_SIZE;
 	}
 
-	utlb_invalidate();
-
 	local_irq_restore(flags);
 }
 
@@ -391,7 +387,6 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 
 	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
-		utlb_invalidate();
 	}
 
 	local_irq_restore(flags);
--
cgit v1.2.3
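
For reference, this is roughly what local_flush_tlb_page() looks like
after the series, reconstructed from the hunk context above; the
declarations outside the hunk are assumptions:

    void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
    {
    	unsigned long flags;				/* assumed decl */
    	const unsigned int cpu = smp_processor_id();	/* assumed decl */

    	local_irq_save(flags);

    	/*
    	 * TLBWrite (MMUv3 and prior) / TLBDeleteEntry (MMUv4) already
    	 * leave the uTLBs coherent, so no trailing utlb_invalidate().
    	 */
    	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)
    		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));

    	local_irq_restore(flags);
    }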