From 4363287178a85e41cd59f9f1d423fbe1f9048ec8 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 19 Aug 2020 17:10:11 +0300 Subject: riscv/mm: Simplify retry logic in do_page_fault() Let's combine the two retry logic if statements in do_page_fault() to simplify the code. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 716d64e36f83..f5c2e4a249eb 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -127,17 +127,15 @@ good_area: BUG(); } - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_RETRY) { - flags |= FAULT_FLAG_TRIED; - - /* - * No need to mmap_read_unlock(mm) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ - goto retry; - } + if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { + flags |= FAULT_FLAG_TRIED; + + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; } mmap_read_unlock(mm); -- cgit v1.2.3 From cac4d1dc85be2996ea19aea4f6ac6525f4c97171 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 18:38:58 +0300 Subject: riscv/mm/fault: Move no context handling to no_context() This patch moves the no context handling in do_page_fault() to no_context() function and converts gotos to calls to the new function. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 83 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 31 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index f5c2e4a249eb..1612552478c5 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -19,6 +19,24 @@ #include "../kernel/head.h" +static inline void no_context(struct pt_regs *regs, unsigned long addr) +{ + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + die(regs, "Oops"); + do_exit(SIGKILL); +} + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -59,8 +77,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * If we're in an interrupt, have no user context, or are running * in an atomic region, then we must not take the fault. */ - if (unlikely(faulthandler_disabled() || !mm)) - goto no_context; + if (unlikely(faulthandler_disabled() || !mm)) { + no_context(regs, addr); + return; + } if (user_mode(regs)) flags |= FAULT_FLAG_USER; @@ -153,21 +173,8 @@ bad_area: return; } -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", - (addr < PAGE_SIZE) ? 
"NULL pointer dereference" : - "paging request", addr); - die(regs, "Oops"); - do_exit(SIGKILL); + no_context(regs, addr); + return; /* * We ran out of memory, call the OOM killer, and return the userspace @@ -175,16 +182,20 @@ no_context: */ out_of_memory: mmap_read_unlock(mm); - if (!user_mode(regs)) - goto no_context; + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } pagefault_out_of_memory(); return; do_sigbus: mmap_read_unlock(mm); /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; @@ -213,19 +224,25 @@ vmalloc_fault: pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; pgd_k = init_mm.pgd + index; - if (!pgd_present(*pgd_k)) - goto no_context; + if (!pgd_present(*pgd_k)) { + no_context(regs, addr); + return; + } set_pgd(pgd, *pgd_k); p4d = p4d_offset(pgd, addr); p4d_k = p4d_offset(pgd_k, addr); - if (!p4d_present(*p4d_k)) - goto no_context; + if (!p4d_present(*p4d_k)) { + no_context(regs, addr); + return; + } pud = pud_offset(p4d, addr); pud_k = pud_offset(p4d_k, addr); - if (!pud_present(*pud_k)) - goto no_context; + if (!pud_present(*pud_k)) { + no_context(regs, addr); + return; + } /* * Since the vmalloc area is global, it is unnecessary @@ -233,8 +250,10 @@ vmalloc_fault: */ pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); - if (!pmd_present(*pmd_k)) - goto no_context; + if (!pmd_present(*pmd_k)) { + no_context(regs, addr); + return; + } set_pmd(pmd, *pmd_k); /* @@ -244,8 +263,10 @@ vmalloc_fault: * silently loop forever. */ pte_k = pte_offset_kernel(pmd_k, addr); - if (!pte_present(*pte_k)) - goto no_context; + if (!pte_present(*pte_k)) { + no_context(regs, addr); + return; + } /* * The kernel assumes that TLBs don't cache invalid -- cgit v1.2.3 From a51271d99cdd04910227060936d0598ba49fb1cc Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 18:48:01 +0300 Subject: riscv/mm/fault: Move bad area handling to bad_area() This patch moves the bad area handling in do_page_fault() to bad_area() function and converts gotos to calls to the new function. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 67 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 1612552478c5..ac9a99255365 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -37,6 +37,22 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) do_exit(SIGKILL); } +static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) +{ + /* + * Something tried to access memory that isn't in our memory map. + * Fix it, but check if it's kernel or user first. + */ + mmap_read_unlock(mm); + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + do_trap(regs, SIGSEGV, code, addr); + return; + } + + no_context(regs, addr); +} + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. 
@@ -90,14 +106,20 @@ asmlinkage void do_page_fault(struct pt_regs *regs) retry: mmap_read_lock(mm); vma = find_vma(mm, addr); - if (unlikely(!vma)) - goto bad_area; + if (unlikely(!vma)) { + bad_area(regs, mm, code, addr); + return; + } if (likely(vma->vm_start <= addr)) goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) - goto bad_area; - if (unlikely(expand_stack(vma, addr))) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, mm, code, addr); + return; + } + if (unlikely(expand_stack(vma, addr))) { + bad_area(regs, mm, code, addr); + return; + } /* * Ok, we have a good vm_area for this memory access, so @@ -108,16 +130,22 @@ good_area: switch (cause) { case EXC_INST_PAGE_FAULT: - if (!(vma->vm_flags & VM_EXEC)) - goto bad_area; + if (!(vma->vm_flags & VM_EXEC)) { + bad_area(regs, mm, code, addr); + return; + } break; case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) - goto bad_area; + if (!(vma->vm_flags & VM_READ)) { + bad_area(regs, mm, code, addr); + return; + } break; case EXC_STORE_PAGE_FAULT: - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; + if (!(vma->vm_flags & VM_WRITE)) { + bad_area(regs, mm, code, addr); + return; + } flags |= FAULT_FLAG_WRITE; break; default: @@ -161,21 +189,6 @@ good_area: mmap_read_unlock(mm); return; - /* - * Something tried to access memory that isn't in our memory map. - * Fix it, but check if it's kernel or user first. - */ -bad_area: - mmap_read_unlock(mm); - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) { - do_trap(regs, SIGSEGV, code, addr); - return; - } - - no_context(regs, addr); - return; - /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). -- cgit v1.2.3 From ac416a724f113207407848ef58a0270cd03d94d1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 18:54:26 +0300 Subject: riscv/mm/fault: Move vmalloc fault handling to vmalloc_fault() This patch moves the vmalloc fault handling in do_page_fault() to vmalloc_fault() function and converts gotos to calls to the new function. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 164 +++++++++++++++++++++++++------------------------- 1 file changed, 82 insertions(+), 82 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index ac9a99255365..460ea1d6c24e 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -53,6 +53,84 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code no_context(regs, addr); } +static void inline vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) +{ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + p4d_t *p4d, *p4d_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + int index; + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) + return do_trap(regs, SIGSEGV, code, addr); + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk->active_mm->pgd" here. + * We might be inside an interrupt in the middle + * of a task switch. 
+ */ + index = pgd_index(addr); + pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) { + no_context(regs, addr); + return; + } + set_pgd(pgd, *pgd_k); + + p4d = p4d_offset(pgd, addr); + p4d_k = p4d_offset(pgd_k, addr); + if (!p4d_present(*p4d_k)) { + no_context(regs, addr); + return; + } + + pud = pud_offset(p4d, addr); + pud_k = pud_offset(p4d_k, addr); + if (!pud_present(*pud_k)) { + no_context(regs, addr); + return; + } + + /* + * Since the vmalloc area is global, it is unnecessary + * to copy individual PTEs + */ + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + if (!pmd_present(*pmd_k)) { + no_context(regs, addr); + return; + } + set_pmd(pmd, *pmd_k); + + /* + * Make sure the actual PTE exists as well to + * catch kernel vmalloc-area accesses to non-mapped + * addresses. If we don't do this, this will just + * silently loop forever. + */ + pte_k = pte_offset_kernel(pmd_k, addr); + if (!pte_present(*pte_k)) { + no_context(regs, addr); + return; + } + + /* + * The kernel assumes that TLBs don't cache invalid + * entries, but in RISC-V, SFENCE.VMA specifies an + * ordering constraint, not a cache flush; it is + * necessary even after writing invalid entries. + */ + local_flush_tlb_page(addr); +} + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -82,8 +160,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) - goto vmalloc_fault; + if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) { + vmalloc_fault(regs, code, addr); + return; + } /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->status & SR_PIE)) @@ -211,84 +291,4 @@ do_sigbus: } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; - -vmalloc_fault: - { - pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - p4d_t *p4d, *p4d_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - int index; - - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) - return do_trap(regs, SIGSEGV, code, addr); - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk->active_mm->pgd" here. - * We might be inside an interrupt in the middle - * of a task switch. - */ - index = pgd_index(addr); - pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) { - no_context(regs, addr); - return; - } - set_pgd(pgd, *pgd_k); - - p4d = p4d_offset(pgd, addr); - p4d_k = p4d_offset(pgd_k, addr); - if (!p4d_present(*p4d_k)) { - no_context(regs, addr); - return; - } - - pud = pud_offset(p4d, addr); - pud_k = pud_offset(p4d_k, addr); - if (!pud_present(*pud_k)) { - no_context(regs, addr); - return; - } - - /* - * Since the vmalloc area is global, it is unnecessary - * to copy individual PTEs - */ - pmd = pmd_offset(pud, addr); - pmd_k = pmd_offset(pud_k, addr); - if (!pmd_present(*pmd_k)) { - no_context(regs, addr); - return; - } - set_pmd(pmd, *pmd_k); - - /* - * Make sure the actual PTE exists as well to - * catch kernel vmalloc-area accesses to non-mapped - * addresses. If we don't do this, this will just - * silently loop forever. 
- */ - pte_k = pte_offset_kernel(pmd_k, addr); - if (!pte_present(*pte_k)) { - no_context(regs, addr); - return; - } - - /* - * The kernel assumes that TLBs don't cache invalid - * entries, but in RISC-V, SFENCE.VMA specifies an - * ordering constraint, not a cache flush; it is - * necessary even after writing invalid entries. - */ - local_flush_tlb_page(addr); - - return; - } } -- cgit v1.2.3 From bda281d5bfb70f895880ebfb94a7f20d0604437f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 19:01:43 +0300 Subject: riscv/mm/fault: Simplify fault error handling Move fault error handling after retry logic. This simplifies the code flow and makes it easier to move fault error handling to its own function. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 460ea1d6c24e..bfb40927cb7a 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -247,14 +247,6 @@ good_area: if (fault_signal_pending(fault, regs)) return; - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { flags |= FAULT_FLAG_TRIED; @@ -267,6 +259,14 @@ good_area: } mmap_read_unlock(mm); + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } return; /* @@ -274,7 +274,6 @@ good_area: * (which will retry the fault, or kill us if we got oom-killed). */ out_of_memory: - mmap_read_unlock(mm); if (!user_mode(regs)) { no_context(regs, addr); return; @@ -283,7 +282,6 @@ out_of_memory: return; do_sigbus: - mmap_read_unlock(mm); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { no_context(regs, addr); -- cgit v1.2.3 From 6c11ffbfd849830e8cede5fb0699828e74a7d26b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 19:04:09 +0300 Subject: riscv/mm/fault: Move fault error handling to mm_fault_error() This patch moves the fault error handling to mm_fault_error() function and converts gotos to calls to the new function. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 56 ++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index bfb40927cb7a..49b190d0c088 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -37,6 +37,36 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) do_exit(SIGKILL); } +static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) +{ + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ +out_of_memory: + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + pagefault_out_of_memory(); + return; + +do_sigbus: + /* Kernel mode? 
Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + do_trap(regs, SIGBUS, BUS_ADRERR, addr); + return; +} + static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) { /* @@ -261,32 +291,8 @@ good_area: mmap_read_unlock(mm); if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - return; - - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ -out_of_memory: - if (!user_mode(regs)) { - no_context(regs, addr); + mm_fault_error(regs, addr, fault); return; } - pagefault_out_of_memory(); - return; - -do_sigbus: - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } - do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; } -- cgit v1.2.3 From 7a75f3d47a0b1be6eeb67d14e4003b2b91f8aa59 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 19:05:44 +0300 Subject: riscv/mm/fault: Simplify mm_fault_error() Simplify the mm_fault_error() handling function by eliminating the unnecessary gotos. Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 49b190d0c088..3b430fb18de3 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -39,32 +39,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ -out_of_memory: - if (!user_mode(regs)) { - no_context(regs, addr); + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + pagefault_out_of_memory(); return; - } - pagefault_out_of_memory(); - return; - -do_sigbus: - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); + } else if (fault & VM_FAULT_SIGBUS) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; } - do_trap(regs, SIGBUS, BUS_ADRERR, addr); - return; + BUG(); } static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) -- cgit v1.2.3 From 6747430197ed414be37e843064a7f365f4d1fd57 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 19:42:54 +0300 Subject: riscv/mm/fault: Move FAULT_FLAG_WRITE handling in do_page_fault() Let's handle the translation of EXC_STORE_PAGE_FAULT to FAULT_FLAG_WRITE once before looking up the VMA. This makes it easier to extract access error logic in the next patch. 
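For illustration (a sketch, not part of the patch): after this change and the FAULT_FLAG_INSTRUCTION patch later in the series, the cause-to-flags translation amounts to the helper below. fault_flags_for_cause() is a hypothetical name; the actual patches open-code these checks in do_page_fault().

static unsigned int fault_flags_for_cause(unsigned long cause)
{
	unsigned int flags = FAULT_FLAG_DEFAULT;

	if (cause == EXC_STORE_PAGE_FAULT)
		flags |= FAULT_FLAG_WRITE;		/* stores and AMOs write memory */
	else if (cause == EXC_INST_PAGE_FAULT)
		flags |= FAULT_FLAG_INSTRUCTION;	/* instruction-fetch fault */
	return flags;
}

Doing this once, before mmap_read_lock(), means the per-VMA permission checks only consult the precomputed flags.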
Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3b430fb18de3..bdc70d3d507f 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -208,6 +208,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (cause == EXC_STORE_PAGE_FAULT) + flags |= FAULT_FLAG_WRITE; + retry: mmap_read_lock(mm); vma = find_vma(mm, addr); @@ -251,7 +254,6 @@ good_area: bad_area(regs, mm, code, addr); return; } - flags |= FAULT_FLAG_WRITE; break; default: panic("%s: unhandled cause %lu", __func__, cause); -- cgit v1.2.3

From afb8c6fee8ce9b54b9c810eea0597ef6a876abb7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 25 Aug 2020 19:41:17 +0300 Subject: riscv/mm/fault: Move access error check to function

Move the access error check into an access_error() function to simplify the control flow in do_page_fault().

Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index bdc70d3d507f..a23eaf5ce95c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -156,6 +156,30 @@ static void inline vmalloc_fault(struct pt_regs *regs, int code, unsigned long a local_flush_tlb_page(addr); } +static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) +{ + switch (cause) { + case EXC_INST_PAGE_FAULT: + if (!(vma->vm_flags & VM_EXEC)) { + return true; + } + break; + case EXC_LOAD_PAGE_FAULT: + if (!(vma->vm_flags & VM_READ)) { + return true; + } + break; + case EXC_STORE_PAGE_FAULT: + if (!(vma->vm_flags & VM_WRITE)) { + return true; + } + break; + default: + panic("%s: unhandled cause %lu", __func__, cause); + } + return false; +} + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -236,27 +260,9 @@ retry: good_area: code = SEGV_ACCERR; - switch (cause) { - case EXC_INST_PAGE_FAULT: - if (!(vma->vm_flags & VM_EXEC)) { - bad_area(regs, mm, code, addr); - return; - } - break; - case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) { - bad_area(regs, mm, code, addr); - return; - } - break; - case EXC_STORE_PAGE_FAULT: - if (!(vma->vm_flags & VM_WRITE)) { - bad_area(regs, mm, code, addr); - return; - } - break; - default: - panic("%s: unhandled cause %lu", __func__, cause); + if (unlikely(access_error(cause, vma))) { + bad_area(regs, mm, code, addr); + return; } /* -- cgit v1.2.3

From baf7cbd94b5688f167443a2cc3dcea3300132099 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 31 Aug 2020 15:33:48 +0800 Subject: riscv: Set more data to cacheinfo

Set cacheinfo.{size,sets,line_size} for each cache node so that userland can get this information through the auxiliary vector.
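As a worked example of the associativity derivation in the hunk below, assume a hypothetical DT node with cache-size = 32768, cache-sets = 64 and cache-block-size = 64:

/*
 * ways = (size / sets) / line_size
 *      = (32768 / 64) / 64
 *      = 8
 *
 * i.e. an 8-way set-associative 32 KiB cache with 64-byte lines,
 * matching the L1 values in the 'getconf -a' output shown two
 * patches later in this series.
 */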
Signed-off-by: Zong Li Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cacheinfo.c | 66 +++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 15 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index bd0f122965c3..291d7d8f748b 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -25,12 +25,53 @@ cache_get_priv_group(struct cacheinfo *this_leaf) return NULL; } -static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, - enum cache_type type, unsigned int level) +static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, unsigned int size, + unsigned int sets, unsigned int line_size) { this_leaf->level = level; this_leaf->type = type; + this_leaf->size = size; + this_leaf->number_of_sets = sets; + this_leaf->coherency_line_size = line_size; + + /* + * If the cache is fully associative, there is no need to + * check the other properties. + */ + if (sets == 1) + return; + + /* + * Set the ways number for n-ways associative, make sure + * all properties are big than zero. + */ + if (sets > 0 && size > 0 && line_size > 0) + this_leaf->ways_of_associativity = (size / sets) / line_size; +} + +static void fill_cacheinfo(struct cacheinfo **this_leaf, + struct device_node *node, unsigned int level) +{ + unsigned int size, sets, line_size; + + if (!of_property_read_u32(node, "cache-size", &size) && + !of_property_read_u32(node, "cache-block-size", &line_size) && + !of_property_read_u32(node, "cache-sets", &sets)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "i-cache-size", &size) && + !of_property_read_u32(node, "i-cache-sets", &sets) && + !of_property_read_u32(node, "i-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "d-cache-size", &size) && + !of_property_read_u32(node, "d-cache-sets", &sets) && + !of_property_read_u32(node, "d-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size); + } } static int __init_cache_level(unsigned int cpu) @@ -83,29 +124,24 @@ static int __populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + /* Level 1 caches in cpu node */ + fill_cacheinfo(&this_leaf, np, level); + /* Next level caches in cache nodes */ prev = np; while ((np = of_find_next_cache_node(np))) { of_node_put(prev); prev = np; + if (!of_device_is_compatible(np, "cache")) break; if (of_property_read_u32(np, "cache-level", &level)) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + + fill_cacheinfo(&this_leaf, np, level); + levels = level; } of_node_put(np); -- cgit v1.2.3 From 
b5fca7c55f9fbab5ad732c3bce00f31af6ba5cfa Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 31 Aug 2020 15:33:49 +0800 Subject: riscv: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO

AT_VECTOR_SIZE_ARCH should be defined with the maximum number of NEW_AUX_ENT entries that ARCH_DLINFO can contain, but it wasn't defined for RISC-V at all even though ARCH_DLINFO will contain one NEW_AUX_ENT for the VDSO address.

Signed-off-by: Zong Li Reviewed-by: Palmer Dabbelt Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index d86cb17bbabe..22e0ae888406 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -10,4 +10,7 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 1 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ -- cgit v1.2.3

From 38f5bd23deae24c8fa67a2c574b6d43df27a8aa8 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 31 Aug 2020 15:33:50 +0800 Subject: riscv: Add cache information in AUX vector

There are no standard CSR registers to provide cache information; the way for RISC-V is to get this information from the DT. Currently, AT_L1I_X, AT_L1D_X and AT_L2_X are present in the glibc header, and sysconf() can use them to get cache information through the AUX vector. The result of 'getconf -a' is as follows:

LEVEL1_ICACHE_SIZE 32768
LEVEL1_ICACHE_ASSOC 8
LEVEL1_ICACHE_LINESIZE 64
LEVEL1_DCACHE_SIZE 32768
LEVEL1_DCACHE_ASSOC 8
LEVEL1_DCACHE_LINESIZE 64
LEVEL2_CACHE_SIZE 2097152
LEVEL2_CACHE_ASSOC 32
LEVEL2_CACHE_LINESIZE 64

Signed-off-by: Zong Li Reviewed-by: Palmer Dabbelt Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheinfo.h | 5 +++++ arch/riscv/include/asm/elf.h | 13 +++++++++++++ arch/riscv/include/uapi/asm/auxvec.h | 23 ++++++++++++++++++++++- arch/riscv/kernel/cacheinfo.c | 32 +++++++++++++++++++++++++++++++- 4 files changed, 71 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/cacheinfo.h b/arch/riscv/include/asm/cacheinfo.h index 5d9662e9aba8..d1a365215ec0 100644 --- a/arch/riscv/include/asm/cacheinfo.h +++ b/arch/riscv/include/asm/cacheinfo.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 SiFive + */ #ifndef _ASM_RISCV_CACHEINFO_H #define _ASM_RISCV_CACHEINFO_H @@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops { }; void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops); +uintptr_t get_cache_size(u32 level, enum cache_type type); +uintptr_t get_cache_geometry(u32 level, enum cache_type type); #endif /* _ASM_RISCV_CACHEINFO_H */ diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index d83a4efd052b..5c725e1df58b 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * These are used to set parameters in the core dumps.
@@ -61,6 +62,18 @@ extern unsigned long elf_hwcap; do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_L1I_CACHESIZE, \ + get_cache_size(1, CACHE_TYPE_INST)); \ + NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \ + get_cache_geometry(1, CACHE_TYPE_INST)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, \ + get_cache_size(1, CACHE_TYPE_DATA)); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, \ + get_cache_geometry(1, CACHE_TYPE_DATA)); \ + NEW_AUX_ENT(AT_L2_CACHESIZE, \ + get_cache_size(2, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \ + get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 22e0ae888406..32c73ba1d531 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -10,7 +10,28 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* + * The set of entries below represent more extensive information + * about the caches, in the form of two entry per cache type, + * one entry containing the cache size in bytes, and the other + * containing the cache line size in bytes in the bottom 16 bits + * and the cache associativity in the next 16 bits. + * + * The associativity is such that if N is the 16-bit value, the + * cache is N way set associative. A value if 0xffff means fully + * associative, a value of 1 means directly mapped. + * + * For all these fields, a value of 0 means that the information + * is not known. + */ +#define AT_L1I_CACHESIZE 40 +#define AT_L1I_CACHEGEOMETRY 41 +#define AT_L1D_CACHESIZE 42 +#define AT_L1D_CACHEGEOMETRY 43 +#define AT_L2_CACHESIZE 44 +#define AT_L2_CACHEGEOMETRY 45 + /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 1 +#define AT_VECTOR_SIZE_ARCH 7 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 291d7d8f748b..de59dd457b41 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,7 +3,6 @@ * Copyright (C) 2017 SiFive */ -#include #include #include #include @@ -25,6 +24,37 @@ cache_get_priv_group(struct cacheinfo *this_leaf) return NULL; } +static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + struct cacheinfo *this_leaf; + int index; + + for (index = 0; index < this_cpu_ci->num_leaves; index++) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level == level && this_leaf->type == type) + return this_leaf; + } + + return NULL; +} + +uintptr_t get_cache_size(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? this_leaf->size : 0; +} + +uintptr_t get_cache_geometry(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? 
(this_leaf->ways_of_associativity << 16 | + this_leaf->coherency_line_size) : + 0; +} + static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, unsigned int level, unsigned int size, unsigned int sets, unsigned int line_size) -- cgit v1.2.3

From 2baa6d9506f24d52f53317f60ccbcdbb2c4f4c40 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 5 Sep 2020 08:52:52 +0300 Subject: riscv/mm/fault: Fix inline placement in vmalloc_fault() declaration

The "inline" keyword is in the wrong place in the vmalloc_fault() declaration:

>> arch/riscv/mm/fault.c:56:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] 56 | static void inline vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) | ^~~~~~

Fix that up.

Reported-by: kernel test robot Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a23eaf5ce95c..a173432ccf82 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -78,7 +78,7 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code no_context(regs, addr); } -static void inline vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) +static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) { pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; -- cgit v1.2.3

From a960c1323749383e62b67f5d49cdfbdcccde0ef6 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 5 Sep 2020 09:07:26 +0300 Subject: riscv/mm/fault: Set FAULT_FLAG_INSTRUCTION flag in do_page_fault()

If the page fault "cause" is EXC_INST_PAGE_FAULT, set the FAULT_FLAG_INSTRUCTION flag to let handle_mm_fault() and friends know about it. This makes no functional change because RISC-V uses the default arch_vma_access_permitted() implementation, which always returns true. However, dax_pmd_fault(), for example, has a tracepoint that uses FAULT_FLAG_INSTRUCTION, so we might as well set it.

Signed-off-by: Pekka Enberg Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index a173432ccf82..1359e21c0c62 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -234,7 +234,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs) if (cause == EXC_STORE_PAGE_FAULT) flags |= FAULT_FLAG_WRITE; - + else if (cause == EXC_INST_PAGE_FAULT) + flags |= FAULT_FLAG_INSTRUCTION; retry: mmap_read_lock(mm); vma = find_vma(mm, addr); -- cgit v1.2.3

From 54701a0d12e2e2c9f0814572b42bdd3067ffcf15 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 14 Sep 2020 08:52:02 +0800 Subject: RISC-V: Fix duplicate included thread_info.h

asm/thread_info.h is included more than once; remove the one that isn't necessary.
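Tying the cache patches above together, a userspace sketch of consuming the new entries via getauxval(3). It assumes a RISC-V libc whose headers provide the AT_L1D_* constants from the uapi patch above; the geometry word packs the line size into the low 16 bits and the ways into the next 16 bits:

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	unsigned long size = getauxval(AT_L1D_CACHESIZE);
	unsigned long geo = getauxval(AT_L1D_CACHEGEOMETRY);

	/* 0 means the kernel could not determine the property */
	printf("L1D: %lu bytes, %lu-byte lines, %lu ways\n",
	       size, geo & 0xffff, (geo >> 16) & 0xffff);
	return 0;
}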
Signed-off-by: Tian Tao Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0a4e81b8dc79..9ade707b8ed8 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -3,7 +3,6 @@ * Copyright (C) 2012 Regents of the University of California */ -#include #include #include #include -- cgit v1.2.3

From 8f3a2b4a96dc014e99e1df327db1450fdbbd5e15 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 17 Sep 2020 15:37:10 -0700 Subject: RISC-V: Move DT mapping out of fixmap

Currently, RISC-V reserves 1MB of fixmap memory for the device tree. However, it maps only a single PMD (2MB) for the fixmap, which leaves < 1MB for other kernel features such as early ioremap, which requires the fixmap as well. The fixmap size can be increased by another 2MB, but that brings additional complexity and changes the virtual memory layout as well. If some additional feature requires fixmap space again, it has to be moved again.

Technically, the DT doesn't need a fixmap, as the memory occupied by the DT is only used during boot. That's why we map the device tree in the early page table using two consecutive PGD mappings at lower addresses (< PAGE_OFFSET). This frees a lot of space in the fixmap and also makes the maximum supported device tree size PGDIR_SIZE. Thus, the init memory section can be used for the same purpose as well. This simplifies the fixmap implementation.

Signed-off-by: Anup Patel Signed-off-by: Atish Patra Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fixmap.h | 3 --- arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/kernel/head.S | 1 - arch/riscv/kernel/head.h | 2 -- arch/riscv/kernel/setup.c | 9 +++++++-- arch/riscv/mm/init.c | 26 ++++++++++++-------------- 6 files changed, 20 insertions(+), 22 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 1ff075a8dfc7..11613f38228a 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -22,9 +22,6 @@ */ enum fixed_addresses { FIX_HOLE, -#define FIX_FDT_SIZE SZ_1M - FIX_FDT_END, - FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, FIX_PTE, FIX_PMD, FIX_TEXT_POKE1, diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index eaea1f717010..815f8c959dd4 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -464,6 +464,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl #define kern_addr_valid(addr) (1) /* FIXME */ extern void *dtb_early_va; +extern uintptr_t dtb_early_pa; void setup_bootmem(void); void paging_init(void); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 9ade707b8ed8..703e59d717f0 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -258,7 +258,6 @@ clear_bss_done: #endif /* Start the kernel */ call soc_early_init - call parse_dtb tail start_kernel .Lsecondary_start: diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index 105fb0496b24..b48dda3d04f6 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); extern void *__cpu_up_stack_pointer[]; extern void *__cpu_up_task_pointer[]; -void __init parse_dtb(void); - #endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 2c6dd329312b..edea7ef88402 100644
--- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -48,8 +48,9 @@ atomic_t hart_lottery __section(.sdata); unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); -void __init parse_dtb(void) +static void __init parse_dtb(void) { + /* Early scan of device tree from init memory */ if (early_init_dt_scan(dtb_early_va)) return; @@ -62,6 +63,7 @@ void __init parse_dtb(void) void __init setup_arch(char **cmdline_p) { + parse_dtb(); init_mm.start_code = (unsigned long) _stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; @@ -76,7 +78,10 @@ void __init setup_arch(char **cmdline_p) #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + if (early_init_dt_verify(__va(dtb_early_pa))) + unflatten_device_tree(); + else + pr_err("No DTB found in kernel mappings\n"); #endif #ifdef CONFIG_SWIOTLB diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 787c75f751a5..2b651f63f5c4 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -28,7 +28,9 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] EXPORT_SYMBOL(empty_zero_page); extern char _start[]; -void *dtb_early_va; +#define DTB_EARLY_BASE_VA PGDIR_SIZE +void *dtb_early_va __initdata; +uintptr_t dtb_early_pa __initdata; static void __init zone_sizes_init(void) { @@ -141,8 +143,6 @@ disable: } #endif /* CONFIG_BLK_DEV_INITRD */ -static phys_addr_t dtb_early_pa __initdata; - void __init setup_bootmem(void) { struct memblock_region *reg; @@ -399,7 +399,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t va, end_va; + uintptr_t va, pa, end_va; uintptr_t load_pa = (uintptr_t)(&_start); uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); @@ -448,16 +448,13 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) load_pa + (va - PAGE_OFFSET), map_size, PAGE_KERNEL_EXEC); - /* Create fixed mapping for early FDT parsing */ - end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE; - for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE) - create_pte_mapping(fixmap_pte, va, - dtb_pa + (va - __fix_to_virt(FIX_FDT)), - PAGE_SIZE, PAGE_KERNEL); - - /* Save pointer to DTB for early FDT parsing */ - dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); - /* Save physical address for memblock reservation */ + /* Create two consecutive PGD mappings for FDT early scan */ + pa = dtb_pa & ~(PGDIR_SIZE - 1); + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + pa, PGDIR_SIZE, PAGE_KERNEL); + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, + pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); dtb_early_pa = dtb_pa; } @@ -516,6 +513,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #else dtb_early_va = (void *)dtb_pa; #endif + dtb_early_pa = dtb_pa; } static inline void setup_vm_final(void) -- cgit v1.2.3 From 6262f661ff5d7d6a2613b95d0b7820c60b46b0b5 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 17 Sep 2020 15:37:11 -0700 Subject: RISC-V: Add early ioremap support UEFI uses early IO or memory mappings for runtime services before normal ioremap() is usable. Add the necessary fixmap bindings and pmd mappings for generic ioremap support to work. 
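A minimal sketch of the boot-time pattern this enables, using the generic early_ioremap()/early_iounmap() helpers that GENERIC_EARLY_IOREMAP provides; the probed address and register are hypothetical:

/* Sketch; assumes <linux/io.h> (for readl()) and <asm/early_ioremap.h>.
 * Runs after early_ioremap_setup() but before normal ioremap() works. */
static void __init peek_early_mmio(phys_addr_t pa)
{
	void __iomem *base = early_ioremap(pa, PAGE_SIZE);

	if (base) {
		pr_info("early MMIO reg0: %#x\n", readl(base));
		early_iounmap(base, PAGE_SIZE);	/* slots are scarce, unmap promptly */
	}
}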
Signed-off-by: Atish Patra Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/Kbuild | 1 + arch/riscv/include/asm/fixmap.h | 13 +++++++++++++ arch/riscv/include/asm/io.h | 1 + arch/riscv/kernel/setup.c | 2 ++ arch/riscv/mm/init.c | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 51 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index df18372861d8..5e4ace64acbc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -37,6 +37,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY if SMP select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS + select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 3d9410bb4de0..59dd7be55005 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 11613f38228a..54cbf07fb4e9 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -27,6 +27,19 @@ enum fixed_addresses { FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, + + __end_of_permanent_fixed_addresses, + /* + * Temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + */ +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 3835c3295dc5..c025a746a148 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -14,6 +14,7 @@ #include #include #include +#include /* * MMIO access functions are separated out to break dependency cycles diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index edea7ef88402..41ef96d0d97a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -71,6 +72,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_ioremap_setup(); parse_early_param(); setup_bootmem(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 2b651f63f5c4..b75ebe8e7a92 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -403,6 +403,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) uintptr_t load_pa = (uintptr_t)(&_start); uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t fix_bmap_spmd, fix_bmap_epmd; +#endif va_pa_offset = PAGE_OFFSET - load_pa; pfn_base = PFN_DOWN(load_pa); @@ -456,6 +459,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); dtb_early_pa = dtb_pa; + + /* + * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap + * range can not span multiple pmds. 
+ */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + +#ifndef __PAGETABLE_PMD_FOLDED + /* + * Early ioremap fixmap is already created as it lies within first 2MB + * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END + * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn + * the user if not. + */ + fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))]; + fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))]; + if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) { + WARN_ON(1); + pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n", + pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd)); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +#endif } static void __init setup_vm_final(void) -- cgit v1.2.3

From e8dcb61f2ade040a372d66907d220dd3fdee2505 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 17 Sep 2020 15:37:12 -0700 Subject: RISC-V: Implement late mapping page table allocation functions

Currently, page table setup is done during setup_vm_final, where the fixmap can be used to create the temporary mappings. The physical frame is allocated from memblock_alloc_* functions. However, this won't work if a page table mapping needs to be created for a different mm context (i.e. the efi mm) at a later point in time. Use generic kernel page allocation functions & macros for any mapping after setup_vm_final.

Signed-off-by: Atish Patra Reviewed-by: Anup Patel Acked-by: Mike Rapoport Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 130 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 31 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b75ebe8e7a92..63acc8185bfa 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -32,6 +32,15 @@ extern char _start[]; void *dtb_early_va __initdata; uintptr_t dtb_early_pa __initdata; +struct pt_alloc_ops { + pte_t *(*get_pte_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pte)(uintptr_t va); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t *(*get_pmd_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pmd)(uintptr_t va); +#endif +}; + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; @@ -203,6 +212,8 @@ void __init setup_bootmem(void) } #ifdef CONFIG_MMU +static struct pt_alloc_ops pt_ops; + unsigned long va_pa_offset; EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; @@ -211,7 +222,6 @@ EXPORT_SYMBOL(pfn_base); pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; -static bool mmu_enabled; #define MAX_EARLY_MAPPING_SIZE SZ_128M @@ -234,27 +244,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } -static pte_t *__init get_pte_virt(phys_addr_t pa) +static inline pte_t *__init get_pte_virt_early(phys_addr_t pa) { - if (mmu_enabled) { - clear_fixmap(FIX_PTE); - return (pte_t *)set_fixmap_offset(FIX_PTE, pa); - } else { - return (pte_t *)((uintptr_t)pa); - } + return (pte_t *)((uintptr_t)pa); } -static phys_addr_t __init alloc_pte(uintptr_t va) +static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_PTE); + return (pte_t
*)set_fixmap_offset(FIX_PTE, pa); +} + +static inline pte_t *get_pte_virt_late(phys_addr_t pa) +{ + return (pte_t *) __va(pa); +} + +static inline phys_addr_t __init alloc_pte_early(uintptr_t va) { /* * We only create PMD or PGD early mappings so we * should never reach here with MMU disabled. */ - BUG_ON(!mmu_enabled); + BUG(); +} +static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) +{ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } +static phys_addr_t alloc_pte_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr))) + BUG(); + return __pa(vaddr); +} + static void __init create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -279,28 +308,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); -static pmd_t *__init get_pmd_virt(phys_addr_t pa) +static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { - if (mmu_enabled) { - clear_fixmap(FIX_PMD); - return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); - } else { - return (pmd_t *)((uintptr_t)pa); - } + /* Before MMU is enabled */ + return (pmd_t *)((uintptr_t)pa); } -static phys_addr_t __init alloc_pmd(uintptr_t va) +static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa) { - uintptr_t pmd_num; + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); +} + +static pmd_t *get_pmd_virt_late(phys_addr_t pa) +{ + return (pmd_t *) __va(pa); +} - if (mmu_enabled) - return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +static phys_addr_t __init alloc_pmd_early(uintptr_t va) +{ + uintptr_t pmd_num; pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; BUG_ON(pmd_num >= NUM_EARLY_PMDS); return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; } +static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_pmd_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + static void __init create_pmd_mapping(pmd_t *pmdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -316,28 +363,28 @@ static void __init create_pmd_mapping(pmd_t *pmdp, } if (pmd_none(pmdp[pmd_idx])) { - pte_phys = alloc_pte(va); + pte_phys = pt_ops.alloc_pte(va); pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); - ptep = get_pte_virt(pte_phys); + ptep = pt_ops.get_pte_virt(pte_phys); memset(ptep, 0, PAGE_SIZE); } else { pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx])); - ptep = get_pte_virt(pte_phys); + ptep = pt_ops.get_pte_virt(pte_phys); } create_pte_mapping(ptep, va, pa, sz, prot); } #define pgd_next_t pmd_t -#define alloc_pgd_next(__va) alloc_pmd(__va) -#define get_pgd_next_virt(__pa) get_pmd_virt(__pa) +#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) +#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pmd #else #define pgd_next_t pte_t -#define alloc_pgd_next(__va) alloc_pte(__va) -#define get_pgd_next_virt(__pa) get_pte_virt(__pa) +#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) +#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pte @@ -421,6 +468,12 @@ 
asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((load_pa % map_size) != 0); BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); + pt_ops.alloc_pte = alloc_pte_early; + pt_ops.get_pte_virt = get_pte_virt_early; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_early; + pt_ops.get_pmd_virt = get_pmd_virt_early; +#endif /* Setup early PGD for fixmap */ create_pgd_mapping(early_pg_dir, FIXADDR_START, (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); @@ -497,9 +550,16 @@ static void __init setup_vm_final(void) phys_addr_t pa, start, end; struct memblock_region *reg; - /* Set mmu_enabled flag */ - mmu_enabled = true; - + /** + * MMU is enabled at this point. But page table setup is not complete yet. + * fixmap page table alloc functions should be used at this point + */ + pt_ops.alloc_pte = alloc_pte_fixmap; + pt_ops.get_pte_virt = get_pte_virt_fixmap; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_fixmap; + pt_ops.get_pmd_virt = get_pmd_virt_fixmap; +#endif /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), @@ -533,6 +593,14 @@ static void __init setup_vm_final(void) /* Move to swapper page table */ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); + + /* generic page allocation functions must be used to setup page table */ + pt_ops.alloc_pte = alloc_pte_late; + pt_ops.get_pte_virt = get_pte_virt_late; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_late; + pt_ops.get_pmd_virt = get_pmd_virt_late; +#endif } #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) -- cgit v1.2.3

From cb7d2dd5612a77a2597c00fce770a52c921e2ea5 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 17 Sep 2020 15:37:13 -0700 Subject: RISC-V: Add PE/COFF header for EFI stub

A Linux kernel Image can appear as an EFI application with appropriate PE/COFF header fields at the beginning of the Image header. An EFI application loader can then directly load a Linux kernel Image, and the EFI stub residing in the kernel can boot the Linux kernel directly. Add the necessary PE/COFF header.

Signed-off-by: Atish Patra Link: https://lore.kernel.org/r/20200421033336.9663-3-atish.patra@wdc.com [ardb: - use C prefix for c.li to ensure the expected opcode is emitted - align all image sections according to PE/COFF section alignment ] Signed-off-by: Ard Biesheuvel Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sections.h | 13 +++++++++++++ arch/riscv/kernel/efi-header.S | 111 ++++++++++++++++++++++++++++++++++++++ arch/riscv/kernel/head.S | 16 ++++++ arch/riscv/kernel/image-vars.h | 51 ++++++++++++++++++ arch/riscv/kernel/vmlinux.lds.S | 23 +++++++- 5 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/include/asm/sections.h create mode 100644 arch/riscv/kernel/efi-header.S create mode 100644 arch/riscv/kernel/image-vars.h (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h new file mode 100644 index 000000000000..3a9971b1210f --- /dev/null +++ b/arch/riscv/include/asm/sections.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */ +#ifndef __ASM_SECTIONS_H +#define __ASM_SECTIONS_H + +#include + +extern char _start[]; +extern char _start_kernel[]; + +#endif /* __ASM_SECTIONS_H */ diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S new file mode 100644 index 000000000000..8e733aa48ba6 --- /dev/null +++ b/arch/riscv/kernel/efi-header.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Adapted from arch/arm64/kernel/efi-header.S + */ + +#include +#include + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: +#ifdef CONFIG_64BIT + .short IMAGE_FILE_MACHINE_RISCV64 // Machine +#else + .short IMAGE_FILE_MACHINE_RISCV32 // Machine +#endif + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: +#ifdef CONFIG_64BIT + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format +#else + .short PE_OPT_MAGIC_PE32 // PE32 format +#endif + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __pecoff_text_end - efi_header_end // SizeOfCode + .long __pecoff_data_virt_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint + .long efi_header_end - _start // BaseOfCode +#ifdef CONFIG_32BIT + .long __pecoff_text_end - _start // BaseOfData +#endif + +extra_header_fields: + .quad 0 // ImageBase + .long PECOFF_SECTION_ALIGNMENT // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion + .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _start // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _start // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) 
/ 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __pecoff_text_end - efi_header_end // VirtualSize + .long efi_header_end - _start // VirtualAddress + .long __pecoff_text_end - efi_header_end // SizeOfRawData + .long efi_header_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_virt_size // VirtualSize + .long __pecoff_text_end - _start // VirtualAddress + .long __pecoff_data_raw_size // SizeOfRawData + .long __pecoff_text_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + + .balign 0x1000 +efi_header_end: + .endm diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 703e59d717f0..11e2a4fe66e0 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -12,6 +12,7 @@ #include #include #include +#include "efi-header.S" __HEAD ENTRY(_start) @@ -21,10 +22,18 @@ ENTRY(_start) * Do not modify it without modifying the structure and all bootloaders * that expects this header format!! */ +#ifdef CONFIG_EFI + /* + * This instruction decodes to "MZ" ASCII required by UEFI. + */ + c.li s4,-13 + j _start_kernel +#else /* jump to start kernel */ j _start_kernel /* reserved */ .word 0 +#endif .balign 8 #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ @@ -42,7 +51,14 @@ ENTRY(_start) .ascii RISCV_IMAGE_MAGIC .balign 4 .ascii RISCV_IMAGE_MAGIC2 +#ifdef CONFIG_EFI + .word pe_head_start - _start +pe_head_start: + + __EFI_PE_HEADER +#else .word 0 +#endif
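A closing note on the c.li s4,-13 instruction added to head.S above: PE/COFF requires the image to begin with the ASCII bytes "MZ", and that particular compressed instruction was picked because its encoding is exactly those two bytes, while loading -13 into s4 is harmless at the entry point on non-UEFI boots. A standalone check (a sketch, hand-assembling the RVC C.LI format):

#include <stdio.h>

int main(void)
{
	unsigned int rd = 20;			/* s4 is x20 */
	unsigned int imm = (-13) & 0x3f;	/* 6-bit immediate */
	/* C.LI: funct3=010 | imm[5] | rd | imm[4:0] | op=01 */
	unsigned int insn = (0x2u << 13) | ((imm >> 5) << 12) |
			    (rd << 7) | ((imm & 0x1f) << 2) | 0x1;

	/* Stored little-endian: 0x4d 0x5a, i.e. "MZ", the PE magic */
	printf("0x%04x -> '%c%c'\n", insn, insn & 0xff, (insn >> 8) & 0xff);
	return 0;
}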