summaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-10-11 11:17:08 +0200
committerThomas Gleixner <tglx@linutronix.de>2007-10-11 11:17:08 +0200
commit185f3d38900f750a4566f87cde6a178f3595a115 (patch)
treed463f6da1af452b1bbdf476828ea88427087f255 /arch/x86_64
parent51b2833060f26258ea2da091c7b9c6a358ac9dd2 (diff)
x86_64: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Makefile2
-rw-r--r--arch/x86_64/lib/Makefile5
-rw-r--r--arch/x86_64/lib/Makefile_6413
-rw-r--r--arch/x86_64/lib/bitops_64.c175
-rw-r--r--arch/x86_64/lib/bitstr_64.c28
-rw-r--r--arch/x86_64/lib/clear_page_64.S59
-rw-r--r--arch/x86_64/lib/copy_page_64.S119
-rw-r--r--arch/x86_64/lib/copy_user_64.S354
-rw-r--r--arch/x86_64/lib/copy_user_nocache_64.S217
-rw-r--r--arch/x86_64/lib/csum-copy_64.S249
-rw-r--r--arch/x86_64/lib/csum-partial_64.c150
-rw-r--r--arch/x86_64/lib/csum-wrappers_64.c135
-rw-r--r--arch/x86_64/lib/delay_64.c57
-rw-r--r--arch/x86_64/lib/getuser_64.S109
-rw-r--r--arch/x86_64/lib/io_64.c23
-rw-r--r--arch/x86_64/lib/iomap_copy_64.S30
-rw-r--r--arch/x86_64/lib/memcpy_64.S131
-rw-r--r--arch/x86_64/lib/memmove_64.c21
-rw-r--r--arch/x86_64/lib/memset_64.S133
-rw-r--r--arch/x86_64/lib/msr-on-cpu.c1
-rw-r--r--arch/x86_64/lib/putuser_64.S106
-rw-r--r--arch/x86_64/lib/rwlock_64.S38
-rw-r--r--arch/x86_64/lib/thunk_64.S67
-rw-r--r--arch/x86_64/lib/usercopy_64.c166
24 files changed, 1 insertions, 2387 deletions
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 265484c17234..ae48f179d719 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -73,7 +73,7 @@ AFLAGS += -m64
head-y := arch/x86_64/kernel/head_64.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task_64.o
-libs-y += arch/x86_64/lib/
+libs-y += arch/x86/lib/
core-y += arch/x86_64/kernel/ \
arch/x86_64/mm/ \
arch/x86/crypto/ \
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
deleted file mode 100644
index 2d7d724a2a6a..000000000000
--- a/arch/x86_64/lib/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/lib/Makefile_32
-else
-include ${srctree}/arch/x86_64/lib/Makefile_64
-endif
diff --git a/arch/x86_64/lib/Makefile_64 b/arch/x86_64/lib/Makefile_64
deleted file mode 100644
index bbabad3c9335..000000000000
--- a/arch/x86_64/lib/Makefile_64
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# Makefile for x86_64-specific library files.
-#
-
-CFLAGS_csum-partial_64.o := -funroll-loops
-
-obj-y := io_64.o iomap_copy_64.o
-obj-$(CONFIG_SMP) += msr-on-cpu.o
-
-lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \
- usercopy_64.o getuser_64.o putuser_64.o \
- thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o
-lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o
diff --git a/arch/x86_64/lib/bitops_64.c b/arch/x86_64/lib/bitops_64.c
deleted file mode 100644
index 95b6d9639fba..000000000000
--- a/arch/x86_64/lib/bitops_64.c
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <linux/bitops.h>
-
-#undef find_first_zero_bit
-#undef find_next_zero_bit
-#undef find_first_bit
-#undef find_next_bit
-
-static inline long
-__find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
- long d0, d1, d2;
- long res;
-
- /*
- * We must test the size in words, not in bits, because
- * otherwise incoming sizes in the range -63..-1 will not run
- * any scasq instructions, and then the flags used by the je
- * instruction will have whatever random value was in place
- * before. Nobody should call us like that, but
- * find_next_zero_bit() does when offset and size are at the
- * same word and it fails to find a zero itself.
- */
- size += 63;
- size >>= 6;
- if (!size)
- return 0;
- asm volatile(
- " repe; scasq\n"
- " je 1f\n"
- " xorq -8(%%rdi),%%rax\n"
- " subq $8,%%rdi\n"
- " bsfq %%rax,%%rdx\n"
- "1: subq %[addr],%%rdi\n"
- " shlq $3,%%rdi\n"
- " addq %%rdi,%%rdx"
- :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
- [addr] "S" (addr) : "memory");
- /*
- * Any register would do for [addr] above, but GCC tends to
- * prefer rbx over rsi, even though rsi is readily available
- * and doesn't have to be saved.
- */
- return res;
-}
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-long find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
- return __find_first_zero_bit (addr, size);
-}
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_zero_bit (const unsigned long * addr, long size, long offset)
-{
- const unsigned long * p = addr + (offset >> 6);
- unsigned long set = 0;
- unsigned long res, bit = offset&63;
-
- if (bit) {
- /*
- * Look for zero in first word
- */
- asm("bsfq %1,%0\n\t"
- "cmoveq %2,%0"
- : "=r" (set)
- : "r" (~(*p >> bit)), "r"(64L));
- if (set < (64 - bit))
- return set + offset;
- set = 64 - bit;
- p++;
- }
- /*
- * No zero yet, search remaining full words for a zero
- */
- res = __find_first_zero_bit (p, size - 64 * (p - addr));
-
- return (offset + set + res);
-}
-
-static inline long
-__find_first_bit(const unsigned long * addr, unsigned long size)
-{
- long d0, d1;
- long res;
-
- /*
- * We must test the size in words, not in bits, because
- * otherwise incoming sizes in the range -63..-1 will not run
- * any scasq instructions, and then the flags used by the jz
- * instruction will have whatever random value was in place
- * before. Nobody should call us like that, but
- * find_next_bit() does when offset and size are at the same
- * word and it fails to find a one itself.
- */
- size += 63;
- size >>= 6;
- if (!size)
- return 0;
- asm volatile(
- " repe; scasq\n"
- " jz 1f\n"
- " subq $8,%%rdi\n"
- " bsfq (%%rdi),%%rax\n"
- "1: subq %[addr],%%rdi\n"
- " shlq $3,%%rdi\n"
- " addq %%rdi,%%rax"
- :"=a" (res), "=&c" (d0), "=&D" (d1)
- :"0" (0ULL), "1" (size), "2" (addr),
- [addr] "r" (addr) : "memory");
- return res;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-long find_first_bit(const unsigned long * addr, unsigned long size)
-{
- return __find_first_bit(addr,size);
-}
-
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_bit(const unsigned long * addr, long size, long offset)
-{
- const unsigned long * p = addr + (offset >> 6);
- unsigned long set = 0, bit = offset & 63, res;
-
- if (bit) {
- /*
- * Look for nonzero in the first 64 bits:
- */
- asm("bsfq %1,%0\n\t"
- "cmoveq %2,%0\n\t"
- : "=r" (set)
- : "r" (*p >> bit), "r" (64L));
- if (set < (64 - bit))
- return set + offset;
- set = 64 - bit;
- p++;
- }
- /*
- * No set bit yet, search remaining full words for a bit
- */
- res = __find_first_bit (p, size - 64 * (p - addr));
- return (offset + set + res);
-}
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(find_next_bit);
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86_64/lib/bitstr_64.c b/arch/x86_64/lib/bitstr_64.c
deleted file mode 100644
index 24676609a6ac..000000000000
--- a/arch/x86_64/lib/bitstr_64.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <linux/module.h>
-#include <linux/bitops.h>
-
-/* Find string of zero bits in a bitmap */
-unsigned long
-find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
-{
- unsigned long n, end, i;
-
- again:
- n = find_next_zero_bit(bitmap, nbits, start);
- if (n == -1)
- return -1;
-
- /* could test bitsliced, but it's hardly worth it */
- end = n+len;
- if (end >= nbits)
- return -1;
- for (i = n+1; i < end; i++) {
- if (test_bit(i, bitmap)) {
- start = i+1;
- goto again;
- }
- }
- return n;
-}
-
-EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86_64/lib/clear_page_64.S b/arch/x86_64/lib/clear_page_64.S
deleted file mode 100644
index 9a10a78bb4a4..000000000000
--- a/arch/x86_64/lib/clear_page_64.S
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-/*
- * Zero a page.
- * rdi page
- */
- ALIGN
-clear_page_c:
- CFI_STARTPROC
- movl $4096/8,%ecx
- xorl %eax,%eax
- rep stosq
- ret
- CFI_ENDPROC
-ENDPROC(clear_page)
-
-ENTRY(clear_page)
- CFI_STARTPROC
- xorl %eax,%eax
- movl $4096/64,%ecx
- .p2align 4
-.Lloop:
- decl %ecx
-#define PUT(x) movq %rax,x*8(%rdi)
- movq %rax,(%rdi)
- PUT(1)
- PUT(2)
- PUT(3)
- PUT(4)
- PUT(5)
- PUT(6)
- PUT(7)
- leaq 64(%rdi),%rdi
- jnz .Lloop
- nop
- ret
- CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
- /* Some CPUs run faster using the string instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad clear_page
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- .byte .Lclear_page_end - clear_page
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/copy_page_64.S b/arch/x86_64/lib/copy_page_64.S
deleted file mode 100644
index 727a5d46d2fc..000000000000
--- a/arch/x86_64/lib/copy_page_64.S
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
- ALIGN
-copy_page_c:
- CFI_STARTPROC
- movl $4096/8,%ecx
- rep movsq
- ret
- CFI_ENDPROC
-ENDPROC(copy_page_c)
-
-/* Don't use streaming store because it's better when the target
- ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
-
-ENTRY(copy_page)
- CFI_STARTPROC
- subq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET 3*8
- movq %rbx,(%rsp)
- CFI_REL_OFFSET rbx, 0
- movq %r12,1*8(%rsp)
- CFI_REL_OFFSET r12, 1*8
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13, 2*8
-
- movl $(4096/64)-5,%ecx
- .p2align 4
-.Loop64:
- dec %rcx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- prefetcht0 5*64(%rsi)
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
-
- jnz .Loop64
-
- movl $5,%ecx
- .p2align 4
-.Loop2:
- decl %ecx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
-
- jnz .Loop2
-
- movq (%rsp),%rbx
- CFI_RESTORE rbx
- movq 1*8(%rsp),%r12
- CFI_RESTORE r12
- movq 2*8(%rsp),%r13
- CFI_RESTORE r13
- addq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET -3*8
- ret
-.Lcopy_page_end:
- CFI_ENDPROC
-ENDPROC(copy_page)
-
- /* Some CPUs run faster using the string copy instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad copy_page
- .quad 1b
- .byte X86_FEATURE_REP_GOOD
- .byte .Lcopy_page_end - copy_page
- .byte 2b - 1b
- .previous
diff --git a/arch/x86_64/lib/copy_user_64.S b/arch/x86_64/lib/copy_user_64.S
deleted file mode 100644
index 70bebd310408..000000000000
--- a/arch/x86_64/lib/copy_user_64.S
+++ /dev/null
@@ -1,354 +0,0 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
- *
- * Functions to copy from and to user space.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-
- .macro ALTERNATIVE_JUMP feature,orig,alt
-0:
- .byte 0xe9 /* 32bit jump */
- .long \orig-1f /* by default jump to orig */
-1:
- .section .altinstr_replacement,"ax"
-2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt-1b /* offset */ /* or alternatively to alt */
- .previous
- .section .altinstructions,"a"
- .align 8
- .quad 0b
- .quad 2b
- .byte \feature /* when feature is set */
- .byte 5
- .byte 5
- .previous
- .endm
-
-/* Standard copy_to_user with segment limit checking */
-ENTRY(copy_to_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rdi,%rcx
- addq %rdx,%rcx
- jc bad_to_user
- cmpq threadinfo_addr_limit(%rax),%rcx
- jae bad_to_user
- xorl %eax,%eax /* clear zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-ENTRY(copy_user_generic)
- CFI_STARTPROC
- movl $1,%ecx /* set zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-ENTRY(__copy_from_user_inatomic)
- CFI_STARTPROC
- xorl %ecx,%ecx /* clear zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(copy_from_user)
- CFI_STARTPROC
- GET_THREAD_INFO(%rax)
- movq %rsi,%rcx
- addq %rdx,%rcx
- jc bad_from_user
- cmpq threadinfo_addr_limit(%rax),%rcx
- jae bad_from_user
- movl $1,%ecx /* set zero flag */
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-ENDPROC(copy_from_user)
-
- .section .fixup,"ax"
- /* must zero dest */
-bad_from_user:
- CFI_STARTPROC
- movl %edx,%ecx
- xorl %eax,%eax
- rep
- stosb
-bad_to_user:
- movl %edx,%eax
- ret
- CFI_ENDPROC
-END(bad_from_user)
- .previous
-
-
-/*
- * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- * ecx zero flag -- if true zero destination on error
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(copy_user_generic_unrolled)
- CFI_STARTPROC
- pushq %rbx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbx, 0
- pushq %rcx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx, 0
- xorl %eax,%eax /*zero for the exception handler */
-
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jnz .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
- movq %rdx,%rcx
-
- movl $64,%ebx
- shrq $6,%rdx
- decq %rdx
- js .Lhandle_tail
-
- .p2align 4
-.Lloop:
-.Ls1: movq (%rsi),%r11
-.Ls2: movq 1*8(%rsi),%r8
-.Ls3: movq 2*8(%rsi),%r9
-.Ls4: movq 3*8(%rsi),%r10
-.Ld1: movq %r11,(%rdi)
-.Ld2: movq %r8,1*8(%rdi)
-.Ld3: movq %r9,2*8(%rdi)
-.Ld4: movq %r10,3*8(%rdi)
-
-.Ls5: movq 4*8(%rsi),%r11
-.Ls6: movq 5*8(%rsi),%r8
-.Ls7: movq 6*8(%rsi),%r9
-.Ls8: movq 7*8(%rsi),%r10
-.Ld5: movq %r11,4*8(%rdi)
-.Ld6: movq %r8,5*8(%rdi)
-.Ld7: movq %r9,6*8(%rdi)
-.Ld8: movq %r10,7*8(%rdi)
-
- decq %rdx
-
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
-
- jns .Lloop
-
- .p2align 4
-.Lhandle_tail:
- movl %ecx,%edx
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lhandle_7
- movl $8,%ebx
- .p2align 4
-.Lloop_8:
-.Ls9: movq (%rsi),%r8
-.Ld9: movq %r8,(%rdi)
- decl %ecx
- leaq 8(%rdi),%rdi
- leaq 8(%rsi),%rsi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %edx,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
-.Ls10: movb (%rsi),%bl
-.Ld10: movb %bl,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz .Lloop_1
-
- CFI_REMEMBER_STATE
-.Lende:
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rcx
- popq %rbx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rbx
- ret
- CFI_RESTORE_STATE
-
-#ifdef FIX_ALIGNMENT
- /* align destination */
- .p2align 4
-.Lbad_alignment:
- movl $8,%r9d
- subl %ecx,%r9d
- movl %r9d,%ecx
- cmpq %r9,%rdx
- jz .Lhandle_7
- js .Lhandle_7
-.Lalign_1:
-.Ls11: movb (%rsi),%bl
-.Ld11: movb %bl,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .Lalign_1
- subq %r9,%rdx
- jmp .Lafter_bad_alignment
-#endif
-
- /* table sorted by exception address */
- .section __ex_table,"a"
- .align 8
- .quad .Ls1,.Ls1e
- .quad .Ls2,.Ls2e
- .quad .Ls3,.Ls3e
- .quad .Ls4,.Ls4e
- .quad .Ld1,.Ls1e
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
- .quad .Ls5,.Ls5e
- .quad .Ls6,.Ls6e
- .quad .Ls7,.Ls7e
- .quad .Ls8,.Ls8e
- .quad .Ld5,.Ls5e
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
- .quad .Ls10,.Le_byte
- .quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
- .quad .Ls11,.Lzero_rest
- .quad .Ld11,.Lzero_rest
-#endif
- .quad .Le5,.Le_zero
- .previous
-
- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
- pessimistic side. this is gross. it would be better to fix the
- interface. */
- /* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
- addq %rbx,%rdi /* +64 */
- subq %rax,%rdi /* correct destination with computed offset */
-
- shlq $6,%rdx /* loop counter * 64 (stride length) */
- addq %rax,%rdx /* add offset to loopcnt */
- andl $63,%ecx /* remaining bytes */
- addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
-
- /* exception on quad word loop in tail handling */
- /* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
- andl $7,%edx
- addl %ecx,%edx
- /* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
- cmpl $0,(%rsp)
- jz .Le_zero
- movq %rdx,%rcx
-.Le_byte:
- xorl %eax,%eax
-.Le5: rep
- stosb
- /* when there is another exception while zeroing the rest just return */
-.Le_zero:
- movq %rdx,%rax
- jmp .Lende
- CFI_ENDPROC
-ENDPROC(copy_user_generic)
-
-
- /* Some CPUs run faster using the string copy instructions.
- This is also a lot simpler. Use them when possible.
- Patch in jmps to this code instead of copying it fully
- to avoid unwanted aliasing in the exception tables. */
-
- /* rdi destination
- * rsi source
- * rdx count
- * ecx zero flag
- *
- * Output:
- * eax uncopied bytes or 0 if successfull.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
- */
-ENTRY(copy_user_generic_string)
- CFI_STARTPROC
- movl %ecx,%r8d /* save zero flag */
- movl %edx,%ecx
- shrl $3,%ecx
- andl $7,%edx
- jz 10f
-1: rep
- movsq
- movl %edx,%ecx
-2: rep
- movsb
-9: movl %ecx,%eax
- ret
-
- /* multiple of 8 byte */
-10: rep
- movsq
- xor %eax,%eax
- ret
-
- /* exception handling */
-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
- jmp 6f
-5: movl %ecx,%eax /* exception on byte loop */
- /* eax: left over bytes */
-6: testl %r8d,%r8d /* zero flag set? */
- jz 7f
- movl %eax,%ecx /* initialize x86 loop counter */
- push %rax
- xorl %eax,%eax
-8: rep
- stosb /* zero the rest */
-11: pop %rax
-7: ret
- CFI_ENDPROC
-END(copy_user_generic_c)
-
- .section __ex_table,"a"
- .quad 1b,3b
- .quad 2b,5b
- .quad 8b,11b
- .quad 10b,3b
- .previous
diff --git a/arch/x86_64/lib/copy_user_nocache_64.S b/arch/x86_64/lib/copy_user_nocache_64.S
deleted file mode 100644
index 4620efb12f13..000000000000
--- a/arch/x86_64/lib/copy_user_nocache_64.S
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License v2.
- *
- * Functions to copy from and to user space.
- */
-
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeature.h>
-
-/*
- * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- * rcx zero flag when 1 zero on exception
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-ENTRY(__copy_user_nocache)
- CFI_STARTPROC
- pushq %rbx
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbx, 0
- pushq %rcx /* save zero flag */
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rcx, 0
-
- xorl %eax,%eax /* zero for the exception handler */
-
-#ifdef FIX_ALIGNMENT
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jnz .Lbad_alignment
-.Lafter_bad_alignment:
-#endif
-
- movq %rdx,%rcx
-
- movl $64,%ebx
- shrq $6,%rdx
- decq %rdx
- js .Lhandle_tail
-
- .p2align 4
-.Lloop:
-.Ls1: movq (%rsi),%r11
-.Ls2: movq 1*8(%rsi),%r8
-.Ls3: movq 2*8(%rsi),%r9
-.Ls4: movq 3*8(%rsi),%r10
-.Ld1: movnti %r11,(%rdi)
-.Ld2: movnti %r8,1*8(%rdi)
-.Ld3: movnti %r9,2*8(%rdi)
-.Ld4: movnti %r10,3*8(%rdi)
-
-.Ls5: movq 4*8(%rsi),%r11
-.Ls6: movq 5*8(%rsi),%r8
-.Ls7: movq 6*8(%rsi),%r9
-.Ls8: movq 7*8(%rsi),%r10
-.Ld5: movnti %r11,4*8(%rdi)
-.Ld6: movnti %r8,5*8(%rdi)
-.Ld7: movnti %r9,6*8(%rdi)
-.Ld8: movnti %r10,7*8(%rdi)
-
- dec %rdx
-
- leaq 64(%rsi),%rsi
- leaq 64(%rdi),%rdi
-
- jns .Lloop
-
- .p2align 4
-.Lhandle_tail:
- movl %ecx,%edx
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lhandle_7
- movl $8,%ebx
- .p2align 4
-.Lloop_8:
-.Ls9: movq (%rsi),%r8
-.Ld9: movnti %r8,(%rdi)
- decl %ecx
- leaq 8(%rdi),%rdi
- leaq 8(%rsi),%rsi
- jnz .Lloop_8
-
-.Lhandle_7:
- movl %edx,%ecx
- andl $7,%ecx
- jz .Lende
- .p2align 4
-.Lloop_1:
-.Ls10: movb (%rsi),%bl
-.Ld10: movb %bl,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz .Lloop_1
-
- CFI_REMEMBER_STATE
-.Lende:
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE %rcx
- popq %rbx
- CFI_ADJUST_CFA_OFFSET -8
- CFI_RESTORE rbx
- ret
- CFI_RESTORE_STATE
-
-#ifdef FIX_ALIGNMENT
- /* align destination */
- .p2align 4
-.Lbad_alignment:
- movl $8,%r9d
- subl %ecx,%r9d
- movl %r9d,%ecx
- cmpq %r9,%rdx
- jz .Lhandle_7
- js .Lhandle_7
-.Lalign_1:
-.Ls11: movb (%rsi),%bl
-.Ld11: movb %bl,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .Lalign_1
- subq %r9,%rdx
- jmp .Lafter_bad_alignment
-#endif
-
- /* table sorted by exception address */
- .section __ex_table,"a"
- .align 8
- .quad .Ls1,.Ls1e
- .quad .Ls2,.Ls2e
- .quad .Ls3,.Ls3e
- .quad .Ls4,.Ls4e
- .quad .Ld1,.Ls1e
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
- .quad .Ls5,.Ls5e
- .quad .Ls6,.Ls6e
- .quad .Ls7,.Ls7e
- .quad .Ls8,.Ls8e
- .quad .Ld5,.Ls5e
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
- .quad .Ls10,.Le_byte
- .quad .Ld10,.Le_byte
-#ifdef FIX_ALIGNMENT
- .quad .Ls11,.Lzero_rest
- .quad .Ld11,.Lzero_rest
-#endif
- .quad .Le5,.Le_zero
- .previous
-
- /* compute 64-offset for main loop. 8 bytes accuracy with error on the
- pessimistic side. this is gross. it would be better to fix the
- interface. */
- /* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
- addq %rbx,%rdi /* +64 */
- subq %rax,%rdi /* correct destination with computed offset */
-
- shlq $6,%rdx /* loop counter * 64 (stride length) */
- addq %rax,%rdx /* add offset to loopcnt */
- andl $63,%ecx /* remaining bytes */
- addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
-
- /* exception on quad word loop in tail handling */
- /* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
- andl $7,%edx
- addl %ecx,%edx
- /* edx: bytes to zero, rdi: dest, eax:zero */
-.Lzero_rest:
- cmpl $0,(%rsp) /* zero flag set? */
- jz .Le_zero
- movq %rdx,%rcx
-.Le_byte:
- xorl %eax,%eax
-.Le5: rep
- stosb
- /* when there is another exception while zeroing the rest just return */
-.Le_zero:
- movq %rdx,%rax
- jmp .Lende
- CFI_ENDPROC
-ENDPROC(__copy_user_nocache)
-
-
diff --git a/arch/x86_64/lib/csum-copy_64.S b/arch/x86_64/lib/csum-copy_64.S
deleted file mode 100644
index f0dba36578ea..000000000000
--- a/arch/x86_64/lib/csum-copy_64.S
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file COPYING in the main directory of this archive
- * for more details. No warranty for anything given at all.
- */
-#include <linux/linkage.h>
-#include <asm/dwarf2.h>
-#include <asm/errno.h>
-
-/*
- * Checksum copy with exception handling.
- * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
- * destination is zeroed.
- *
- * Input
- * rdi source
- * rsi destination
- * edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
- *
- * Output
- * eax 64bit sum. undefined in case of exception.
- *
- * Wrappers need to take care of valid exception sum and zeroing.
- * They also should align source or destination to 8 bytes.
- */
-
- .macro source
-10:
- .section __ex_table,"a"
- .align 8
- .quad 10b,.Lbad_source
- .previous
- .endm
-
- .macro dest
-20:
- .section __ex_table,"a"
- .align 8
- .quad 20b,.Lbad_dest
- .previous
- .endm
-
- .macro ignore L=.Lignore
-30:
- .section __ex_table,"a"
- .align 8
- .quad 30b,\L
- .previous
- .endm<