add idl4k kernel firmware version 1.13.0.105

This commit is contained in:
Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
#
# Makefile for x86 specific library files.
#
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o
lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
obj-y += msr.o msr-reg.o msr-reg-export.o
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
lib-y += checksum_32.o
lib-y += strstr_32.o
lib-y += semaphore_32.o string_32.o
ifneq ($(CONFIG_X86_CMPXCHG64),y)
lib-y += cmpxchg8b_emu.o
endif
lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += io_64.o iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
lib-y += thunk_64.o clear_page_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
endif

View File

@@ -0,0 +1,230 @@
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/types.h>
#include <asm/processor.h>
#include <asm/cmpxchg.h>
#include <asm/atomic.h>
static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
{
u32 low = new;
u32 high = new >> 32;
asm volatile(
LOCK_PREFIX "cmpxchg8b %1\n"
: "+A" (old), "+m" (*ptr)
: "b" (low), "c" (high)
);
return old;
}
u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val)
{
return cmpxchg8b(&ptr->counter, old_val, new_val);
}
EXPORT_SYMBOL(atomic64_cmpxchg);
/**
* atomic64_xchg - xchg atomic64 variable
* @ptr: pointer to type atomic64_t
* @new_val: value to assign
*
* Atomically xchgs the value of @ptr to @new_val and returns
* the old value.
*/
u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
{
/*
* Try first with a (possibly incorrect) assumption about
* what we have there. We'll do two loops most likely,
* but we'll get an ownership MESI transaction straight away
* instead of a read transaction followed by a
* flush-for-ownership transaction:
*/
u64 old_val, real_val = 0;
do {
old_val = real_val;
real_val = atomic64_cmpxchg(ptr, old_val, new_val);
} while (real_val != old_val);
return old_val;
}
EXPORT_SYMBOL(atomic64_xchg);
/**
* atomic64_set - set atomic64 variable
* @ptr: pointer to type atomic64_t
* @new_val: value to assign
*
* Atomically sets the value of @ptr to @new_val.
*/
void atomic64_set(atomic64_t *ptr, u64 new_val)
{
atomic64_xchg(ptr, new_val);
}
EXPORT_SYMBOL(atomic64_set);
/**
EXPORT_SYMBOL(atomic64_read);
* atomic64_add_return - add and return
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr and returns @delta + *@ptr
*/
noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
{
/*
* Try first with a (possibly incorrect) assumption about
* what we have there. We'll do two loops most likely,
* but we'll get an ownership MESI transaction straight away
* instead of a read transaction followed by a
* flush-for-ownership transaction:
*/
u64 old_val, new_val, real_val = 0;
do {
old_val = real_val;
new_val = old_val + delta;
real_val = atomic64_cmpxchg(ptr, old_val, new_val);
} while (real_val != old_val);
return new_val;
}
EXPORT_SYMBOL(atomic64_add_return);
u64 atomic64_sub_return(u64 delta, atomic64_t *ptr)
{
return atomic64_add_return(-delta, ptr);
}
EXPORT_SYMBOL(atomic64_sub_return);
u64 atomic64_inc_return(atomic64_t *ptr)
{
return atomic64_add_return(1, ptr);
}
EXPORT_SYMBOL(atomic64_inc_return);
u64 atomic64_dec_return(atomic64_t *ptr)
{
return atomic64_sub_return(1, ptr);
}
EXPORT_SYMBOL(atomic64_dec_return);
/**
* atomic64_add - add integer to atomic64 variable
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr.
*/
void atomic64_add(u64 delta, atomic64_t *ptr)
{
atomic64_add_return(delta, ptr);
}
EXPORT_SYMBOL(atomic64_add);
/**
* atomic64_sub - subtract the atomic64 variable
* @delta: integer value to subtract
* @ptr: pointer to type atomic64_t
*
* Atomically subtracts @delta from @ptr.
*/
void atomic64_sub(u64 delta, atomic64_t *ptr)
{
atomic64_add(-delta, ptr);
}
EXPORT_SYMBOL(atomic64_sub);
/**
* atomic64_sub_and_test - subtract value from variable and test result
* @delta: integer value to subtract
* @ptr: pointer to type atomic64_t
*
* Atomically subtracts @delta from @ptr and returns
* true if the result is zero, or false for all
* other cases.
*/
int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
{
u64 new_val = atomic64_sub_return(delta, ptr);
return new_val == 0;
}
EXPORT_SYMBOL(atomic64_sub_and_test);
/**
* atomic64_inc - increment atomic64 variable
* @ptr: pointer to type atomic64_t
*
* Atomically increments @ptr by 1.
*/
void atomic64_inc(atomic64_t *ptr)
{
atomic64_add(1, ptr);
}
EXPORT_SYMBOL(atomic64_inc);
/**
* atomic64_dec - decrement atomic64 variable
* @ptr: pointer to type atomic64_t
*
* Atomically decrements @ptr by 1.
*/
void atomic64_dec(atomic64_t *ptr)
{
atomic64_sub(1, ptr);
}
EXPORT_SYMBOL(atomic64_dec);
/**
* atomic64_dec_and_test - decrement and test
* @ptr: pointer to type atomic64_t
*
* Atomically decrements @ptr by 1 and
* returns true if the result is 0, or false for all other
* cases.
*/
int atomic64_dec_and_test(atomic64_t *ptr)
{
return atomic64_sub_and_test(1, ptr);
}
EXPORT_SYMBOL(atomic64_dec_and_test);
/**
* atomic64_inc_and_test - increment and test
* @ptr: pointer to type atomic64_t
*
* Atomically increments @ptr by 1
* and returns true if the result is zero, or false for all
* other cases.
*/
int atomic64_inc_and_test(atomic64_t *ptr)
{
return atomic64_sub_and_test(-1, ptr);
}
EXPORT_SYMBOL(atomic64_inc_and_test);
/**
* atomic64_add_negative - add and test if negative
* @delta: integer value to add
* @ptr: pointer to type atomic64_t
*
* Atomically adds @delta to @ptr and returns true
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
int atomic64_add_negative(u64 delta, atomic64_t *ptr)
{
s64 new_val = atomic64_add_return(delta, ptr);
return new_val < 0;
}
EXPORT_SYMBOL(atomic64_add_negative);

View File

@@ -0,0 +1,19 @@
#include <linux/smp.h>
#include <linux/module.h>
static void __wbinvd(void *dummy)
{
wbinvd();
}
void wbinvd_on_cpu(int cpu)
{
smp_call_function_single(cpu, __wbinvd, NULL, 1);
}
EXPORT_SYMBOL(wbinvd_on_cpu);
int wbinvd_on_all_cpus(void)
{
return on_each_cpu(__wbinvd, NULL, 1);
}
EXPORT_SYMBOL(wbinvd_on_all_cpus);

View File

@@ -0,0 +1,546 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IP/TCP/UDP checksumming routines
*
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Tom May, <ftom@netcom.com>
* Pentium Pro/II routines:
* Alexander Kjeldaas <astor@guardian.no>
* Finn Arne Gangstad <finnag@guardian.no>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
.text
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
/*
* Experiments with Ethernet and SLIP connections show that buff
* is aligned on either a 2-byte or 4-byte boundary. We get at
* least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
ENTRY(csum_partial)
CFI_STARTPROC
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
testl $3, %esi # Check alignment.
jz 2f # Jump if alignment is ok.
testl $1, %esi # Check alignment.
jz 10f # Jump if alignment is boundary of 2bytes.
# buf is odd
dec %ecx
jl 8f
movzbl (%esi), %ebx
adcl %ebx, %eax
roll $8, %eax
inc %esi
testl $2, %esi
jz 2f
10:
subl $2, %ecx # Alignment uses up two bytes.
jae 1f # Jump if we had at least two bytes.
addl $2, %ecx # ecx was < 2. Deal with it.
jmp 4f
1: movw (%esi), %bx
addl $2, %esi
addw %bx, %ax
adcl $0, %eax
2:
movl %ecx, %edx
shrl $5, %ecx
jz 2f
testl %esi, %esi
1: movl (%esi), %ebx
adcl %ebx, %eax
movl 4(%esi), %ebx
adcl %ebx, %eax
movl 8(%esi), %ebx
adcl %ebx, %eax
movl 12(%esi), %ebx
adcl %ebx, %eax
movl 16(%esi), %ebx
adcl %ebx, %eax
movl 20(%esi), %ebx
adcl %ebx, %eax
movl 24(%esi), %ebx
adcl %ebx, %eax
movl 28(%esi), %ebx
adcl %ebx, %eax
lea 32(%esi), %esi
dec %ecx
jne 1b
adcl $0, %eax
2: movl %edx, %ecx
andl $0x1c, %edx
je 4f
shrl $2, %edx # This clears CF
3: adcl (%esi), %eax
lea 4(%esi), %esi
dec %edx
jne 3b
adcl $0, %eax
4: andl $3, %ecx
jz 7f
cmpl $2, %ecx
jb 5f
movw (%esi),%cx
leal 2(%esi),%esi
je 6f
shll $16,%ecx
5: movb (%esi),%cl
6: addl %ecx,%eax
adcl $0, %eax
7:
testl $1, 12(%esp)
jz 8f
roll $8, %eax
8:
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
#else
/* Version for PentiumII/PPro */
ENTRY(csum_partial)
CFI_STARTPROC
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
testl $3, %esi
jnz 25f
10:
movl %ecx, %edx
movl %ecx, %ebx
andl $0x7c, %ebx
shrl $7, %ecx
addl %ebx,%esi
shrl $2, %ebx
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
jmp *%ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
lea 2(%esi), %esi
adcl $0, %eax
jmp 10b
25:
testl $1, %esi
jz 30f
# buf is odd
dec %ecx
jl 90f
movzbl (%esi), %ebx
addl %ebx, %eax
adcl $0, %eax
roll $8, %eax
inc %esi
testl $2, %esi
jz 10b
30: subl $2, %ecx
ja 20b
je 32f
addl $2, %ecx
jz 80f
movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
addl %ebx, %eax
adcl $0, %eax
jmp 80f
32:
addw (%esi), %ax # csumming 2 bytes, 2-aligned
adcl $0, %eax
jmp 80f
40:
addl -128(%esi), %eax
adcl -124(%esi), %eax
adcl -120(%esi), %eax
adcl -116(%esi), %eax
adcl -112(%esi), %eax
adcl -108(%esi), %eax
adcl -104(%esi), %eax
adcl -100(%esi), %eax
adcl -96(%esi), %eax
adcl -92(%esi), %eax
adcl -88(%esi), %eax
adcl -84(%esi), %eax
adcl -80(%esi), %eax
adcl -76(%esi), %eax
adcl -72(%esi), %eax
adcl -68(%esi), %eax
adcl -64(%esi), %eax
adcl -60(%esi), %eax
adcl -56(%esi), %eax
adcl -52(%esi), %eax
adcl -48(%esi), %eax
adcl -44(%esi), %eax
adcl -40(%esi), %eax
adcl -36(%esi), %eax
adcl -32(%esi), %eax
adcl -28(%esi), %eax
adcl -24(%esi), %eax
adcl -20(%esi), %eax
adcl -16(%esi), %eax
adcl -12(%esi), %eax
adcl -8(%esi), %eax
adcl -4(%esi), %eax
45:
lea 128(%esi), %esi
adcl $0, %eax
dec %ecx
jge 40b
movl %edx, %ecx
50: andl $3, %ecx
jz 80f
# Handle the last 1-3 bytes without jumping
notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
movl $0xffffff,%ebx # by the shll and shrl instructions
shll $3,%ecx
shrl %cl,%ebx
andl -128(%esi),%ebx # esi is 4-aligned so should be ok
addl %ebx,%eax
adcl $0,%eax
80:
testl $1, 12(%esp)
jz 90f
roll $8, %eax
90:
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
#endif
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/
/*
* Copy from ds while checksumming, otherwise like csum_partial
*
* The macros SRC and DST specify the type of access for the instruction.
* thus we can call a custom exception handler for all access types.
*
* FIXME: could someone double-check whether I haven't mixed up some SRC and
* DST definitions? It's damn hard to trigger all cases. I hope I got
* them all but there's no guarantee.
*/
#define SRC(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6001f ; \
.previous
#define DST(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6002f ; \
.previous
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
#define ARGBASE 16
#define FP 12
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
movl ARGBASE+8(%esp),%edi # dst
testl $2, %edi # Check alignment.
jz 2f # Jump if alignment is ok.
subl $2, %ecx # Alignment uses up two bytes.
jae 1f # Jump if we had at least two bytes.
addl $2, %ecx # ecx was < 2. Deal with it.
jmp 4f
SRC(1: movw (%esi), %bx )
addl $2, %esi
DST( movw %bx, (%edi) )
addl $2, %edi
addw %bx, %ax
adcl $0, %eax
2:
movl %ecx, FP(%esp)
shrl $5, %ecx
jz 2f
testl %esi, %esi
SRC(1: movl (%esi), %ebx )
SRC( movl 4(%esi), %edx )
adcl %ebx, %eax
DST( movl %ebx, (%edi) )
adcl %edx, %eax
DST( movl %edx, 4(%edi) )
SRC( movl 8(%esi), %ebx )
SRC( movl 12(%esi), %edx )
adcl %ebx, %eax
DST( movl %ebx, 8(%edi) )
adcl %edx, %eax
DST( movl %edx, 12(%edi) )
SRC( movl 16(%esi), %ebx )
SRC( movl 20(%esi), %edx )
adcl %ebx, %eax
DST( movl %ebx, 16(%edi) )
adcl %edx, %eax
DST( movl %edx, 20(%edi) )
SRC( movl 24(%esi), %ebx )
SRC( movl 28(%esi), %edx )
adcl %ebx, %eax
DST( movl %ebx, 24(%edi) )
adcl %edx, %eax
DST( movl %edx, 28(%edi) )
lea 32(%esi), %esi
lea 32(%edi), %edi
dec %ecx
jne 1b
adcl $0, %eax
2: movl FP(%esp), %edx
movl %edx, %ecx
andl $0x1c, %edx
je 4f
shrl $2, %edx # This clears CF
SRC(3: movl (%esi), %ebx )
adcl %ebx, %eax
DST( movl %ebx, (%edi) )
lea 4(%esi), %esi
lea 4(%edi), %edi
dec %edx
jne 3b
adcl $0, %eax
4: andl $3, %ecx
jz 7f
cmpl $2, %ecx
jb 5f
SRC( movw (%esi), %cx )
leal 2(%esi), %esi
DST( movw %cx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%ecx
SRC(5: movb (%esi), %cl )
DST( movb %cl, (%edi) )
6: addl %ecx, %eax
adcl $0, %eax
7:
5000:
# Exception handler:
.section .fixup, "ax"
6001:
movl ARGBASE+20(%esp), %ebx # src_err_ptr
movl $-EFAULT, (%ebx)
# zero the complete destination - computing the rest
# is too much work
movl ARGBASE+8(%esp), %edi # dst
movl ARGBASE+12(%esp), %ecx # len
xorl %eax,%eax
rep ; stosb
jmp 5000b
6002:
movl ARGBASE+24(%esp), %ebx # dst_err_ptr
movl $-EFAULT,(%ebx)
jmp 5000b
.previous
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
popl %edi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edi
popl %ecx # equivalent to addl $4,%esp
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
#else
/* Version for PentiumII/PPro */
#define ROUND1(x) \
SRC(movl x(%esi), %ebx ) ; \
addl %ebx, %eax ; \
DST(movl %ebx, x(%edi) ) ;
#define ROUND(x) \
SRC(movl x(%esi), %ebx ) ; \
adcl %ebx, %eax ; \
DST(movl %ebx, x(%edi) ) ;
#define ARGBASE 12
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
pushl %ebx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
pushl %edi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edi, 0
pushl %esi
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
movl ARGBASE+16(%esp),%eax #sum
# movl %ecx, %edx
movl %ecx, %ebx
movl %esi, %edx
shrl $6, %ecx
andl $0x3c, %ebx
negl %ebx
subl %ebx, %esi
subl %ebx, %edi
lea -1(%esi),%edx
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
jmp *%ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
3: adcl $0,%eax
addl $64, %edx
dec %ecx
jge 1b
4: movl ARGBASE+12(%esp),%edx #len
andl $3, %edx
jz 7f
cmpl $2, %edx
jb 5f
SRC( movw (%esi), %dx )
leal 2(%esi), %esi
DST( movw %dx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%edx
5:
SRC( movb (%esi), %dl )
DST( movb %dl, (%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
.section .fixup, "ax"
6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
movl $-EFAULT, (%ebx)
# zero the complete destination (computing the rest is too much work)
movl ARGBASE+8(%esp),%edi # dst
movl ARGBASE+12(%esp),%ecx # len
xorl %eax,%eax
rep; stosb
jmp 7b
6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
movl $-EFAULT, (%ebx)
jmp 7b
.previous
popl %esi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE esi
popl %edi
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE edi
popl %ebx
CFI_ADJUST_CFA_OFFSET -4
CFI_RESTORE ebx
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
#undef ROUND
#undef ROUND1
#endif

View File

@@ -0,0 +1,58 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* Zero a page.
* rdi page
*/
ENTRY(clear_page_c)
CFI_STARTPROC
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
ENDPROC(clear_page_c)
ENTRY(clear_page)
CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
.Lloop:
decl %ecx
#define PUT(x) movq %rax,x*8(%rdi)
movq %rax,(%rdi)
PUT(1)
PUT(2)
PUT(3)
PUT(4)
PUT(5)
PUT(6)
PUT(7)
leaq 64(%rdi),%rdi
jnz .Lloop
nop
ret
CFI_ENDPROC
.Lclear_page_end:
ENDPROC(clear_page)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lclear_page_end - clear_page
.byte 2b - 1b
.previous

View File

@@ -0,0 +1,57 @@
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*
*/
#include <linux/linkage.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>
.text
/*
* Inputs:
* %esi : memory location to compare
* %eax : low 32 bits of old value
* %edx : high 32 bits of old value
* %ebx : low 32 bits of new value
* %ecx : high 32 bits of new value
*/
ENTRY(cmpxchg8b_emu)
CFI_STARTPROC
#
# Emulate 'cmpxchg8b (%esi)' on UP except we don't
# set the whole ZF thing (caller will just compare
# eax:edx with the expected value)
#
cmpxchg8b_emu:
pushfl
cli
cmpl (%esi), %eax
jne not_same
cmpl 4(%esi), %edx
jne half_same
movl %ebx, (%esi)
movl %ecx, 4(%esi)
popfl
ret
not_same:
movl (%esi), %eax
half_same:
movl 4(%esi), %edx
popfl
ret
CFI_ENDPROC
ENDPROC(cmpxchg8b_emu)

View File

@@ -0,0 +1,119 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
ALIGN
copy_page_c:
CFI_STARTPROC
movl $4096/8,%ecx
rep movsq
ret
CFI_ENDPROC
ENDPROC(copy_page_c)
/* Don't use streaming store because it's better when the target
ends up in cache. */
/* Could vary the prefetch distance based on SMP/UP */
ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
.Loop64:
dec %rcx
movq (%rsi), %rax
movq 8 (%rsi), %rbx
movq 16 (%rsi), %rdx
movq 24 (%rsi), %r8
movq 32 (%rsi), %r9
movq 40 (%rsi), %r10
movq 48 (%rsi), %r11
movq 56 (%rsi), %r12
prefetcht0 5*64(%rsi)
movq %rax, (%rdi)
movq %rbx, 8 (%rdi)
movq %rdx, 16 (%rdi)
movq %r8, 24 (%rdi)
movq %r9, 32 (%rdi)
movq %r10, 40 (%rdi)
movq %r11, 48 (%rdi)
movq %r12, 56 (%rdi)
leaq 64 (%rsi), %rsi
leaq 64 (%rdi), %rdi
jnz .Loop64
movl $5,%ecx
.p2align 4
.Loop2:
decl %ecx
movq (%rsi), %rax
movq 8 (%rsi), %rbx
movq 16 (%rsi), %rdx
movq 24 (%rsi), %r8
movq 32 (%rsi), %r9
movq 40 (%rsi), %r10
movq 48 (%rsi), %r11
movq 56 (%rsi), %r12
movq %rax, (%rdi)
movq %rbx, 8 (%rdi)
movq %rdx, 16 (%rdi)
movq %r8, 24 (%rdi)
movq %r9, 32 (%rdi)
movq %r10, 40 (%rdi)
movq %r11, 48 (%rdi)
movq %r12, 56 (%rdi)
leaq 64(%rdi),%rdi
leaq 64(%rsi),%rsi
jnz .Loop2
movq (%rsp),%rbx
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret
.Lcopy_page_end:
CFI_ENDPROC
ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad copy_page
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lcopy_page_end - copy_page
.byte 2b - 1b
.previous

View File

@@ -0,0 +1,269 @@
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
.macro ALTERNATIVE_JUMP feature,orig,alt
0:
.byte 0xe9 /* 32bit jump */
.long \orig-1f /* by default jump to orig */
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long \alt-1b /* offset */ /* or alternatively to alt */
.previous
.section .altinstructions,"a"
.align 8
.quad 0b
.quad 2b
.byte \feature /* when feature is set */
.byte 5
.byte 5
.previous
.endm
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
.section __ex_table,"a"
.align 8
.quad 100b,103b
.quad 101b,103b
.previous
#endif
.endm
/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rdi,%rcx
addq %rdx,%rcx
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(copy_to_user)
/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
CFI_STARTPROC
GET_THREAD_INFO(%rax)
movq %rsi,%rcx
addq %rdx,%rcx
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(copy_from_user)
ENTRY(copy_user_generic)
CFI_STARTPROC
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(copy_user_generic)
ENTRY(__copy_from_user_inatomic)
CFI_STARTPROC
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC
ENDPROC(__copy_from_user_inatomic)
.section .fixup,"ax"
/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
CFI_STARTPROC
movl %edx,%ecx
xorl %eax,%eax
rep
stosb
bad_to_user:
movl %edx,%eax
ret
CFI_ENDPROC
ENDPROC(bad_from_user)
.previous
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
* code for rep movsq
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successfull.
*/
ENTRY(copy_user_generic_unrolled)
CFI_STARTPROC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz 17f
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movq %r8,(%rdi)
6: movq %r9,1*8(%rdi)
7: movq %r10,2*8(%rdi)
8: movq %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movq %r8,4*8(%rdi)
14: movq %r9,5*8(%rdi)
15: movq %r10,6*8(%rdi)
16: movq %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
17: movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movq %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xor %eax,%eax
ret
.section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
40: lea (%rdx,%rcx,8),%rdx
jmp 60f
50: movl %ecx,%edx
60: jmp copy_user_handle_tail /* ecx is zerorest also */
.previous
.section __ex_table,"a"
.align 8
.quad 1b,30b
.quad 2b,30b
.quad 3b,30b
.quad 4b,30b
.quad 5b,30b
.quad 6b,30b
.quad 7b,30b
.quad 8b,30b
.quad 9b,30b
.quad 10b,30b
.quad 11b,30b
.quad 12b,30b
.quad 13b,30b
.quad 14b,30b
.quad 15b,30b
.quad 16b,30b
.quad 18b,40b
.quad 19b,40b
.quad 21b,50b
.quad 22b,50b
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
/* Some CPUs run faster using the string copy instructions.
* This is also a lot simpler. Use them when possible.
*
* Only 4GB of copy is supported. This shouldn't be a problem
* because the kernel normally only writes from/to page sized chunks
* even if user space passed a longer buffer.
* And more would be dangerous because both Intel and AMD have
* errata with rep movsq > 4GB. If someone feels the need to fix
* this please consider this.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
ENTRY(copy_user_generic_string)
CFI_STARTPROC
andl %edx,%edx
jz 4f
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
1: rep
movsq
2: movl %edx,%ecx
3: rep
movsb
4: xorl %eax,%eax
ret
.section .fixup,"ax"
11: lea (%rdx,%rcx,8),%rcx
12: movl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
.section __ex_table,"a"
.align 8
.quad 1b,11b
.quad 3b,12b
.previous
CFI_ENDPROC
ENDPROC(copy_user_generic_string)

View File

@@ -0,0 +1,137 @@
/*
* Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v2.
*
* Functions to copy from and to user space.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#define FIX_ALIGNMENT 1
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
jz 102f /* already aligned */
subl $8,%ecx
negl %ecx
subl %ecx,%edx
100: movb (%rsi),%al
101: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 100b
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
jmp copy_user_handle_tail
.previous
.section __ex_table,"a"
.align 8
.quad 100b,103b
.quad 101b,103b
.previous
#endif
.endm
/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination/source out of cache for more performance.
*/
ENTRY(__copy_user_nocache)
CFI_STARTPROC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
ALIGN_DESTINATION
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
jz 17f
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
4: movq 3*8(%rsi),%r11
5: movnti %r8,(%rdi)
6: movnti %r9,1*8(%rdi)
7: movnti %r10,2*8(%rdi)
8: movnti %r11,3*8(%rdi)
9: movq 4*8(%rsi),%r8
10: movq 5*8(%rsi),%r9
11: movq 6*8(%rsi),%r10
12: movq 7*8(%rsi),%r11
13: movnti %r8,4*8(%rdi)
14: movnti %r9,5*8(%rdi)
15: movnti %r10,6*8(%rdi)
16: movnti %r11,7*8(%rdi)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
17: movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
18: movq (%rsi),%r8
19: movnti %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
jnz 18b
20: andl %edx,%edx
jz 23f
movl %edx,%ecx
21: movb (%rsi),%al
22: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz 21b
23: xorl %eax,%eax
sfence
ret
.section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
40: lea (%rdx,%rcx,8),%rdx
jmp 60f
50: movl %ecx,%edx
60: sfence
jmp copy_user_handle_tail
.previous
.section __ex_table,"a"
.quad 1b,30b
.quad 2b,30b
.quad 3b,30b
.quad 4b,30b
.quad 5b,30b
.quad 6b,30b
.quad 7b,30b
.quad 8b,30b
.quad 9b,30b
.quad 10b,30b
.quad 11b,30b
.quad 12b,30b
.quad 13b,30b
.quad 14b,30b
.quad 15b,30b
.quad 16b,30b
.quad 18b,40b
.quad 19b,40b
.quad 21b,50b
.quad 22b,50b
.previous
CFI_ENDPROC
ENDPROC(__copy_user_nocache)

View File

@@ -0,0 +1,249 @@
/*
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
/*
* Checksum copy with exception handling.
* On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
* destination is zeroed.
*
* Input
* rdi source
* rsi destination
* edx len (32bit)
* ecx sum (32bit)
* r8 src_err_ptr (int)
* r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
*
* Wrappers need to take care of valid exception sum and zeroing.
* They also should align source or destination to 8 bytes.
*/
.macro source
10:
.section __ex_table,"a"
.align 8
.quad 10b,.Lbad_source
.previous
.endm
.macro dest
20:
.section __ex_table,"a"
.align 8
.quad 20b,.Lbad_dest
.previous
.endm
.macro ignore L=.Lignore
30:
.section __ex_table,"a"
.align 8
.quad 30b,\L
.previous
.endm
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
cmpl $3*64,%edx
jle .Lignore
.Lignore:
subq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET 7*8
movq %rbx,2*8(%rsp)
CFI_REL_OFFSET rbx, 2*8
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12, 3*8
movq %r14,4*8(%rsp)
CFI_REL_OFFSET r14, 4*8
movq %r13,5*8(%rsp)
CFI_REL_OFFSET r13, 5*8
movq %rbp,6*8(%rsp)
CFI_REL_OFFSET rbp, 6*8
movq %r8,(%rsp)
movq %r9,1*8(%rsp)
movl %ecx,%eax
movl %edx,%ecx
xorl %r9d,%r9d
movq %rcx,%r12
shrq $6,%r12
jz .Lhandle_tail /* < 64 */
clc
/* main loop. clear in 64 byte blocks */
/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
/* r11: temp3, rdx: temp4, r12 loopcnt */
/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
.p2align 4
.Lloop:
source
movq (%rdi),%rbx
source
movq 8(%rdi),%r8
source
movq 16(%rdi),%r11
source
movq 24(%rdi),%rdx
source
movq 32(%rdi),%r10
source
movq 40(%rdi),%rbp
source
movq 48(%rdi),%r14
source
movq 56(%rdi),%r13
ignore 2f
prefetcht0 5*64(%rdi)
2:
adcq %rbx,%rax
adcq %r8,%rax
adcq %r11,%rax
adcq %rdx,%rax
adcq %r10,%rax
adcq %rbp,%rax
adcq %r14,%rax
adcq %r13,%rax
decl %r12d
dest
movq %rbx,(%rsi)
dest
movq %r8,8(%rsi)
dest
movq %r11,16(%rsi)
dest
movq %rdx,24(%rsi)
dest
movq %r10,32(%rsi)
dest
movq %rbp,40(%rsi)
dest
movq %r14,48(%rsi)
dest
movq %r13,56(%rsi)
3:
leaq 64(%rdi),%rdi
leaq 64(%rsi),%rsi
jnz .Lloop
adcq %r9,%rax
/* do last upto 56 bytes */
.Lhandle_tail:
/* ecx: count */
movl %ecx,%r10d
andl $63,%ecx
shrl $3,%ecx
jz .Lfold
clc
.p2align 4
.Lloop_8:
source
movq (%rdi),%rbx
adcq %rbx,%rax
decl %ecx
dest
movq %rbx,(%rsi)
leaq 8(%rsi),%rsi /* preserve carry */
leaq 8(%rdi),%rdi
jnz .Lloop_8
adcq %r9,%rax /* add in carry */
.Lfold:
/* reduce checksum to 32bits */
movl %eax,%ebx
shrq $32,%rax
addl %ebx,%eax
adcl %r9d,%eax
/* do last upto 6 bytes */
.Lhandle_7:
movl %r10d,%ecx
andl $7,%ecx
shrl $1,%ecx
jz .Lhandle_1
movl $2,%edx
xorl %ebx,%ebx
clc
.p2align 4
.Lloop_1:
source
movw (%rdi),%bx
adcl %ebx,%eax
decl %ecx
dest
movw %bx,(%rsi)
leaq 2(%rdi),%rdi
leaq 2(%rsi),%rsi
jnz .Lloop_1
adcl %r9d,%eax /* add in carry */
/* handle last odd byte */
.Lhandle_1:
testl $1,%r10d
jz .Lende
xorl %ebx,%ebx
source
movb (%rdi),%bl
dest
movb %bl,(%rsi)
addl %ebx,%eax
adcl %r9d,%eax /* carry */
CFI_REMEMBER_STATE
.Lende:
movq 2*8(%rsp),%rbx
CFI_RESTORE rbx
movq 3*8(%rsp),%r12
CFI_RESTORE r12
movq 4*8(%rsp),%r14
CFI_RESTORE r14
movq 5*8(%rsp),%r13
CFI_RESTORE r13
movq 6*8(%rsp),%rbp
CFI_RESTORE rbp
addq $7*8,%rsp
CFI_ADJUST_CFA_OFFSET -7*8
ret
CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
movq (%rsp),%rax
testq %rax,%rax
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
.Lbad_dest:
movq 8(%rsp),%rax
testq %rax,%rax
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

View File

@@ -0,0 +1,148 @@
/*
* arch/x86_64/lib/csum-partial.c
*
* This file contains network checksum routines that are better done
* in an architecture-specific manner due to speed.
*/
#include <linux/compiler.h>
#include <linux/module.h>
#include <asm/checksum.h>
static inline unsigned short from32to16(unsigned a)
{
unsigned short b = a >> 16;
asm("addw %w2,%w0\n\t"
"adcw $0,%w0\n"
: "=r" (b)
: "0" (b), "r" (a));
return b;
}
/*
* Do a 64-bit checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
* This isn't as time critical as it used to be because many NICs
* do hardware checksumming these days.
*
* Things tried and found to not make it faster:
* Manual Prefetching
* Unrolling to an 128 bytes inner loop.
* Using interleaving with more registers to break the carry chains.
*/
static unsigned do_csum(const unsigned char *buff, unsigned len)
{
unsigned odd, count;
unsigned long result = 0;
if (unlikely(len == 0))
return result;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
result = *buff << 8;
len--;
buff++;
}
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
if (2 & (unsigned long) buff) {
result += *(unsigned short *)buff;
count--;
len -= 2;
buff += 2;
}
count >>= 1; /* nr of 32-bit words.. */
if (count) {
unsigned long zero;
unsigned count64;
if (4 & (unsigned long) buff) {
result += *(unsigned int *) buff;
count--;
len -= 4;
buff += 4;
}
count >>= 1; /* nr of 64-bit words.. */
/* main loop using 64byte blocks */
zero = 0;
count64 = count >> 3;
while (count64) {
asm("addq 0*8(%[src]),%[res]\n\t"
"adcq 1*8(%[src]),%[res]\n\t"
"adcq 2*8(%[src]),%[res]\n\t"
"adcq 3*8(%[src]),%[res]\n\t"
"adcq 4*8(%[src]),%[res]\n\t"
"adcq 5*8(%[src]),%[res]\n\t"
"adcq 6*8(%[src]),%[res]\n\t"
"adcq 7*8(%[src]),%[res]\n\t"
"adcq %[zero],%[res]"
: [res] "=r" (result)
: [src] "r" (buff), [zero] "r" (zero),
"[res]" (result));
buff += 64;
count64--;
}
/* last upto 7 8byte blocks */
count %= 8;
while (count) {
asm("addq %1,%0\n\t"
"adcq %2,%0\n"
: "=r" (result)
: "m" (*(unsigned long *)buff),
"r" (zero), "0" (result));
--count;
buff += 8;
}
result = add32_with_carry(result>>32,
result&0xffffffff);
if (len & 4) {
result += *(unsigned int *) buff;
buff += 4;
}
}
if (len & 2) {
result += *(unsigned short *) buff;
buff += 2;
}
}
if (len & 1)
result += *buff;
result = add32_with_carry(result>>32, result & 0xffffffff);
if (unlikely(odd)) {
result = from32to16(result);
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
}
return result;
}
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
*
* returns a 32-bit number suitable for feeding into itself
* or csum_tcpudp_magic
*
* this function must be called with even lengths, except
* for the last fragment, which may be odd
*
* it's best to have buff aligned on a 64-bit boundary
*/
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
return (__force __wsum)add32_with_carry(do_csum(buff, len),
(__force u32)sum);
}
/*
* this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c
*/
__sum16 ip_compute_csum(const void *buff, int len)
{
return csum_fold(csum_partial(buff,len,0));
}
EXPORT_SYMBOL(ip_compute_csum);

View File

@@ -0,0 +1,150 @@
/*
* Copyright 2002, 2003 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v.2
*
* Wrappers of assembly checksum functions for x86-64.
*/
#include <asm/checksum.h>
#include <linux/module.h>
/**
* csum_partial_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
* @dst: destination address
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
* @errp: set to -EFAULT for an bad source address.
*
* Returns an 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
__wsum
csum_partial_copy_from_user(const void __user *src, void *dst,
int len, __wsum isum, int *errp)
{
might_sleep();
*errp = 0;
if (!likely(access_ok(VERIFY_READ, src, len)))
goto out_err;
/*
* Why 6, not 7? To handle odd addresses aligned we
* would need to do considerable complications to fix the
* checksum which is defined as an 16bit accumulator. The
* fix alignment code is primarily for performance
* compatibility with 32bit and that will handle odd
* addresses slowly too.
*/
if (unlikely((unsigned long)src & 6)) {
while (((unsigned long)src & 6) && len >= 2) {
__u16 val16;
*errp = __get_user(val16, (const __u16 __user *)src);
if (*errp)
return isum;
*(__u16 *)dst = val16;
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
src += 2;
dst += 2;
len -= 2;
}
}
isum = csum_partial_copy_generic((__force const void *)src,
dst, len, isum, errp, NULL);
if (unlikely(*errp))
goto out_err;
return isum;
out_err:
*errp = -EFAULT;
memset(dst, 0, len);
return isum;
}
EXPORT_SYMBOL(csum_partial_copy_from_user);
/**
* csum_partial_copy_to_user - Copy and checksum to user space.
* @src: source address
* @dst: destination address (user space)
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
* @errp: set to -EFAULT for an bad destination address.
*
* Returns an 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
__wsum
csum_partial_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
{
might_sleep();
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
*errp = -EFAULT;
return 0;
}
if (unlikely((unsigned long)dst & 6)) {
while (((unsigned long)dst & 6) && len >= 2) {
__u16 val16 = *(__u16 *)src;
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
*errp = __put_user(val16, (__u16 __user *)dst);
if (*errp)
return isum;
src += 2;
dst += 2;
len -= 2;
}
}
*errp = 0;
return csum_partial_copy_generic(src, (void __force *)dst,
len, isum, NULL, errp);
}
EXPORT_SYMBOL(csum_partial_copy_to_user);
/**
* csum_partial_copy_nocheck - Copy and checksum.
* @src: source address
* @dst: destination address
* @len: number of bytes to be copied.
* @isum: initial sum that is added into the result (32bit unfolded)
*
* Returns an 32bit unfolded checksum of the buffer.
*/
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
{
return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, unsigned short proto, __wsum sum)
{
__u64 rest, sum64;
rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
(__force __u64)sum;
asm(" addq (%[saddr]),%[sum]\n"
" adcq 8(%[saddr]),%[sum]\n"
" adcq (%[daddr]),%[sum]\n"
" adcq 8(%[daddr]),%[sum]\n"
" adcq $0,%[sum]\n"
: [sum] "=r" (sum64)
: "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
return csum_fold(
(__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
}
EXPORT_SYMBOL(csum_ipv6_magic);

140
kernel/arch/x86/lib/delay.c Normal file
View File

@@ -0,0 +1,140 @@
/*
* Precise Delay Loops for i386
*
* Copyright (C) 1993 Linus Torvalds
* Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
* Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
*
* The __delay function must _NOT_ be inlined as its execution time
* depends wildly on alignment on many x86 processors. The additional
* jump magic is needed to get the timing stable on all the CPU's
* we have to worry about.
*/
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#ifdef CONFIG_SMP
# include <asm/smp.h>
#endif
/* simple loop based delay: */
static void delay_loop(unsigned long loops)
{
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
" jmp 1f \n"
".align 16 \n"
"1: jmp 2f \n"
".align 16 \n"
"2: dec %0 \n"
" jnz 2b \n"
"3: dec %0 \n"
: /* we don't need output */
:"a" (loops)
);
}
/* TSC based delay: */
static void delay_tsc(unsigned long loops)
{
unsigned long bclock, now;
int cpu;
preempt_disable();
cpu = smp_processor_id();
rdtsc_barrier();
rdtscl(bclock);
for (;;) {
rdtsc_barrier();
rdtscl(now);
if ((now - bclock) >= loops)
break;
/* Allow RT tasks to run */
preempt_enable();
rep_nop();
preempt_disable();
/*
* It is possible that we moved to another CPU, and
* since TSC's are per-cpu we need to calculate
* that. The delay must guarantee that we wait "at
* least" the amount of time. Being moved to another
* CPU could make the wait longer but we just need to
* make sure we waited long enough. Rebalance the
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
loops -= (now - bclock);
cpu = smp_processor_id();
rdtsc_barrier();
rdtscl(bclock);
}
}
preempt_enable();
}
/*
* Since we calibrate only once at boot, this
* function should be set once at boot and not changed
*/
static void (*delay_fn)(unsigned long) = delay_loop;
void use_tsc_delay(void)
{
delay_fn = delay_tsc;
}
int __devinit read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscll(*timer_val);
return 0;
}
return -1;
}
void __delay(unsigned long loops)
{
delay_fn(loops);
}
EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
int d0;
xloops *= 4;
asm("mull %%edx"
:"=d" (xloops), "=&a" (d0)
:"1" (xloops), "0"
(cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
void __udelay(unsigned long usecs)
{
__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
void __ndelay(unsigned long nsecs)
{
__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);

View File

@@ -0,0 +1,104 @@
/*
* __get_user functions.
*
* (C) Copyright 1998 Linus Torvalds
* (C) Copyright 2005 Andi Kleen
* (C) Copyright 2008 Glauber Costa
*
* These functions have a non-standard call interface
* to make them more efficient, especially as they
* return an error value in addition to the "real"
* return value.
*/
/*
* __get_user_X
*
* Inputs: %[r|e]ax contains the address.
* The register is modified, but all changes are undone
* before returning because the C code doesn't know about it.
*
* Outputs: %[r|e]ax is error code (0 or -EFAULT)
* %[r|e]dx contains zero-extended value
*
*
* These functions should not modify any other registers,
* as they get called from within inline assembly.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/asm.h>
.text
ENTRY(__get_user_1)
CFI_STARTPROC
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
1: movzb (%_ASM_AX),%edx
xor %eax,%eax
ret
CFI_ENDPROC
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
CFI_STARTPROC
add $1,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
2: movzwl -1(%_ASM_AX),%edx
xor %eax,%eax
ret
CFI_ENDPROC
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
CFI_STARTPROC
add $3,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
3: mov -3(%_ASM_AX),%edx
xor %eax,%eax
ret
CFI_ENDPROC
ENDPROC(__get_user_4)
#ifdef CONFIG_X86_64
ENTRY(__get_user_8)
CFI_STARTPROC
add $7,%_ASM_AX
jc bad_get_user
GET_THREAD_INFO(%_ASM_DX)
cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
4: movq -7(%_ASM_AX),%_ASM_DX
xor %eax,%eax
ret
CFI_ENDPROC
ENDPROC(__get_user_8)
#endif
bad_get_user:
CFI_STARTPROC
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
ret
CFI_ENDPROC
END(bad_get_user)
.section __ex_table,"a"
_ASM_PTR 1b,bad_get_user
_ASM_PTR 2b,bad_get_user
_ASM_PTR 3b,bad_get_user
#ifdef CONFIG_X86_64
_ASM_PTR 4b,bad_get_user
#endif

View File

@@ -0,0 +1,25 @@
#include <linux/string.h>
#include <linux/module.h>
#include <asm/io.h>
void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
{
__inline_memcpy((void *)dst, src, len);
}
EXPORT_SYMBOL(__memcpy_toio);
void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
{
__inline_memcpy(dst, (const void *)src, len);
}
EXPORT_SYMBOL(__memcpy_fromio);
void memset_io(volatile void __iomem *a, int b, size_t c)
{
/*
* TODO: memset can mangle the IO patterns quite a bit.
* perhaps it would be better to use a dumb one:
*/
memset((void *)a, b, c);
}
EXPORT_SYMBOL(memset_io);

View File

@@ -0,0 +1,30 @@
/*
* Copyright 2006 PathScale, Inc. All Rights Reserved.
*
* This file is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* override generic version in lib/iomap_copy.c
*/
ENTRY(__iowrite32_copy)
CFI_STARTPROC
movl %edx,%ecx
rep movsd
ret
CFI_ENDPROC
ENDPROC(__iowrite32_copy)

View File

@@ -0,0 +1,43 @@
#include <linux/string.h>
#include <linux/module.h>
#undef memcpy
#undef memset
void *memcpy(void *to, const void *from, size_t n)
{
#ifdef CONFIG_X86_USE_3DNOW
return __memcpy3d(to, from, n);
#else
return __memcpy(to, from, n);
#endif
}
EXPORT_SYMBOL(memcpy);
void *memset(void *s, int c, size_t count)
{
return __memset(s, c, count);
}
EXPORT_SYMBOL(memset);
void *memmove(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
if (dest < src) {
memcpy(dest, src, n);
} else {
__asm__ __volatile__(
"std\n\t"
"rep\n\t"
"movsb\n\t"
"cld"
: "=&c" (d0), "=&S" (d1), "=&D" (d2)
:"0" (n),
"1" (n-1+src),
"2" (n-1+dest)
:"memory");
}
return dest;
}
EXPORT_SYMBOL(memmove);

View File

@@ -0,0 +1,150 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*/
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
* Calls to this get patched into the kernel image via the
* alternative instructions framework:
*/
ALIGN
memcpy_c:
CFI_STARTPROC
movq %rdi, %rax
movl %edx, %ecx
shrl $3, %ecx
andl $7, %edx
rep movsq
movl %edx, %ecx
rep movsb
ret
CFI_ENDPROC
ENDPROC(memcpy_c)
ENTRY(__memcpy)
ENTRY(memcpy)
CFI_STARTPROC
/*
* Put the number of full 64-byte blocks into %ecx.
* Tail portion is handled at the end:
*/
movq %rdi, %rax
movl %edx, %ecx
shrl $6, %ecx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
/*
* We decrement the loop index here - and the zero-flag is
* checked at the end of the loop (instructions inbetween do
* not change the zero flag):
*/
decl %ecx
/*
* Move in blocks of 4x16 bytes:
*/
movq 0*8(%rsi), %r11
movq 1*8(%rsi), %r8
movq %r11, 0*8(%rdi)
movq %r8, 1*8(%rdi)
movq 2*8(%rsi), %r9
movq 3*8(%rsi), %r10
movq %r9, 2*8(%rdi)
movq %r10, 3*8(%rdi)
movq 4*8(%rsi), %r11
movq 5*8(%rsi), %r8
movq %r11, 4*8(%rdi)
movq %r8, 5*8(%rdi)
movq 6*8(%rsi), %r9
movq 7*8(%rsi), %r10
movq %r9, 6*8(%rdi)
movq %r10, 7*8(%rdi)
leaq 64(%rsi), %rsi
leaq 64(%rdi), %rdi
jnz .Lloop_64
.Lhandle_tail:
movl %edx, %ecx
andl $63, %ecx
shrl $3, %ecx
jz .Lhandle_7
.p2align 4
.Lloop_8:
decl %ecx
movq (%rsi), %r8
movq %r8, (%rdi)
leaq 8(%rdi), %rdi
leaq 8(%rsi), %rsi
jnz .Lloop_8
.Lhandle_7:
movl %edx, %ecx
andl $7, %ecx
jz .Lend
.p2align 4
.Lloop_1:
movb (%rsi), %r8b
movb %r8b, (%rdi)
incq %rdi
incq %rsi
decl %ecx
jnz .Lloop_1
.Lend:
ret
CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)
/*
* Some CPUs run faster using the string copy instructions.
* It is also a lot simpler. Use this when possible:
*/
.section .altinstr_replacement, "ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions, "a"
.align 8
.quad memcpy
.quad 1b
.byte X86_FEATURE_REP_GOOD
/*
* Replace only beginning, memcpy is used to apply alternatives,
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
.byte 2b - 1b
.byte 2b - 1b
.previous

View File

@@ -0,0 +1,21 @@
/* Normally compiler builtins are used, but sometimes the compiler calls out
of line code. Based on asm-i386/string.h.
*/
#define _STRING_C
#include <linux/string.h>
#include <linux/module.h>
#undef memmove
void *memmove(void *dest, const void *src, size_t count)
{
if (dest < src) {
return memcpy(dest, src, count);
} else {
char *p = dest + count;
const char *s = src + count;
while (count--)
*--p = *--s;
}
return dest;
}
EXPORT_SYMBOL(memmove);

View File

@@ -0,0 +1,133 @@
/* Copyright 2002 Andi Kleen, SuSE Labs */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
/*
* ISO C memset - set a memory block to a byte value.
*
* rdi destination
* rsi value (char)
* rdx count (bytes)
*
* rax original destination
*/
ALIGN
memset_c:
CFI_STARTPROC
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d
movl %edx,%ecx
shrl $3,%ecx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
mulq %rsi /* with rax, clobbers rdx */
rep stosq
movl %r8d,%ecx
rep stosb
movq %r9,%rax
ret
CFI_ENDPROC
ENDPROC(memset_c)
ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
movq %rdi,%r10
movq %rdx,%r11
/* expand byte value */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
mul %rcx /* with rax, clobbers rdx */
/* align dst */
movl %edi,%r9d
andl $7,%r9d
jnz .Lbad_alignment
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
movl %r11d,%ecx
shrl $6,%ecx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
decl %ecx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
movq %rax,24(%rdi)
movq %rax,32(%rdi)
movq %rax,40(%rdi)
movq %rax,48(%rdi)
movq %rax,56(%rdi)
leaq 64(%rdi),%rdi
jnz .Lloop_64
/* Handle tail in loops. The loops should be faster than hard
to predict jump tables. */
.p2align 4
.Lhandle_tail:
movl %r11d,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
shrl $3,%ecx
.p2align 4
.Lloop_8:
decl %ecx
movq %rax,(%rdi)
leaq 8(%rdi),%rdi
jnz .Lloop_8
.Lhandle_7:
movl %r11d,%ecx
andl $7,%ecx
jz .Lende
.p2align 4
.Lloop_1:
decl %ecx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
.Lende:
movq %r10,%rax
ret
CFI_RESTORE_STATE
.Lbad_alignment:
cmpq $7,%r11
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%r11
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)
/* Some CPUs run faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
.byte (memset_c - memset) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
.align 8
.quad memset
.quad 1b
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
.byte 2b - 1b
.previous

View File

@@ -0,0 +1,377 @@
/*
* MMX 3DNow! library helper functions
*
* To do:
* We can use MMX just for prefetch in IRQ's. This may be a win.
* (reported so on K6-III)
* We should use a better code neutral filler for the short jump
* leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
* We also want to clobber the filler register so we don't get any
* register forwarding stalls on the filler.
*
* Add *user handling. Checksums are not a win with MMX on any CPU
* tested so far for any MMX solution figured.
*
* 22/09/2000 - Arjan van de Ven
* Improved for non-egineering-sample Athlons
*
*/
#include <linux/hardirq.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/i387.h>
#include <asm/asm.h>
void *_mmx_memcpy(void *to, const void *from, size_t len)
{
void *p;
int i;
if (unlikely(in_interrupt()))
return __memcpy(to, from, len);
p = to;
i = len >> 6; /* len/64 */
kernel_fpu_begin();
__asm__ __volatile__ (
"1: prefetch (%0)\n" /* This set is 28 bytes */
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
" prefetch 192(%0)\n"
" prefetch 256(%0)\n"
"2: \n"
".section .fixup, \"ax\"\n"
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: : "r" (from));
for ( ; i > 5; i--) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
".section .fixup, \"ax\"\n"
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: : "r" (from), "r" (to) : "memory");
from += 64;
to += 64;
}
for ( ; i > 0; i--) {
__asm__ __volatile__ (
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from += 64;
to += 64;
}
/*
* Now do the tail of the block:
*/
__memcpy(to, from, len & 63);
kernel_fpu_end();
return p;
}
EXPORT_SYMBOL(_mmx_memcpy);
#ifdef CONFIG_MK7
/*
* The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
* other MMX using processors do not.
*/
static void fast_clear_page(void *page)
{
int i;
kernel_fpu_begin();
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
);
for (i = 0; i < 4096/64; i++) {
__asm__ __volatile__ (
" movntq %%mm0, (%0)\n"
" movntq %%mm0, 8(%0)\n"
" movntq %%mm0, 16(%0)\n"
" movntq %%mm0, 24(%0)\n"
" movntq %%mm0, 32(%0)\n"
" movntq %%mm0, 40(%0)\n"
" movntq %%mm0, 48(%0)\n"
" movntq %%mm0, 56(%0)\n"
: : "r" (page) : "memory");
page += 64;
}
/*
* Since movntq is weakly-ordered, a "sfence" is needed to become
* ordered again:
*/
__asm__ __volatile__("sfence\n"::);
kernel_fpu_end();
}
static void fast_copy_page(void *to, void *from)
{
int i;
kernel_fpu_begin();
/*
* maybe the prefetch stuff can go before the expensive fnsave...
* but that is for later. -AV
*/
__asm__ __volatile__(
"1: prefetch (%0)\n"
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
" prefetch 192(%0)\n"
" prefetch 256(%0)\n"
"2: \n"
".section .fixup, \"ax\"\n"
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b) : : "r" (from));
for (i = 0; i < (4096-320)/64; i++) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
" movntq %%mm0, (%1)\n"
" movq 8(%0), %%mm1\n"
" movntq %%mm1, 8(%1)\n"
" movq 16(%0), %%mm2\n"
" movntq %%mm2, 16(%1)\n"
" movq 24(%0), %%mm3\n"
" movntq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm4\n"
" movntq %%mm4, 32(%1)\n"
" movq 40(%0), %%mm5\n"
" movntq %%mm5, 40(%1)\n"
" movq 48(%0), %%mm6\n"
" movntq %%mm6, 48(%1)\n"
" movq 56(%0), %%mm7\n"
" movntq %%mm7, 56(%1)\n"
".section .fixup, \"ax\"\n"
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
from += 64;
to += 64;
}
for (i = (4096-320)/64; i < 4096/64; i++) {
__asm__ __volatile__ (
"2: movq (%0), %%mm0\n"
" movntq %%mm0, (%1)\n"
" movq 8(%0), %%mm1\n"
" movntq %%mm1, 8(%1)\n"
" movq 16(%0), %%mm2\n"
" movntq %%mm2, 16(%1)\n"
" movq 24(%0), %%mm3\n"
" movntq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm4\n"
" movntq %%mm4, 32(%1)\n"
" movq 40(%0), %%mm5\n"
" movntq %%mm5, 40(%1)\n"
" movq 48(%0), %%mm6\n"
" movntq %%mm6, 48(%1)\n"
" movq 56(%0), %%mm7\n"
" movntq %%mm7, 56(%1)\n"
: : "r" (from), "r" (to) : "memory");
from += 64;
to += 64;
}
/*
* Since movntq is weakly-ordered, a "sfence" is needed to become
* ordered again:
*/
__asm__ __volatile__("sfence \n"::);
kernel_fpu_end();
}
#else /* CONFIG_MK7 */
/*
* Generic MMX implementation without K7 specific streaming
*/
static void fast_clear_page(void *page)
{
int i;
kernel_fpu_begin();
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
);
for (i = 0; i < 4096/128; i++) {
__asm__ __volatile__ (
" movq %%mm0, (%0)\n"
" movq %%mm0, 8(%0)\n"
" movq %%mm0, 16(%0)\n"
" movq %%mm0, 24(%0)\n"
" movq %%mm0, 32(%0)\n"
" movq %%mm0, 40(%0)\n"
" movq %%mm0, 48(%0)\n"
" movq %%mm0, 56(%0)\n"
" movq %%mm0, 64(%0)\n"
" movq %%mm0, 72(%0)\n"
" movq %%mm0, 80(%0)\n"
" movq %%mm0, 88(%0)\n"
" movq %%mm0, 96(%0)\n"
" movq %%mm0, 104(%0)\n"
" movq %%mm0, 112(%0)\n"
" movq %%mm0, 120(%0)\n"
: : "r" (page) : "memory");
page += 128;
}
kernel_fpu_end();
}
static void fast_copy_page(void *to, void *from)
{
int i;
kernel_fpu_begin();
__asm__ __volatile__ (
"1: prefetch (%0)\n"
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
" prefetch 192(%0)\n"
" prefetch 256(%0)\n"
"2: \n"
".section .fixup, \"ax\"\n"
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b) : : "r" (from));
for (i = 0; i < 4096/64; i++) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
" movq 16(%0), %%mm2\n"
" movq 24(%0), %%mm3\n"
" movq %%mm0, (%1)\n"
" movq %%mm1, 8(%1)\n"
" movq %%mm2, 16(%1)\n"
" movq %%mm3, 24(%1)\n"
" movq 32(%0), %%mm0\n"
" movq 40(%0), %%mm1\n"
" movq 48(%0), %%mm2\n"
" movq 56(%0), %%mm3\n"
" movq %%mm0, 32(%1)\n"
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
".section .fixup, \"ax\"\n"
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: : "r" (from), "r" (to) : "memory");
from += 64;
to += 64;
}
kernel_fpu_end();
}
#endif /* !CONFIG_MK7 */
/*
* Favour MMX for page clear and copy:
*/
static void slow_zero_page(void *page)
{
int d0, d1;
__asm__ __volatile__(
"cld\n\t"
"rep ; stosl"
: "=&c" (d0), "=&D" (d1)
:"a" (0), "1" (page), "0" (1024)
:"memory");
}
void mmx_clear_page(void *page)
{
if (unlikely(in_interrupt()))
slow_zero_page(page);
else
fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);
static void slow_copy_page(void *to, void *from)
{
int d0, d1, d2;
__asm__ __volatile__(
"cld\n\t"
"rep ; movsl"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (1024), "1" ((long) to), "2" ((long) from)
: "memory");
}
void mmx_copy_page(void *to, void *from)
{
if (unlikely(in_interrupt()))
slow_copy_page(to, from);
else
fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);

View File

@@ -0,0 +1,5 @@
#include <linux/module.h>
#include <asm/msr.h>
EXPORT_SYMBOL(native_rdmsr_safe_regs);
EXPORT_SYMBOL(native_wrmsr_safe_regs);

View File

@@ -0,0 +1,102 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <asm/dwarf2.h>
#include <asm/asm.h>
#include <asm/msr.h>
#ifdef CONFIG_X86_64
/*
* int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]);
*
* reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi]
*
*/
.macro op_safe_regs op
ENTRY(native_\op\()_safe_regs)
CFI_STARTPROC
pushq_cfi %rbx
pushq_cfi %rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
movl 4(%rdi), %ecx
movl 8(%rdi), %edx
movl 12(%rdi), %ebx
movl 20(%rdi), %ebp
movl 24(%rdi), %esi
movl 28(%rdi), %edi
CFI_REMEMBER_STATE
1: \op
2: movl %eax, (%r10)
movl %r11d, %eax /* Return value */
movl %ecx, 4(%r10)
movl %edx, 8(%r10)
movl %ebx, 12(%r10)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
popq_cfi %rbp
popq_cfi %rbx
ret
3:
CFI_RESTORE_STATE
movl $-EIO, %r11d
jmp 2b
_ASM_EXTABLE(1b, 3b)
CFI_ENDPROC
ENDPROC(native_\op\()_safe_regs)
.endm
#else /* X86_32 */
.macro op_safe_regs op
ENTRY(native_\op\()_safe_regs)
CFI_STARTPROC
pushl_cfi %ebx
pushl_cfi %ebp
pushl_cfi %esi
pushl_cfi %edi
pushl_cfi $0 /* Return value */
pushl_cfi %eax
movl 4(%eax), %ecx
movl 8(%eax), %edx
movl 12(%eax), %ebx
movl 20(%eax), %ebp
movl 24(%eax), %esi
movl 28(%eax), %edi
movl (%eax), %eax
CFI_REMEMBER_STATE
1: \op
2: pushl_cfi %eax
movl 4(%esp), %eax
popl_cfi (%eax)
addl $4, %esp
CFI_ADJUST_CFA_OFFSET -4
movl %ecx, 4(%eax)
movl %edx, 8(%eax)
movl %ebx, 12(%eax)
movl %ebp, 20(%eax)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
popl_cfi %eax
popl_cfi %edi
popl_cfi %esi
popl_cfi %ebp
popl_cfi %ebx
ret
3:
CFI_RESTORE_STATE
movl $-EIO, 4(%esp)
jmp 2b
_ASM_EXTABLE(1b, 3b)
CFI_ENDPROC
ENDPROC(native_\op\()_safe_regs)
.endm
#endif
op_safe_regs rdmsr
op_safe_regs wrmsr

View File

@@ -0,0 +1,204 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <asm/msr.h>
static void __rdmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
struct msr *reg;
int this_cpu = raw_smp_processor_id();
if (rv->msrs)
reg = per_cpu_ptr(rv->msrs, this_cpu);
else
reg = &rv->reg;
rdmsr(rv->msr_no, reg->l, reg->h);
}
static void __wrmsr_on_cpu(void *info)
{
struct msr_info *rv = info;
struct msr *reg;
int this_cpu = raw_smp_processor_id();
if (rv->msrs)
reg = per_cpu_ptr(rv->msrs, this_cpu);
else
reg = &rv->reg;
wrmsr(rv->msr_no, reg->l, reg->h);
}
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
int err;
struct msr_info rv;
memset(&rv, 0, sizeof(rv));
rv.msr_no = msr_no;
err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
*l = rv.reg.l;
*h = rv.reg.h;
return err;
}
EXPORT_SYMBOL(rdmsr_on_cpu);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
int err;
struct msr_info rv;
memset(&rv, 0, sizeof(rv));
rv.msr_no = msr_no;
rv.reg.l = l;
rv.reg.h = h;
err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
return err;
}
EXPORT_SYMBOL(wrmsr_on_cpu);
static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
struct msr *msrs,
void (*msr_func) (void *info))
{
struct msr_info rv;
int this_cpu;
memset(&rv, 0, sizeof(rv));
rv.msrs = msrs;
rv.msr_no = msr_no;
this_cpu = get_cpu();
if (cpumask_test_cpu(this_cpu, mask))
msr_func(&rv);
smp_call_function_many(mask, msr_func, &rv, 1);
put_cpu();
}
/* rdmsr on a bunch of CPUs
*
* @mask: which CPUs
* @msr_no: which MSR
* @msrs: array of MSR values
*
*/
void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
{
__rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
}
EXPORT_SYMBOL(rdmsr_on_cpus);
/*
* wrmsr on a bunch of CPUs
*
* @mask: which CPUs
* @msr_no: which MSR
* @msrs: array of MSR values
*
*/
void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
{
__rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
}
EXPORT_SYMBOL(wrmsr_on_cpus);
/* These "safe" variants are slower and should be used when the target MSR
may not actually exist. */
static void __rdmsr_safe_on_cpu(void *info)
{
struct msr_info *rv = info;
rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
}
static void __wrmsr_safe_on_cpu(void *info)
{
struct msr_info *rv = info;
rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
}
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
int err;
struct msr_info rv;
memset(&rv, 0, sizeof(rv));
rv.msr_no = msr_no;
err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
*l = rv.reg.l;
*h = rv.reg.h;
return err ? err : rv.err;
}
EXPORT_SYMBOL(rdmsr_safe_on_cpu);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
int err;
struct msr_info rv;
memset(&rv, 0, sizeof(rv));
rv.msr_no = msr_no;
rv.reg.l = l;
rv.reg.h = h;
err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
return err ? err : rv.err;
}
EXPORT_SYMBOL(wrmsr_safe_on_cpu);
/*
* These variants are significantly slower, but allows control over
* the entire 32-bit GPR set.
*/
static void __rdmsr_safe_regs_on_cpu(void *info)
{
struct msr_regs_info *rv = info;
rv->err = rdmsr_safe_regs(rv->regs);
}
static void __wrmsr_safe_regs_on_cpu(void *info)
{
struct msr_regs_info *rv = info;
rv->err = wrmsr_safe_regs(rv->regs);
}
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
{
int err;
struct msr_regs_info rv;
rv.regs = regs;
rv.err = -EIO;
err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
return err ? err : rv.err;
}
EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
{
int err;
struct msr_regs_info rv;
rv.regs = regs;
rv.err = -EIO;
err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
return err ? err : rv.err;
}
EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);

23
kernel/arch/x86/lib/msr.c Normal file
View File

@@ -0,0 +1,23 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <asm/msr.h>
struct msr *msrs_alloc(void)
{
struct msr *msrs = NULL;
msrs = alloc_percpu(struct msr);
if (!msrs) {
pr_warning("%s: error allocating msrs\n", __func__);
return NULL;
}
return msrs;
}
EXPORT_SYMBOL(msrs_alloc);
void msrs_free(struct msr *msrs)
{
free_percpu(msrs);
}
EXPORT_SYMBOL(msrs_free);

View File

@@ -0,0 +1,97 @@
/*
* __put_user functions.
*
* (C) Copyright 2005 Linus Torvalds
* (C) Copyright 2005 Andi Kleen
* (C) Copyright 2008 Glauber Costa
*
* These functions have a non-standard call interface
* to make them more efficient, especially as they
* return an error value in addition to the "real"
* return value.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/asm.h>
/*
* __put_user_X
*
* Inputs: %eax[:%edx] contains the data
* %ecx contains the address
*
* Outputs: %eax is error code (0 or -EFAULT)
*
* These functions should not modify any other registers,
* as they get called from within inline assembly.
*/
#define ENTER CFI_STARTPROC ; \
GET_THREAD_INFO(%_ASM_BX)
#define EXIT ret ; \
CFI_ENDPROC
.text
ENTRY(__put_user_1)
ENTER
cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
jae bad_put_user
1: movb %al,(%_ASM_CX)
xor %eax,%eax
EXIT
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
ENTER
mov TI_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
jae bad_put_user
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
EXIT
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
ENTER
mov TI_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
jae bad_put_user
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
EXIT
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
ENTER
mov TI_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
jae bad_put_user
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
5: movl %edx,4(%_ASM_CX)
#endif
xor %eax,%eax
EXIT
ENDPROC(__put_user_8)
bad_put_user:
CFI_STARTPROC
movl $-EFAULT,%eax
EXIT
END(bad_put_user)
.section __ex_table,"a"
_ASM_PTR 1b,bad_put_user
_ASM_PTR 2b,bad_put_user
_ASM_PTR 3b,bad_put_user
_ASM_PTR 4b,bad_put_user
#ifdef CONFIG_X86_32
_ASM_PTR 5b,bad_put_user
#endif
.previous

View File

@@ -0,0 +1,38 @@
/* Slow paths of read/write spinlocks. */
#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>
/* rdi: pointer to rwlock_t */
ENTRY(__write_lock_failed)
CFI_STARTPROC
LOCK_PREFIX
addl $RW_LOCK_BIAS,(%rdi)
1: rep
nop
cmpl $RW_LOCK_BIAS,(%rdi)
jne 1b
LOCK_PREFIX
subl $RW_LOCK_BIAS,(%rdi)
jnz __write_lock_failed
ret
CFI_ENDPROC
END(__write_lock_failed)
/* rdi: pointer to rwlock_t */
ENTRY(__read_lock_failed)
CFI_STARTPROC
LOCK_PREFIX
incl (%rdi)
1: rep
nop
cmpl $1,(%rdi)
js 1b
LOCK_PREFIX
decl (%rdi)
js __read_lock_failed
ret
CFI_ENDPROC
END(__read_lock_failed)

View File

@@ -0,0 +1,81 @@
/*
* x86-64 rwsem wrappers
*
* This interfaces the inline asm code to the slow-path
* C routines. We need to save the call-clobbered regs
* that the asm does not mark as clobbered, and move the
* argument from %rax to %rdi.
*
* NOTE! We don't need to save %rax, because the functions
* will always return the semaphore pointer in %rax (which
* is also the input argument to these helpers)
*
* The following can clobber %rdx because the asm clobbers it:
* call_rwsem_down_write_failed
* call_rwsem_wake
* but %rdi, %rsi, %rcx, %r8-r11 always need saving.
*/
#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>
#define save_common_regs \
pushq %rdi; \
pushq %rsi; \
pushq %rcx; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11
#define restore_common_regs \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rcx; \
popq %rsi; \
popq %rdi
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
save_common_regs
pushq %rdx
movq %rax,%rdi
call rwsem_down_read_failed
popq %rdx
restore_common_regs
ret
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
ret
ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
save_common_regs
movq %rax,%rdi
call rwsem_wake
restore_common_regs
1: ret
ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
save_common_regs
pushq %rdx
movq %rax,%rdi
call rwsem_downgrade_wake
popq %rdx
restore_common_regs
ret
ENDPROC(call_rwsem_downgrade_wake)

View File

@@ -0,0 +1,136 @@
/*
* i386 semaphore implementation.
*
* (C) Copyright 1999 Linus Torvalds
*
* Portions Copyright 1999 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
*/
#include <linux/linkage.h>
#include <asm/rwlock.h>
#include <asm/alternative-asm.h>
#include <asm/frame.h>
#include <asm/dwarf2.h>
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
* need to convert that sequence back into the C sequence when
* there is contention on the semaphore.
*
* %eax contains the semaphore pointer on entry. Save the C-clobbered
* registers (%eax, %edx and %ecx) except %eax whish is either a return
* value or just clobbered..
*/
.section .sched.text, "ax"
/*
* rw spinlock fallbacks
*/
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
CFI_STARTPROC
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
1: rep; nop
cmpl $ RW_LOCK_BIAS,(%eax)
jne 1b
LOCK_PREFIX
subl $ RW_LOCK_BIAS,(%eax)
jnz 2b
ENDFRAME
ret
CFI_ENDPROC
ENDPROC(__write_lock_failed)
ENTRY(__read_lock_failed)
CFI_STARTPROC
FRAME
2: LOCK_PREFIX
incl (%eax)
1: rep; nop
cmpl $1,(%eax)
js 1b
LOCK_PREFIX
decl (%eax)
js 2b
ENDFRAME
ret
CFI_ENDPROC
ENDPROC(__read_lock_failed)
#endif
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
push %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
call rwsem_down_read_failed
pop %edx
CFI_ADJUST_CFA_OFFSET -4
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
calll rwsem_down_write_failed
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
CFI_STARTPROC
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
call rwsem_wake
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
1: ret
CFI_ENDPROC
ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
push %ecx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ecx,0
push %edx
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET edx,0
call rwsem_downgrade_wake
pop %edx
CFI_ADJUST_CFA_OFFSET -4
pop %ecx
CFI_ADJUST_CFA_OFFSET -4
ret
CFI_ENDPROC
ENDPROC(call_rwsem_downgrade_wake)
#endif

View File

@@ -0,0 +1,237 @@
/*
* Most of the string-functions are rather heavily hand-optimized,
* see especially strsep,strstr,str[c]spn. They should work, but are not
* very easy to understand. Everything is done entirely within the register
* set, making the functions fast and clean. String instructions have been
* used through-out, making for "slightly" unclear code :-)
*
* AK: On P4 and K7 using non string instruction implementations might be faster
* for large memory blocks. But most of them are unlikely to be used on large
* strings.
*/
#include <linux/string.h>
#include <linux/module.h>
#ifdef __HAVE_ARCH_STRCPY
char *strcpy(char *dest, const char *src)
{
int d0, d1, d2;
asm volatile("1:\tlodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2)
: "0" (src), "1" (dest) : "memory");
return dest;
}
EXPORT_SYMBOL(strcpy);
#endif
#ifdef __HAVE_ARCH_STRNCPY
char *strncpy(char *dest, const char *src, size_t count)
{
int d0, d1, d2, d3;
asm volatile("1:\tdecl %2\n\t"
"js 2f\n\t"
"lodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n\t"
"rep\n\t"
"stosb\n"
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
: "0" (src), "1" (dest), "2" (count) : "memory");
return dest;
}
EXPORT_SYMBOL(strncpy);
#endif
#ifdef __HAVE_ARCH_STRCAT
char *strcat(char *dest, const char *src)
{
int d0, d1, d2, d3;
asm volatile("repne\n\t"
"scasb\n\t"
"decl %1\n"
"1:\tlodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
return dest;
}
EXPORT_SYMBOL(strcat);
#endif
#ifdef __HAVE_ARCH_STRNCAT
char *strncat(char *dest, const char *src, size_t count)
{
int d0, d1, d2, d3;
asm volatile("repne\n\t"
"scasb\n\t"
"decl %1\n\t"
"movl %8,%3\n"
"1:\tdecl %3\n\t"
"js 2f\n\t"
"lodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n"
"2:\txorl %2,%2\n\t"
"stosb"
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu), "g" (count)
: "memory");
return dest;
}
EXPORT_SYMBOL(strncat);
#endif
#ifdef __HAVE_ARCH_STRCMP
int strcmp(const char *cs, const char *ct)
{
int d0, d1;
int res;
asm volatile("1:\tlodsb\n\t"
"scasb\n\t"
"jne 2f\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n\t"
"xorl %%eax,%%eax\n\t"
"jmp 3f\n"
"2:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"3:"
: "=a" (res), "=&S" (d0), "=&D" (d1)
: "1" (cs), "2" (ct)
: "memory");
return res;
}
EXPORT_SYMBOL(strcmp);
#endif
#ifdef __HAVE_ARCH_STRNCMP
int strncmp(const char *cs, const char *ct, size_t count)
{
int res;
int d0, d1, d2;
asm volatile("1:\tdecl %3\n\t"
"js 2f\n\t"
"lodsb\n\t"
"scasb\n\t"
"jne 3f\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n"
"2:\txorl %%eax,%%eax\n\t"
"jmp 4f\n"
"3:\tsbbl %%eax,%%eax\n\t"
"orb $1,%%al\n"
"4:"
: "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
: "1" (cs), "2" (ct), "3" (count)
: "memory");
return res;
}
EXPORT_SYMBOL(strncmp);
#endif
#ifdef __HAVE_ARCH_STRCHR
char *strchr(const char *s, int c)
{
int d0;
char *res;
asm volatile("movb %%al,%%ah\n"
"1:\tlodsb\n\t"
"cmpb %%ah,%%al\n\t"
"je 2f\n\t"
"testb %%al,%%al\n\t"
"jne 1b\n\t"
"movl $1,%1\n"
"2:\tmovl %1,%0\n\t"
"decl %0"
: "=a" (res), "=&S" (d0)
: "1" (s), "0" (c)
: "memory");
return res;
}
EXPORT_SYMBOL(strchr);
#endif
#ifdef __HAVE_ARCH_STRLEN
size_t strlen(const char *s)
{
int d0;
int res;
asm volatile("repne\n\t"
"scasb\n\t"
"notl %0\n\t"
"decl %0"
: "=c" (res), "=&D" (d0)
: "1" (s), "a" (0), "0" (0xffffffffu)
: "memory");
return res;
}
EXPORT_SYMBOL(strlen);
#endif
#ifdef __HAVE_ARCH_MEMCHR
void *memchr(const void *cs, int c, size_t count)
{
int d0;
void *res;
if (!count)
return NULL;
asm volatile("repne\n\t"
"scasb\n\t"
"je 1f\n\t"
"movl $1,%0\n"
"1:\tdecl %0"
: "=D" (res), "=&c" (d0)
: "a" (c), "0" (cs), "1" (count)
: "memory");
return res;
}
EXPORT_SYMBOL(memchr);
#endif
#ifdef __HAVE_ARCH_MEMSCAN
void *memscan(void *addr, int c, size_t size)
{
if (!size)
return addr;
asm volatile("repnz; scasb\n\t"
"jnz 1f\n\t"
"dec %%edi\n"
"1:"
: "=D" (addr), "=c" (size)
: "0" (addr), "1" (size), "a" (c)
: "memory");
return addr;
}
EXPORT_SYMBOL(memscan);
#endif
#ifdef __HAVE_ARCH_STRNLEN
size_t strnlen(const char *s, size_t count)
{
int d0;
int res;
asm volatile("movl %2,%0\n\t"
"jmp 2f\n"
"1:\tcmpb $0,(%0)\n\t"
"je 3f\n\t"
"incl %0\n"
"2:\tdecl %1\n\t"
"cmpl $-1,%1\n\t"
"jne 1b\n"
"3:\tsubl %2,%0"
: "=a" (res), "=&d" (d0)
: "c" (s), "1" (count)
: "memory");
return res;
}
EXPORT_SYMBOL(strnlen);
#endif

View File

@@ -0,0 +1,31 @@
#include <linux/string.h>
char *strstr(const char *cs, const char *ct)
{
int d0, d1;
register char *__res;
__asm__ __volatile__(
"movl %6,%%edi\n\t"
"repne\n\t"
"scasb\n\t"
"notl %%ecx\n\t"
"decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */
"movl %%ecx,%%edx\n"
"1:\tmovl %6,%%edi\n\t"
"movl %%esi,%%eax\n\t"
"movl %%edx,%%ecx\n\t"
"repe\n\t"
"cmpsb\n\t"
"je 2f\n\t" /* also works for empty string, see above */
"xchgl %%eax,%%esi\n\t"
"incl %%esi\n\t"
"cmpb $0,-1(%%eax)\n\t"
"jne 1b\n\t"
"xorl %%eax,%%eax\n\t"
"2:"
: "=a" (__res), "=&c" (d0), "=&S" (d1)
: "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
: "dx", "di");
return __res;
}

View File

@@ -0,0 +1,47 @@
/*
* Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
* (inspired by Andi Kleen's thunk_64.S)
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
#define ARCH_TRACE_IRQS_ON \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_on; \
popl %edx; \
popl %ecx; \
popl %eax;
#define ARCH_TRACE_IRQS_OFF \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
call trace_hardirqs_off; \
popl %edx; \
popl %ecx; \
popl %eax;
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in eax (arg1) */
.macro thunk_ra name,func
.globl \name
\name:
pushl %eax
pushl %ecx
pushl %edx
/* Place EIP in the arg1 */
movl 3*4(%esp), %eax
call \func
popl %edx
popl %ecx
popl %eax
ret
.endm
thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif

View File

@@ -0,0 +1,81 @@
/*
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
* Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/rwlock.h>
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro thunk name,func
.globl \name
\name:
CFI_STARTPROC
SAVE_ARGS
call \func
jmp restore
CFI_ENDPROC
.endm
/* rdi: arg1 ... normal C conventions. rax is passed from C. */
.macro thunk_retrax name,func
.globl \name
\name:
CFI_STARTPROC
SAVE_ARGS
call \func
jmp restore_norax
CFI_ENDPROC
.endm
.section .sched.text, "ax"
#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
thunk rwsem_wake_thunk,rwsem_wake
thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in rdi (arg1) */
.macro thunk_ra name,func
.globl \name
\name:
CFI_STARTPROC
SAVE_ARGS
/* SAVE_ARGS pushs 9 elements */
/* the next element would be the rip */
movq 9*8(%rsp), %rdi
call \func
jmp restore
CFI_ENDPROC
.endm
thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
thunk lockdep_sys_exit_thunk,lockdep_sys_exit
#endif
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
CFI_STARTPROC
SAVE_ARGS
restore:
RESTORE_ARGS
ret
CFI_ENDPROC
CFI_STARTPROC
SAVE_ARGS
restore_norax:
RESTORE_ARGS 1
ret
CFI_ENDPROC

View File

@@ -0,0 +1,885 @@
/*
* User address space access functions.
* The non inlined parts of asm-i386/uaccess.h are here.
*
* Copyright 1997 Andi Kleen <ak@muc.de>
* Copyright 1997 Linus Torvalds
*/
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/interrupt.h>
#include <asm/uaccess.h>
#include <asm/mmx.h>
#ifdef CONFIG_X86_INTEL_USERCOPY
/*
* Alignment at which movsl is preferred for bulk memory copies.
*/
struct movsl_mask movsl_mask __read_mostly;
#endif
static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
return 0;
#endif
return 1;
}
#define movsl_is_ok(a1, a2, n) \
__movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
/*
* Copy a null terminated string from userspace.
*/
#define __do_strncpy_from_user(dst, src, count, res) \
do { \
int __d0, __d1, __d2; \
might_fault(); \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
"0: lodsb\n" \
" stosb\n" \
" testb %%al,%%al\n" \
" jz 1f\n" \
" decl %1\n" \
" jnz 0b\n" \
"1: subl %1,%0\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movl %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(0b,3b) \
: "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
: "memory"); \
} while (0)
/**
* __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
* @dst: Destination address, in kernel space. This buffer must be at
* least @count bytes long.
* @src: Source address, in user space.
* @count: Maximum number of bytes to copy, including the trailing NUL.
*
* Copies a NUL-terminated string from userspace to kernel space.
* Caller must check the specified block with access_ok() before calling
* this function.
*
* On success, returns the length of the string (not including the trailing
* NUL).
*
* If access to userspace fails, returns -EFAULT (some data may have been
* copied).
*
* If @count is smaller than the length of the string, copies @count bytes
* and returns @count.
*/
long
__strncpy_from_user(char *dst, const char __user *src, long count)
{
long res;
__do_strncpy_from_user(dst, src, count, res);
return res;
}
EXPORT_SYMBOL(__strncpy_from_user);
/**
* strncpy_from_user: - Copy a NUL terminated string from userspace.
* @dst: Destination address, in kernel space. This buffer must be at
* least @count bytes long.
* @src: Source address, in user space.
* @count: Maximum number of bytes to copy, including the trailing NUL.
*
* Copies a NUL-terminated string from userspace to kernel space.
*
* On success, returns the length of the string (not including the trailing
* NUL).
*
* If access to userspace fails, returns -EFAULT (some data may have been
* copied).
*
* If @count is smaller than the length of the string, copies @count bytes
* and returns @count.
*/
long
strncpy_from_user(char *dst, const char __user *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
__do_strncpy_from_user(dst, src, count, res);
return res;
}
EXPORT_SYMBOL(strncpy_from_user);
/*
* Zero Userspace
*/
#define __do_clear_user(addr,size) \
do { \
int __d0; \
might_fault(); \
__asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %2,%0\n" \
"1: rep; stosb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(0b,3b) \
_ASM_EXTABLE(1b,2b) \
: "=&c"(size), "=&D" (__d0) \
: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
} while (0)
/**
* clear_user: - Zero a block of memory in user space.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space.
*
* Returns number of bytes that could not be cleared.
* On success, this will be zero.
*/
unsigned long
clear_user(void __user *to, unsigned long n)
{
might_fault();
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
}
EXPORT_SYMBOL(clear_user);
/**
* __clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
* Zero a block of memory in user space. Caller must check
* the specified block with access_ok() before calling this function.
*
* Returns number of bytes that could not be cleared.
* On success, this will be zero.
*/
unsigned long
__clear_user(void __user *to, unsigned long n)
{
__do_clear_user(to, n);
return n;
}
EXPORT_SYMBOL(__clear_user);
/**
* strnlen_user: - Get the size of a string in user space.
* @s: The string to measure.
* @n: The maximum valid length
*
* Get the size of a NUL-terminated string in user space.
*
* Returns the size of the string INCLUDING the terminating NUL.
* On exception, returns 0.
* If the string is too long, returns a value greater than @n.
*/
long strnlen_user(const char __user *s, long n)
{
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;
might_fault();
__asm__ __volatile__(
" testl %0, %0\n"
" jz 3f\n"
" andl %0,%%ecx\n"
"0: repne; scasb\n"
" setne %%al\n"
" subl %%ecx,%0\n"
" addl %0,%%eax\n"
"1:\n"
".section .fixup,\"ax\"\n"
"2: xorl %%eax,%%eax\n"
" jmp 1b\n"
"3: movb $1,%%al\n"
" jmp 1b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,2b\n"
".previous"
:"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
:"0" (n), "1" (s), "2" (0), "3" (mask)
:"cc");
return res & mask;
}
EXPORT_SYMBOL(strnlen_user);
#ifdef CONFIG_X86_INTEL_USERCOPY
static unsigned long
__copy_user_intel(void __user *to, const void *from, unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"1: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 3f\n"
"2: movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"3: movl 0(%4), %%eax\n"
"4: movl 4(%4), %%edx\n"
"5: movl %%eax, 0(%3)\n"
"6: movl %%edx, 4(%3)\n"
"7: movl 8(%4), %%eax\n"
"8: movl 12(%4),%%edx\n"
"9: movl %%eax, 8(%3)\n"
"10: movl %%edx, 12(%3)\n"
"11: movl 16(%4), %%eax\n"
"12: movl 20(%4), %%edx\n"
"13: movl %%eax, 16(%3)\n"
"14: movl %%edx, 20(%3)\n"
"15: movl 24(%4), %%eax\n"
"16: movl 28(%4), %%edx\n"
"17: movl %%eax, 24(%3)\n"
"18: movl %%edx, 28(%3)\n"
"19: movl 32(%4), %%eax\n"
"20: movl 36(%4), %%edx\n"
"21: movl %%eax, 32(%3)\n"
"22: movl %%edx, 36(%3)\n"
"23: movl 40(%4), %%eax\n"
"24: movl 44(%4), %%edx\n"
"25: movl %%eax, 40(%3)\n"
"26: movl %%edx, 44(%3)\n"
"27: movl 48(%4), %%eax\n"
"28: movl 52(%4), %%edx\n"
"29: movl %%eax, 48(%3)\n"
"30: movl %%edx, 52(%3)\n"
"31: movl 56(%4), %%eax\n"
"32: movl 60(%4), %%edx\n"
"33: movl %%eax, 56(%3)\n"
"34: movl %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 1b\n"
"35: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"99: rep; movsl\n"
"36: movl %%eax, %0\n"
"37: rep; movsb\n"
"100:\n"
".section .fixup,\"ax\"\n"
"101: lea 0(%%eax,%0,4),%0\n"
" jmp 100b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 1b,100b\n"
" .long 2b,100b\n"
" .long 3b,100b\n"
" .long 4b,100b\n"
" .long 5b,100b\n"
" .long 6b,100b\n"
" .long 7b,100b\n"
" .long 8b,100b\n"
" .long 9b,100b\n"
" .long 10b,100b\n"
" .long 11b,100b\n"
" .long 12b,100b\n"
" .long 13b,100b\n"
" .long 14b,100b\n"
" .long 15b,100b\n"
" .long 16b,100b\n"
" .long 17b,100b\n"
" .long 18b,100b\n"
" .long 19b,100b\n"
" .long 20b,100b\n"
" .long 21b,100b\n"
" .long 22b,100b\n"
" .long 23b,100b\n"
" .long 24b,100b\n"
" .long 25b,100b\n"
" .long 26b,100b\n"
" .long 27b,100b\n"
" .long 28b,100b\n"
" .long 29b,100b\n"
" .long 30b,100b\n"
" .long 31b,100b\n"
" .long 32b,100b\n"
" .long 33b,100b\n"
" .long 34b,100b\n"
" .long 35b,100b\n"
" .long 36b,100b\n"
" .long 37b,100b\n"
" .long 99b,101b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
static unsigned long
__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 2f\n"
"1: movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"2: movl 0(%4), %%eax\n"
"21: movl 4(%4), %%edx\n"
" movl %%eax, 0(%3)\n"
" movl %%edx, 4(%3)\n"
"3: movl 8(%4), %%eax\n"
"31: movl 12(%4),%%edx\n"
" movl %%eax, 8(%3)\n"
" movl %%edx, 12(%3)\n"
"4: movl 16(%4), %%eax\n"
"41: movl 20(%4), %%edx\n"
" movl %%eax, 16(%3)\n"
" movl %%edx, 20(%3)\n"
"10: movl 24(%4), %%eax\n"
"51: movl 28(%4), %%edx\n"
" movl %%eax, 24(%3)\n"
" movl %%edx, 28(%3)\n"
"11: movl 32(%4), %%eax\n"
"61: movl 36(%4), %%edx\n"
" movl %%eax, 32(%3)\n"
" movl %%edx, 36(%3)\n"
"12: movl 40(%4), %%eax\n"
"71: movl 44(%4), %%edx\n"
" movl %%eax, 40(%3)\n"
" movl %%edx, 44(%3)\n"
"13: movl 48(%4), %%eax\n"
"81: movl 52(%4), %%edx\n"
" movl %%eax, 48(%3)\n"
" movl %%edx, 52(%3)\n"
"14: movl 56(%4), %%eax\n"
"91: movl 60(%4), %%edx\n"
" movl %%eax, 56(%3)\n"
" movl %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 0b\n"
"5: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"6: rep; movsl\n"
" movl %%eax,%0\n"
"7: rep; movsb\n"
"8:\n"
".section .fixup,\"ax\"\n"
"9: lea 0(%%eax,%0,4),%0\n"
"16: pushl %0\n"
" pushl %%eax\n"
" xorl %%eax,%%eax\n"
" rep; stosb\n"
" popl %%eax\n"
" popl %0\n"
" jmp 8b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,16b\n"
" .long 1b,16b\n"
" .long 2b,16b\n"
" .long 21b,16b\n"
" .long 3b,16b\n"
" .long 31b,16b\n"
" .long 4b,16b\n"
" .long 41b,16b\n"
" .long 10b,16b\n"
" .long 51b,16b\n"
" .long 11b,16b\n"
" .long 61b,16b\n"
" .long 12b,16b\n"
" .long 71b,16b\n"
" .long 13b,16b\n"
" .long 81b,16b\n"
" .long 14b,16b\n"
" .long 91b,16b\n"
" .long 6b,9b\n"
" .long 7b,16b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
/*
* Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware.
* hyoshiok@miraclelinux.com
*/
static unsigned long __copy_user_zeroing_intel_nocache(void *to,
const void __user *from, unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 2f\n"
"1: movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"2: movl 0(%4), %%eax\n"
"21: movl 4(%4), %%edx\n"
" movnti %%eax, 0(%3)\n"
" movnti %%edx, 4(%3)\n"
"3: movl 8(%4), %%eax\n"
"31: movl 12(%4),%%edx\n"
" movnti %%eax, 8(%3)\n"
" movnti %%edx, 12(%3)\n"
"4: movl 16(%4), %%eax\n"
"41: movl 20(%4), %%edx\n"
" movnti %%eax, 16(%3)\n"
" movnti %%edx, 20(%3)\n"
"10: movl 24(%4), %%eax\n"
"51: movl 28(%4), %%edx\n"
" movnti %%eax, 24(%3)\n"
" movnti %%edx, 28(%3)\n"
"11: movl 32(%4), %%eax\n"
"61: movl 36(%4), %%edx\n"
" movnti %%eax, 32(%3)\n"
" movnti %%edx, 36(%3)\n"
"12: movl 40(%4), %%eax\n"
"71: movl 44(%4), %%edx\n"
" movnti %%eax, 40(%3)\n"
" movnti %%edx, 44(%3)\n"
"13: movl 48(%4), %%eax\n"
"81: movl 52(%4), %%edx\n"
" movnti %%eax, 48(%3)\n"
" movnti %%edx, 52(%3)\n"
"14: movl 56(%4), %%eax\n"
"91: movl 60(%4), %%edx\n"
" movnti %%eax, 56(%3)\n"
" movnti %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 0b\n"
" sfence \n"
"5: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"6: rep; movsl\n"
" movl %%eax,%0\n"
"7: rep; movsb\n"
"8:\n"
".section .fixup,\"ax\"\n"
"9: lea 0(%%eax,%0,4),%0\n"
"16: pushl %0\n"
" pushl %%eax\n"
" xorl %%eax,%%eax\n"
" rep; stosb\n"
" popl %%eax\n"
" popl %0\n"
" jmp 8b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,16b\n"
" .long 1b,16b\n"
" .long 2b,16b\n"
" .long 21b,16b\n"
" .long 3b,16b\n"
" .long 31b,16b\n"
" .long 4b,16b\n"
" .long 41b,16b\n"
" .long 10b,16b\n"
" .long 51b,16b\n"
" .long 11b,16b\n"
" .long 61b,16b\n"
" .long 12b,16b\n"
" .long 71b,16b\n"
" .long 13b,16b\n"
" .long 81b,16b\n"
" .long 14b,16b\n"
" .long 91b,16b\n"
" .long 6b,9b\n"
" .long 7b,16b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
static unsigned long __copy_user_intel_nocache(void *to,
const void __user *from, unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 2f\n"
"1: movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"2: movl 0(%4), %%eax\n"
"21: movl 4(%4), %%edx\n"
" movnti %%eax, 0(%3)\n"
" movnti %%edx, 4(%3)\n"
"3: movl 8(%4), %%eax\n"
"31: movl 12(%4),%%edx\n"
" movnti %%eax, 8(%3)\n"
" movnti %%edx, 12(%3)\n"
"4: movl 16(%4), %%eax\n"
"41: movl 20(%4), %%edx\n"
" movnti %%eax, 16(%3)\n"
" movnti %%edx, 20(%3)\n"
"10: movl 24(%4), %%eax\n"
"51: movl 28(%4), %%edx\n"
" movnti %%eax, 24(%3)\n"
" movnti %%edx, 28(%3)\n"
"11: movl 32(%4), %%eax\n"
"61: movl 36(%4), %%edx\n"
" movnti %%eax, 32(%3)\n"
" movnti %%edx, 36(%3)\n"
"12: movl 40(%4), %%eax\n"
"71: movl 44(%4), %%edx\n"
" movnti %%eax, 40(%3)\n"
" movnti %%edx, 44(%3)\n"
"13: movl 48(%4), %%eax\n"
"81: movl 52(%4), %%edx\n"
" movnti %%eax, 48(%3)\n"
" movnti %%edx, 52(%3)\n"
"14: movl 56(%4), %%eax\n"
"91: movl 60(%4), %%edx\n"
" movnti %%eax, 56(%3)\n"
" movnti %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 0b\n"
" sfence \n"
"5: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"6: rep; movsl\n"
" movl %%eax,%0\n"
"7: rep; movsb\n"
"8:\n"
".section .fixup,\"ax\"\n"
"9: lea 0(%%eax,%0,4),%0\n"
"16: jmp 8b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,16b\n"
" .long 1b,16b\n"
" .long 2b,16b\n"
" .long 21b,16b\n"
" .long 3b,16b\n"
" .long 31b,16b\n"
" .long 4b,16b\n"
" .long 41b,16b\n"
" .long 10b,16b\n"
" .long 51b,16b\n"
" .long 11b,16b\n"
" .long 61b,16b\n"
" .long 12b,16b\n"
" .long 71b,16b\n"
" .long 13b,16b\n"
" .long 81b,16b\n"
" .long 14b,16b\n"
" .long 91b,16b\n"
" .long 6b,9b\n"
" .long 7b,16b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
#else
/*
* Leave these declared but undefined. They should not be any references to
* them
*/
unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
unsigned long size);
unsigned long __copy_user_intel(void __user *to, const void *from,
unsigned long size);
unsigned long __copy_user_zeroing_intel_nocache(void *to,
const void __user *from, unsigned long size);
#endif /* CONFIG_X86_INTEL_USERCOPY */
/* Generic arbitrary sized copy. */
#define __copy_user(to, from, size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
" cmp $7,%0\n" \
" jbe 1f\n" \
" movl %1,%0\n" \
" negl %0\n" \
" andl $7,%0\n" \
" subl %0,%3\n" \
"4: rep; movsb\n" \
" movl %3,%0\n" \
" shrl $2,%0\n" \
" andl $3,%3\n" \
" .align 2,0x90\n" \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"5: addl %3,%0\n" \
" jmp 2b\n" \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 4b,5b\n" \
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
} while (0)
#define __copy_user_zeroing(to, from, size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
" cmp $7,%0\n" \
" jbe 1f\n" \
" movl %1,%0\n" \
" negl %0\n" \
" andl $7,%0\n" \
" subl %0,%3\n" \
"4: rep; movsb\n" \
" movl %3,%0\n" \
" shrl $2,%0\n" \
" andl $3,%3\n" \
" .align 2,0x90\n" \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"5: addl %3,%0\n" \
" jmp 6f\n" \
"3: lea 0(%3,%0,4),%0\n" \
"6: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosb\n" \
" popl %%eax\n" \
" popl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 4b,5b\n" \
" .long 0b,3b\n" \
" .long 1b,6b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
} while (0)
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
#ifndef CONFIG_X86_WP_WORKS_OK
if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
((unsigned long)to) < TASK_SIZE) {
/*
* When we are in an atomic section (see
* mm/filemap.c:file_read_actor), return the full
* length to take the slow path.
*/
if (in_atomic())
return n;
/*
* CPU does not honor the WP bit when writing
* from supervisory mode, and due to preemption or SMP,
* the page tables can change at any time.
* Do it manually. Manfred <manfred@colorfullife.com>
*/
while (n) {
unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
unsigned long len = PAGE_SIZE - offset;
int retval;
struct page *pg;
void *maddr;
if (len > n)
len = n;
survive:
down_read(&current->mm->mmap_sem);
retval = get_user_pages(current, current->mm,
(unsigned long)to, 1, 1, 0, &pg, NULL);
if (retval == -ENOMEM && is_global_init(current)) {
up_read(&current->mm->mmap_sem);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto survive;
}
if (retval != 1) {
up_read(&current->mm->mmap_sem);
break;
}
maddr = kmap_atomic(pg, KM_USER0);
memcpy(maddr + offset, from, len);
kunmap_atomic(maddr, KM_USER0);
set_page_dirty_lock(pg);
put_page(pg);
up_read(&current->mm->mmap_sem);
from += len;
to += len;
n -= len;
}
return n;
}
#endif
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
return n;
}
EXPORT_SYMBOL(__copy_to_user_ll);
unsigned long __copy_from_user_ll(void *to, const void __user *from,
unsigned long n)
{
if (movsl_is_ok(to, from, n))
__copy_user_zeroing(to, from, n);
else
n = __copy_user_zeroing_intel(to, from, n);
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll);
unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
unsigned long n)
{
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel((void __user *)to,
(const void *)from, n);
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nozero);
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && cpu_has_xmm2)
n = __copy_user_zeroing_intel_nocache(to, from, n);
else
__copy_user_zeroing(to, from, n);
#else
__copy_user_zeroing(to, from, n);
#endif
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache);
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
if (n > 64 && cpu_has_xmm2)
n = __copy_user_intel_nocache(to, from, n);
else
__copy_user(to, from, n);
#else
__copy_user(to, from, n);
#endif
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
* @from: Source address, in kernel space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep.
*
* Copy data from kernel space to user space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
EXPORT_SYMBOL(copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
* @to: Destination address, in kernel space.
* @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep.
*
* Copy data from user space to kernel space.
*
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else
memset(to, 0, n);
return n;
}
EXPORT_SYMBOL(copy_from_user);

View File

@@ -0,0 +1,183 @@
/*
* User address space access functions.
*
* Copyright 1997 Andi Kleen <ak@muc.de>
* Copyright 1997 Linus Torvalds
* Copyright 2002 Andi Kleen <ak@suse.de>
*/
#include <linux/module.h>
#include <asm/uaccess.h>
/*
* Copy a null terminated string from userspace.
*/
#define __do_strncpy_from_user(dst,src,count,res) \
do { \
long __d0, __d1, __d2; \
might_fault(); \
__asm__ __volatile__( \
" testq %1,%1\n" \
" jz 2f\n" \
"0: lodsb\n" \
" stosb\n" \
" testb %%al,%%al\n" \
" jz 1f\n" \
" decq %1\n" \
" jnz 0b\n" \
"1: subq %1,%0\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: movq %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE(0b,3b) \
: "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
"=&D" (__d2) \
: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
: "memory"); \
} while (0)
long
__strncpy_from_user(char *dst, const char __user *src, long count)
{
long res;
__do_strncpy_from_user(dst, src, count, res);
return res;
}
EXPORT_SYMBOL(__strncpy_from_user);
long
strncpy_from_user(char *dst, const char __user *src, long count)
{
long res = -EFAULT;
if (access_ok(VERIFY_READ, src, 1))
return __strncpy_from_user(dst, src, count);
return res;
}
EXPORT_SYMBOL(strncpy_from_user);
/*
* Zero Userspace
*/
unsigned long __clear_user(void __user *addr, unsigned long size)
{
long __d0;
might_fault();
/* no memory constraint because it doesn't change any memory gcc knows
about */
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
"0: movq %[zero],(%[dst])\n"
" addq %[eight],%[dst]\n"
" decl %%ecx ; jnz 0b\n"
"4: movq %[size1],%%rcx\n"
" testl %%ecx,%%ecx\n"
" jz 2f\n"
"1: movb %b[zero],(%[dst])\n"
" incq %[dst]\n"
" decl %%ecx ; jnz 1b\n"
"2:\n"
".section .fixup,\"ax\"\n"
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(0b,3b)
_ASM_EXTABLE(1b,2b)
: [size8] "=&c"(size), [dst] "=&D" (__d0)
: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
[zero] "r" (0UL), [eight] "r" (8UL));
return size;
}
EXPORT_SYMBOL(__clear_user);
unsigned long clear_user(void __user *to, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
return __clear_user(to, n);
return n;
}
EXPORT_SYMBOL(clear_user);
/*
* Return the size of a string (including the ending 0)
*
* Return 0 on exception, a value greater than N if too long
*/
long __strnlen_user(const char __user *s, long n)
{
long res = 0;
char c;
while (1) {
if (res>n)
return n+1;
if (__get_user(c, s))
return 0;
if (!c)
return res+1;
res++;
s++;
}
}
EXPORT_SYMBOL(__strnlen_user);
long strnlen_user(const char __user *s, long n)
{
if (!access_ok(VERIFY_READ, s, 1))
return 0;
return __strnlen_user(s, n);
}
EXPORT_SYMBOL(strnlen_user);
long strlen_user(const char __user *s)
{
long res = 0;
char c;
for (;;) {
if (get_user(c, s))
return 0;
if (!c)
return res+1;
res++;
s++;
}
}
EXPORT_SYMBOL(strlen_user);
unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
{
if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
return copy_user_generic((__force void *)to, (__force void *)from, len);
}
return len;
}
EXPORT_SYMBOL(copy_in_user);
/*
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
*/
unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
{
char c;
unsigned zero_len;
for (; len; --len) {
if (__get_user_nocheck(c, from++, sizeof(char)))
break;
if (__put_user_nocheck(c, to++, sizeof(char)))
break;
}
for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
if (__put_user_nocheck(c, to++, sizeof(char)))
break;
return len;
}