/* A memcpy for CRIS.
   Copyright (C) 1994-2005 Axis Communications.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   1. Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

   2. Neither the name of Axis Communications nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.  */

/* FIXME: This file should really only be used for reference, as the
   result depends somewhat on gcc generating what we expect rather
   than what we describe.  An assembly file should be used instead.  */

#include <stddef.h>

/* Minimum remaining byte count at which memcpy switches to its movem
   block-copy loop, which moves 44 bytes per iteration.

   Break even between movem and move16 is really at 38.7 * 2, but
   modulo 44, so up to the next multiple of 44, we use ordinary code.  */
#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)

/* No name ambiguities in this file.  */
 | 
						|
__asm__ (".syntax no_register_prefix");
 | 
						|
 | 
						|
void *
 | 
						|
memcpy(void *pdst, const void *psrc, size_t pn)
 | 
						|
{
 | 
						|
  /* Now we want the parameters put in special registers.
 | 
						|
     Make sure the compiler is able to make something useful of this.
 | 
						|
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
 | 
						|
 | 
						|
     If gcc was allright, it really would need no temporaries, and no
 | 
						|
     stack space to save stuff on.  */
 | 
						|
 | 
						|
  register void *return_dst __asm__ ("r10") = pdst;
 | 
						|
  register unsigned char *dst __asm__ ("r13") = pdst;
 | 
						|
  register unsigned const char *src __asm__ ("r11") = psrc;
 | 
						|
  register int n __asm__ ("r12") = pn;
 | 
						|
 | 
						|
  /* When src is aligned but not dst, this makes a few extra needless
 | 
						|
     cycles.  I believe it would take as many to check that the
 | 
						|
     re-alignment was unnecessary.  */
 | 
						|
  if (((unsigned long) dst & 3) != 0
 | 
						|
      /* Don't align if we wouldn't copy more than a few bytes; so we
 | 
						|
	 don't have to check further for overflows.  */
 | 
						|
      && n >= 3)
 | 
						|
  {
 | 
						|
    if ((unsigned long) dst & 1)
 | 
						|
      {
 | 
						|
	n--;
 | 
						|
	*dst = *src;
 | 
						|
	src++;
 | 
						|
	dst++;
 | 
						|
      }
 | 
						|
 | 
						|
    if ((unsigned long) dst & 2)
 | 
						|
      {
 | 
						|
	n -= 2;
 | 
						|
	*(short *) dst = *(short *) src;
 | 
						|
	src += 2;
 | 
						|
	dst += 2;
 | 
						|
      }
 | 
						|
  }
 | 
						|
 | 
						|
  /* Decide which copying method to use.  */
 | 
						|
  if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
 | 
						|
    {
 | 
						|
      /* It is not optimal to tell the compiler about clobbering any
 | 
						|
	 registers; that will move the saving/restoring of those registers
 | 
						|
	 to the function prologue/epilogue, and make non-movem sizes
 | 
						|
	 suboptimal.  */
 | 
						|
      __asm__ volatile
 | 
						|
	("\
 | 
						|
	 ;; GCC does promise correct register allocations, but let's	\n\
 | 
						|
	 ;; make sure it keeps its promises.				\n\
 | 
						|
	 .ifnc %0-%1-%2,$r13-$r11-$r12					\n\
 | 
						|
	 .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"	\n\
 | 
						|
	 .endif								\n\
 | 
						|
									\n\
 | 
						|
	 ;; Save the registers we'll use in the movem process		\n\
 | 
						|
	 ;; on the stack.						\n\
 | 
						|
	 subq	11*4,sp							\n\
 | 
						|
	 movem	r10,[sp]						\n\
 | 
						|
									\n\
 | 
						|
	 ;; Now we've got this:						\n\
 | 
						|
	 ;; r11 - src							\n\
 | 
						|
	 ;; r13 - dst							\n\
 | 
						|
	 ;; r12 - n							\n\
 | 
						|
									\n\
 | 
						|
	 ;; Update n for the first loop.				\n\
 | 
						|
	 subq	 44,r12							\n\
 | 
						|
0:									\n\
 | 
						|
"
 | 
						|
#ifdef __arch_common_v10_v32
 | 
						|
	 /* Cater to branch offset difference between v32 and v10.  We
 | 
						|
	    assume the branch below has an 8-bit offset.  */
 | 
						|
"	 setf\n"
 | 
						|
#endif
 | 
						|
"	 movem	[r11+],r10						\n\
 | 
						|
	 subq	44,r12							\n\
 | 
						|
	 bge	 0b							\n\
 | 
						|
	 movem	r10,[r13+]						\n\
 | 
						|
									\n\
 | 
						|
	 ;; Compensate for last loop underflowing n.			\n\
 | 
						|
	 addq	44,r12							\n\
 | 
						|
									\n\
 | 
						|
	 ;; Restore registers from stack.				\n\
 | 
						|
	 movem [sp+],r10"
 | 
						|
 | 
						|
	 /* Outputs.  */
 | 
						|
	 : "=r" (dst), "=r" (src), "=r" (n)
 | 
						|
 | 
						|
	 /* Inputs.  */
 | 
						|
	 : "0" (dst), "1" (src), "2" (n));
 | 
						|
    }
 | 
						|
 | 
						|
  while (n >= 16)
 | 
						|
    {
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
 | 
						|
      n -= 16;
 | 
						|
    }
 | 
						|
 | 
						|
  switch (n)
 | 
						|
    {
 | 
						|
    case 0:
 | 
						|
      break;
 | 
						|
 | 
						|
    case 1:
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 2:
 | 
						|
      *(short *) dst = *(short *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 3:
 | 
						|
      *(short *) dst = *(short *) src; dst += 2; src += 2;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 4:
 | 
						|
      *(long *) dst = *(long *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 5:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 6:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 7:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src; dst += 2; src += 2;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 8:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 9:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 10:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 11:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src; dst += 2; src += 2;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 12:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 13:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 14:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src;
 | 
						|
      break;
 | 
						|
 | 
						|
    case 15:
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(long *) dst = *(long *) src; dst += 4; src += 4;
 | 
						|
      *(short *) dst = *(short *) src; dst += 2; src += 2;
 | 
						|
      *dst = *src;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
 | 
						|
  return return_dst;
 | 
						|
}
 |