patch-2.3.16 linux/arch/sh/lib/memcpy.S

diff -u --recursive --new-file v2.3.15/linux/arch/sh/lib/memcpy.S linux/arch/sh/lib/memcpy.S
@@ -0,0 +1,131 @@
+! Taken from newlib-1.8.0
+
+!
+! Fast SH memcpy
+!
+! by Toshiyasu Morita (tm@netcom.com)
+! hacked by J"orn Rennecke (amylaar@cygnus.co.uk) ("o for o-umlaut)
+!
+! Entry: r4: destination pointer
+!        r5: source pointer
+!        r6: byte count
+!
+! Exit:  r0: destination pointer
+!        r1-r7: trashed
+!
+! Notes: Usually one wants to do small reads and write a longword, but
+!        unfortunately it is difficult in some cases to concatenate bytes
+!        into a longword on the SH, so this does a longword read and small
+!        writes.
+!
+! This implementation makes two assumptions about how it is called:
+!
+! 1.: If the byte count is nonzero, the address of the last byte to be
+!     copied is unsigned greater than the address of the first byte to
+!     be copied.  This could easily be swapped for a signed comparison,
+!     but the algorithm used needs some comparison either way.
+!
+! 2.: When there are two or three bytes in the last word of an 11-or-more
+!     byte memory chunk to be copied, the rest of the word can be read
+!     without side effects.
+!     This could easily be changed by increasing the minimum size of
+!     a fast memcpy and making the amount subtracted from r7 before
+!     L_2l_loop be 2; however, this would cost a few extra cycles on average.
+!
+
+#include <linux/linkage.h>
+ENTRY(memcpy)
+	! Big endian version copies with decreasing addresses.
+	mov	r4,r0
+	add	r6,r0
+	sub	r4,r5
+	mov	#11,r1
+	cmp/hs	r1,r6
+	bf/s	L_small
+	 add	#-1,r5
+	mov	r5,r3
+	add	r0,r3
+	shlr	r3
+	bt/s	L_even
+	 mov	r4,r7
+	mov.b	@(r0,r5),r2
+	add	#-1,r3
+	mov.b	r2,@-r0
+L_even:
+	tst	#1,r0
+	add	#-1,r5
+	bf/s	L_odddst
+	 add	#8,r7
+	tst	#2,r0
+	bt	L_al4dst
+	add	#-1,r3
+	mov.w	@(r0,r5),r1
+	mov.w	r1,@-r0
+L_al4dst:
+	shlr	r3
+	bt	L_al4both
+	mov.w	@(r0,r5),r1
+	swap.w	r1,r1
+	add	#4,r7
+	add	#-4,r5
+	.align	2
+L_2l_loop:
+	mov.l	@(r0,r5),r2
+	xtrct	r2,r1
+	mov.l	r1,@-r0
+	cmp/hs	r7,r0
+	mov.l	@(r0,r5),r1
+	xtrct	r1,r2
+	mov.l	r2,@-r0
+	bt	L_2l_loop
+	bra	L_cleanup
+	 add	#5,r5
+
+	nop ! avoid nop in executed code.
+L_al4both:
+	add	#-2,r5
+	.align	2
+L_al4both_loop:
+	mov.l	@(r0,r5),r1
+	cmp/hs	r7,r0
+	bt/s	L_al4both_loop
+	 mov.l	r1,@-r0
+	bra	L_cleanup
+	 add	#3,r5
+
+	nop ! avoid nop in executed code.
+L_odddst:
+	shlr	r3
+	bt	L_al4src
+	mov.w	@(r0,r5),r1
+	mov.b	r1,@-r0
+	shlr8	r1
+	mov.b	r1,@-r0
+L_al4src:
+	add	#-2,r5
+	.align	2
+L_odd_loop:
+	mov.l	@(r0,r5),r2
+	cmp/hs	r7,r0
+	mov.b	r2,@-r0
+	shlr8	r2
+	mov.w	r2,@-r0
+	shlr16	r2
+	mov.b	r2,@-r0
+	bt	L_odd_loop
+
+	add	#3,r5
+L_cleanup:
+L_small:
+	cmp/eq	r4,r0
+	bt	L_ready
+	add	#1,r4
+	.align	2
+L_cleanup_loop:
+	mov.b	@(r0,r5),r2
+	cmp/eq	r4,r0
+	mov.b	r2,@-r0
+	bf	L_cleanup_loop
+L_ready:
+	rts
+	 nop
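
The header comment above notes that this routine prefers a longword read
followed by small writes, since assembling bytes into a longword is
awkward on the SH.  As a rough illustration only (not the kernel's code:
it copies forward rather than backward and assumes a little-endian host),
the same idea in C might look like the sketch below; sketch_memcpy is a
made-up name.

  #include <stddef.h>
  #include <stdint.h>

  void *sketch_memcpy(void *dst, const void *src, size_t n)
  {
      unsigned char *d = dst;
      const unsigned char *s = src;

      /* Copy leading bytes until the source is longword aligned. */
      while (n && ((uintptr_t)s & 3)) {
          *d++ = *s++;
          n--;
      }

      /* Aligned longword reads, byte-sized writes: the destination may be
       * misaligned, but no unaligned load is ever issued. */
      while (n >= 4) {
          uint32_t w = *(const uint32_t *)(const void *)s;
          d[0] = (unsigned char)w;           /* low byte first on little endian */
          d[1] = (unsigned char)(w >> 8);
          d[2] = (unsigned char)(w >> 16);
          d[3] = (unsigned char)(w >> 24);
          d += 4;
          s += 4;
          n -= 4;
      }

      /* Trailing bytes. */
      while (n--)
          *d++ = *s++;

      return dst;
  }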
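
The second assumption in the header comment (that the rest of the last
word can be read without side effects) boils down to loading the whole
aligned longword that holds the final two or three bytes and storing only
the bytes that belong to the copy.  A hypothetical C fragment of that
trick, again assuming a little-endian host; copy_tail is a made-up name
and the caller is assumed to guarantee that off + left never exceeds 4.

  #include <stdint.h>

  void copy_tail(unsigned char *d, const unsigned char *s, unsigned left)
  {
      /* Load the whole aligned word containing the remaining bytes;
       * the bytes beyond the copy are read but never written. */
      const uint32_t *wp = (const uint32_t *)((uintptr_t)s & ~(uintptr_t)3);
      uint32_t w = *wp;
      unsigned off = (uintptr_t)s & 3;   /* copy starts at this byte of the word */

      while (left--) {
          *d++ = (unsigned char)(w >> (8 * off));  /* low byte first */
          off++;
      }
  }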
