patch-2.4.19 linux-2.4.19/arch/ppc64/lib/string.S
Next file: linux-2.4.19/arch/ppc64/mm/Makefile
Previous file: linux-2.4.19/arch/ppc64/lib/strcase.c
Back to the patch index
Back to the overall index
- Lines: 576
- Date:
Fri Aug 2 17:39:43 2002
- Orig file:
linux-2.4.18/arch/ppc64/lib/string.S
- Orig date:
Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ppc64/lib/string.S linux-2.4.19/arch/ppc64/lib/string.S
@@ -0,0 +1,575 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.tmpl>
+#include <asm/processor.h>
+#include <asm/errno.h>
+
+#define CACHE_LINE_SIZE 128
+#define LG_CACHE_LINE_SIZE 7
+#define MAX_COPY_PREFETCH 1
+
+#define COPY_16_BYTES \
+ lwz r7,4(r4); \
+ lwz r8,8(r4); \
+ lwz r9,12(r4); \
+ lwzu r10,16(r4); \
+ stw r7,4(r6); \
+ stw r8,8(r6); \
+ stw r9,12(r6); \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+8 ## n ## 7: \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n) \
+9 ## n ## 0: \
+ addi r5,r5,-(16 * n); \
+ b 104f; \
+9 ## n ## 1: \
+ addi r5,r5,-(16 * n); \
+ b 105f; \
+.section __ex_table,"a"; \
+ .align 3; \
+ .llong 8 ## n ## 0b,9 ## n ## 0b; \
+ .llong 8 ## n ## 1b,9 ## n ## 0b; \
+ .llong 8 ## n ## 2b,9 ## n ## 0b; \
+ .llong 8 ## n ## 3b,9 ## n ## 0b; \
+ .llong 8 ## n ## 4b,9 ## n ## 1b; \
+ .llong 8 ## n ## 5b,9 ## n ## 1b; \
+ .llong 8 ## n ## 6b,9 ## n ## 1b; \
+ .llong 8 ## n ## 7b,9 ## n ## 1b; \
+.text
+
+CACHELINE_BYTES = CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE
+CACHELINE_MASK = (CACHE_LINE_SIZE-1)
+
+_GLOBAL(strcpy)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r5)
+ bne 1b
+ blr
+
+_GLOBAL(strncpy)
+ cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r6)
+ bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
+ blr
+
+_GLOBAL(strcat)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r5)
+ cmpwi 0,r0,0
+ bne 1b
+ addi r5,r5,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r5)
+ bne 1b
+ blr
+
+_GLOBAL(strcmp)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r5)
+ cmpwi 1,r3,0
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ beqlr 1
+ beq 1b
+ blr
+
+_GLOBAL(strlen)
+ addi r4,r3,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ bne 1b
+ subf r3,r3,r4
+ blr
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable. -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+ mr r5,r4
+ li r4,0
+ addi r6,r3,-4
+ cmplwi 0,r5,4
+ blt 7f
+ stwu r4,4(r6)
+ beqlr
+ andi. r0,r6,3
+ add r5,r0,r5
+ subf r6,r0,r6
+ clrlwi r7,r6,32-LG_CACHELINE_BYTES
+ add r8,r7,r5
+ srwi r9,r8,LG_CACHELINE_BYTES
+ addic. r9,r9,-1 /* total number of complete cachelines */
+ ble 2f
+ xori r0,r7,CACHELINE_MASK & ~3
+ srwi. r0,r0,2
+ beq 3f
+ mtctr r0
+4: stwu r4,4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7,4
+10: dcbz r7,r6
+ addi r6,r6,CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r5,r8,32-LG_CACHELINE_BYTES
+ addi r5,r5,4
+2: srwi r0,r5,2
+ mtctr r0
+ bdz 6f
+1: stwu r4,4(r6)
+ bdnz 1b
+6: andi. r5,r5,3
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r6,3
+8: stbu r4,1(r6)
+ bdnz 8b
+ blr
+
+_GLOBAL(memset)
+ rlwimi r4,r4,8,16,23
+ rlwimi r4,r4,16,0,15
+ addi r6,r3,-4
+ cmplwi 0,r5,4
+ blt 7f
+ stwu r4,4(r6)
+ beqlr
+ andi. r0,r6,3
+ add r5,r0,r5
+ subf r6,r0,r6
+ srwi r0,r5,2
+ mtctr r0
+ bdz 6f
+1: stwu r4,4(r6)
+ bdnz 1b
+6: andi. r5,r5,3
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r6,3
+8: stbu r4,1(r6)
+ bdnz 8b
+ blr
+
+_GLOBAL(memmove)
+ cmplw 0,r3,r4
+ bgt .backwards_memcpy
+ /* fall through */
+
+_GLOBAL(memcpy)
+ srwi. r7,r5,3
+ addi r6,r3,-4
+ addi r4,r4,-4
+ beq 2f /* if less than 8 bytes to do */
+ andi. r0,r6,3 /* get dest word aligned */
+ mtctr r7
+ bne 5f
+1: lwz r7,4(r4)
+ lwzu r8,8(r4)
+ stw r7,4(r6)
+ stwu r8,8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,4(r4)
+ addi r5,r5,-4
+ stwu r0,4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r4,r4,3
+ addi r6,r6,3
+4: lbzu r0,1(r4)
+ stbu r0,1(r6)
+ bdnz 4b
+ blr
+5: subfic r0,r0,4
+ mtctr r0
+6: lbz r7,4(r4)
+ addi r4,r4,1
+ stb r7,4(r6)
+ addi r6,r6,1
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(backwards_memcpy)
+ rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */
+ add r6,r3,r5
+ add r4,r4,r5
+ beq 2f
+ andi. r0,r6,3
+ mtctr r7
+ bne 5f
+1: lwz r7,-4(r4)
+ lwzu r8,-8(r4)
+ stw r7,-4(r6)
+ stwu r8,-8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,-4(r4)
+ subi r5,r5,4
+ stwu r0,-4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+4: lbzu r0,-1(r4)
+ stbu r0,-1(r6)
+ bdnz 4b
+ blr
+5: mtctr r0
+6: lbzu r7,-1(r4)
+ stbu r7,-1(r6)
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(memcmp)
+ cmpwi 0,r5,0
+ ble- 2f
+ mtctr r5
+ addi r6,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r6)
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ bdnzt 2,1b
+ blr
+2: li r3,0
+ blr
+
+_GLOBAL(memchr)
+ cmpwi 0,r5,0
+ ble- 2f
+ mtctr r5
+ addi r3,r3,-1
+1: lbzu r0,1(r3)
+ cmpw 0,r0,r4
+ bdnzf 2,1b
+ beqlr
+2: li r3,0
+ blr
+
+_GLOBAL(__copy_tofrom_user)
+ addi r4,r4,-4
+ addi r6,r3,-4
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+70: lbz r9,4(r4) /* do some bytes */
+71: stb r9,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 70b
+61: subf r5,r0,r5
+ srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+73: stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ beq 63f
+
+ /* Here we decide how far ahead to prefetch the source */
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ li r7,1
+ li r3,4
+ ble 111f
+ li r7,MAX_COPY_PREFETCH
+111: mtctr r7
+112: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 112b
+#else /* MAX_COPY_PREFETCH == 1 */
+ li r3,CACHELINE_BYTES + 4
+ dcbt r11,r4
+#endif /* MAX_COPY_PREFETCH */
+
+ mtctr r0
+53:
+ dcbt r3,r4
+ dcbz r11,r6
+/* had to move these to keep extable in order */
+ .section __ex_table,"a"
+ .align 3
+ .llong 70b,100f
+ .llong 71b,101f
+ .llong 72b,102f
+ .llong 73b,103f
+ .llong 53b,105f
+ .text
+/* the main body of the cacheline loop */
+ COPY_16_BYTES_WITHEX(0)
+#if CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_WITHEX(1)
+#if CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_WITHEX(2)
+ COPY_16_BYTES_WITHEX(3)
+#if CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_WITHEX(4)
+ COPY_16_BYTES_WITHEX(5)
+ COPY_16_BYTES_WITHEX(6)
+ COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+ bdnz 53b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+31: stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3
+ mtctr r0
+ beq+ 65f
+40: lbz r0,4(r4)
+41: stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 40b
+65: li r3,0
+ blr
+
+/* read fault, initial single-byte copy */
+100: li r4,0
+ b 90f
+/* write fault, initial single-byte copy */
+101: li r4,1
+90: subf r5,r8,r5
+ li r3,0
+ b 99f
+/* read fault, initial word copy */
+102: li r4,0
+ b 91f
+/* write fault, initial word copy */
+103: li r4,1
+91: li r3,2
+ b 99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+ COPY_16_BYTES_EXCODE(0)
+#if CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_EXCODE(1)
+#if CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_EXCODE(2)
+ COPY_16_BYTES_EXCODE(3)
+#if CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_EXCODE(4)
+ COPY_16_BYTES_EXCODE(5)
+ COPY_16_BYTES_EXCODE(6)
+ COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104: li r4,0
+ b 92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105: li r4,1
+92: li r3,LG_CACHELINE_BYTES
+ b 99f
+/* read fault in final word loop */
+108: li r4,0
+ b 93f
+/* write fault in final word loop */
+109: li r4,1
+93: andi. r5,r5,3
+ li r3,2
+ b 99f
+/* read fault in final byte loop */
+110: li r4,0
+ b 94f
+/* write fault in final byte loop */
+111: li r4,1
+94: li r5,0
+ li r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r4 is 0 for read or 1 for write.
+ */
+99: mfctr r0
+ slw r3,r0,r3
+ add r3,r3,r5
+ cmpwi 0,r4,0
+ bne 120f
+/* for read fault, clear out the destination: r3 bytes starting at 4(r6) */
+ srwi. r0,r3,2
+ li r9,0
+ mtctr r0
+ beq 113f
+112: stwu r9,4(r6)
+ bdnz 112b
+113: andi. r0,r3,3
+ mtctr r0
+ beq 120f
+114: stb r9,4(r6)
+ addi r6,r6,1
+ bdnz 114b
+120: blr
+
+ .section __ex_table,"a"
+ .align 3
+ .llong 30b,108b
+ .llong 31b,109b
+ .llong 40b,110b
+ .llong 41b,111b
+ .llong 112b,120b
+ .llong 114b,120b
+ .text
+
+_GLOBAL(__clear_user)
+ addi r6,r3,-4
+ li r3,0
+ li r5,0
+ cmplwi 0,r4,4
+ blt 7f
+ /* clear a single word */
+11: stwu r5,4(r6)
+ beqlr
+ /* clear word sized chunks */
+ andi. r0,r6,3
+ add r4,r0,r4
+ subf r6,r0,r6
+ srwi r0,r4,2
+ mtctr r0
+ bdz 6f
+1: stwu r5,4(r6)
+ bdnz 1b
+6: andi. r4,r4,3
+ /* clear byte sized chunks */
+7: cmpwi 0,r4,0
+ beqlr
+ mtctr r4
+ addi r6,r6,3
+8: stbu r5,1(r6)
+ bdnz 8b
+ blr
+99: li r3,-EFAULT
+ blr
+
+ .section __ex_table,"a"
+ .align 3
+ .llong 11b,99b
+ .llong 1b,99b
+ .llong 8b,99b
+ .text
+
+_GLOBAL(__strncpy_from_user)
+ addi r6,r3,-1
+ addi r4,r4,-1
+ cmpwi 0,r5,0
+ beq 2f
+ mtctr r5
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r6)
+ bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
+ beq 3f
+2: addi r6,r6,1
+3: subf r3,r3,r6
+ blr
+99: li r3,-EFAULT
+ blr
+
+ .section __ex_table,"a"
+ .align 3
+ .llong 1b,99b
+ .text
+
+/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
+_GLOBAL(__strnlen_user)
+ addi r7,r3,-1
+ subf r6,r7,r5 /* top+1 - str */
+ cmplw 0,r4,r6
+ bge 0f
+ mr r6,r4
+0: mtctr r6 /* ctr = min(len, top - str) */
+1: lbzu r0,1(r7) /* get next byte */
+ cmpwi 0,r0,0
+ bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
+ addi r7,r7,1
+ subf r3,r3,r7 /* number of bytes we have looked at */
+ beqlr /* return if we found a 0 byte */
+ cmpw 0,r3,r4 /* did we look at all len bytes? */
+ blt 99f /* if not, must have hit top */
+ addi r3,r4,1 /* return len + 1 to indicate no null found */
+ blr
+99: li r3,0 /* bad address, return 0 */
+ blr
+
+ .section __ex_table,"a"
+ .align 3
+ .llong 1b,99b
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)