patch-2.4.21 linux-2.4.21/arch/ppc64/lib/string.S
- Lines: 363
- Date: 2003-06-13 07:51:32.000000000 -0700
- Orig file: linux-2.4.20/arch/ppc64/lib/string.S
- Orig date: 2002-11-28 15:53:11.000000000 -0800
diff -urN linux-2.4.20/arch/ppc64/lib/string.S linux-2.4.21/arch/ppc64/lib/string.S
@@ -12,61 +12,6 @@
#include <asm/processor.h>
#include <asm/errno.h>
-#define CACHE_LINE_SIZE 128
-#define LG_CACHE_LINE_SIZE 7
-#define MAX_COPY_PREFETCH 1
-
-#define COPY_16_BYTES \
- lwz r7,4(r4); \
- lwz r8,8(r4); \
- lwz r9,12(r4); \
- lwzu r10,16(r4); \
- stw r7,4(r6); \
- stw r8,8(r6); \
- stw r9,12(r6); \
- stwu r10,16(r6)
-
-#define COPY_16_BYTES_WITHEX(n) \
-8 ## n ## 0: \
- lwz r7,4(r4); \
-8 ## n ## 1: \
- lwz r8,8(r4); \
-8 ## n ## 2: \
- lwz r9,12(r4); \
-8 ## n ## 3: \
- lwzu r10,16(r4); \
-8 ## n ## 4: \
- stw r7,4(r6); \
-8 ## n ## 5: \
- stw r8,8(r6); \
-8 ## n ## 6: \
- stw r9,12(r6); \
-8 ## n ## 7: \
- stwu r10,16(r6)
-
-#define COPY_16_BYTES_EXCODE(n) \
-9 ## n ## 0: \
- addi r5,r5,-(16 * n); \
- b 104f; \
-9 ## n ## 1: \
- addi r5,r5,-(16 * n); \
- b 105f; \
-.section __ex_table,"a"; \
- .align 3; \
- .llong 8 ## n ## 0b,9 ## n ## 0b; \
- .llong 8 ## n ## 1b,9 ## n ## 0b; \
- .llong 8 ## n ## 2b,9 ## n ## 0b; \
- .llong 8 ## n ## 3b,9 ## n ## 0b; \
- .llong 8 ## n ## 4b,9 ## n ## 1b; \
- .llong 8 ## n ## 5b,9 ## n ## 1b; \
- .llong 8 ## n ## 6b,9 ## n ## 1b; \
- .llong 8 ## n ## 7b,9 ## n ## 1b; \
-.text
-
-CACHELINE_BYTES = CACHE_LINE_SIZE
-LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE
-CACHELINE_MASK = (CACHE_LINE_SIZE-1)
-
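For context on what is being deleted: COPY_16_BYTES_WITHEX(n) pastes n into numeric labels 8n0..8n7, one per load/store that may fault, and COPY_16_BYTES_EXCODE(n) emits the matching fixup labels 9n0/9n1 plus one __ex_table entry per instruction (each .llong pair is one entry, hence the .align 3). Conceptually the table pairs a possibly-faulting instruction address with the address to resume at after a fault; a minimal C sketch, assuming only the two-unsigned-long entry layout implied by the .llong pairs:

    #include <stddef.h>

    /* One entry per load/store that may fault on a user access:
     * insn is the instruction's address, fixup is where the fault
     * handler resumes execution. */
    struct exception_table_entry {
        unsigned long insn;
        unsigned long fixup;
    };

    /* Sketch of the lookup a fault handler performs; the real kernel
     * keeps the table sorted and binary-searches it. */
    static unsigned long find_fixup(const struct exception_table_entry *tab,
                                    size_t n, unsigned long faulting_pc)
    {
        for (size_t i = 0; i < n; i++)
            if (tab[i].insn == faulting_pc)
                return tab[i].fixup;
        return 0; /* no fixup: a genuine kernel fault */
    }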
_GLOBAL(strcpy)
addi r5,r3,-1
addi r4,r4,-1
@@ -120,54 +65,6 @@
subf r3,r3,r4
blr
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero. This requires that the destination
- * area is cacheable. -- paulus
- */
-_GLOBAL(cacheable_memzero)
- mr r5,r4
- li r4,0
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
- beqlr
- andi. r0,r6,3
- add r5,r0,r5
- subf r6,r0,r6
- clrlwi r7,r6,32-LG_CACHELINE_BYTES
- add r8,r7,r5
- srwi r9,r8,LG_CACHELINE_BYTES
- addic. r9,r9,-1 /* total number of complete cachelines */
- ble 2f
- xori r0,r7,CACHELINE_MASK & ~3
- srwi. r0,r0,2
- beq 3f
- mtctr r0
-4: stwu r4,4(r6)
- bdnz 4b
-3: mtctr r9
- li r7,4
-10: dcbz r7,r6
- addi r6,r6,CACHELINE_BYTES
- bdnz 10b
- clrlwi r5,r8,32-LG_CACHELINE_BYTES
- addi r5,r5,4
-2: srwi r0,r5,2
- mtctr r0
- bdz 6f
-1: stwu r4,4(r6)
- bdnz 1b
-6: andi. r5,r5,3
-7: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r6,r6,3
-8: stbu r4,1(r6)
- bdnz 8b
- blr
-
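The cacheable_memzero deleted above stores zero a word at a time until the destination is word-aligned, clears every complete cache line with dcbz (which zeroes a whole 128-byte line in one shot, hence the requirement that the destination be cacheable), then mops up the word and byte tails. A rough C equivalent of that control flow, with plain memset standing in for dcbz (illustrative names, not kernel API):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define CACHELINE_BYTES 128UL  /* the constant this patch removes */

    static void cacheable_memzero_sketch(void *p, size_t n)
    {
        uint8_t *q = p;

        /* lead-in bytes up to the first cache-line boundary */
        while (n && ((uintptr_t)q & (CACHELINE_BYTES - 1))) {
            *q++ = 0;
            n--;
        }
        /* whole cache lines: one dcbz each in the real routine */
        while (n >= CACHELINE_BYTES) {
            memset(q, 0, CACHELINE_BYTES);
            q += CACHELINE_BYTES;
            n -= CACHELINE_BYTES;
        }
        /* tail bytes */
        while (n--)
            *q++ = 0;
    }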
_GLOBAL(memset)
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
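The two rlwimi instructions shown above (context the patch keeps) splat the fill byte across the whole 32-bit word: the first copies the low byte into bits 16-23, the second copies the low halfword into the high halfword. The same trick as a C sketch:

    #include <stdint.h>

    static inline uint32_t splat_byte(uint8_t c)
    {
        uint32_t v = c;
        v |= v << 8;   /* rlwimi r4,r4,8,16,23 */
        v |= v << 16;  /* rlwimi r4,r4,16,0,15 */
        return v;      /* equivalently: c * 0x01010101u */
    }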
@@ -196,48 +93,7 @@
_GLOBAL(memmove)
cmplw 0,r3,r4
bgt .backwards_memcpy
- /* fall through */
-
-_GLOBAL(memcpy)
- srwi. r7,r5,3
- addi r6,r3,-4
- addi r4,r4,-4
- beq 2f /* if less than 8 bytes to do */
- andi. r0,r6,3 /* get dest word aligned */
- mtctr r7
- bne 5f
-1: lwz r7,4(r4)
- lwzu r8,8(r4)
- stw r7,4(r6)
- stwu r8,8(r6)
- bdnz 1b
- andi. r5,r5,7
-2: cmplwi 0,r5,4
- blt 3f
- lwzu r0,4(r4)
- addi r5,r5,-4
- stwu r0,4(r6)
-3: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r4,r4,3
- addi r6,r6,3
-4: lbzu r0,1(r4)
- stbu r0,1(r6)
- bdnz 4b
- blr
-5: subfic r0,r0,4
- mtctr r0
-6: lbz r7,4(r4)
- addi r4,r4,1
- stb r7,4(r6)
- addi r6,r6,1
- bdnz 6b
- subf r5,r0,r5
- rlwinm. r7,r5,32-3,3,31
- beq 2b
- mtctr r7
- b 1b
+ b .memcpy
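With the inline 32-bit copy loop gone, memmove reduces to a direction check plus a branch: copy backwards when the destination lies above the source (so an overlapping forward copy cannot clobber bytes it has yet to read), otherwise branch to the 64-bit memcpy this patch adds in arch/ppc64/lib/memcpy.S. A C sketch of the dispatch:

    #include <stddef.h>
    #include <string.h>

    void *backwards_memcpy(void *dest, const void *src, size_t n);

    void *memmove_sketch(void *dest, const void *src, size_t n)
    {
        if ((unsigned long)dest > (unsigned long)src)
            return backwards_memcpy(dest, src, n); /* high-to-low copy */
        return memcpy(dest, src, n);               /* forward copy is safe */
    }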
_GLOBAL(backwards_memcpy)
 rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
@@ -301,195 +157,6 @@
2: li r3,0
blr
-_GLOBAL(__copy_tofrom_user)
- addi r4,r4,-4
- addi r6,r3,-4
- neg r0,r3
- andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
- beq 58f
-
- cmplw 0,r5,r0 /* is this more than total to do? */
- blt 63f /* if not much to do */
- andi. r8,r0,3 /* get it word-aligned first */
- mtctr r8
- beq+ 61f
-70: lbz r9,4(r4) /* do some bytes */
-71: stb r9,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 70b
-61: subf r5,r0,r5
- srwi. r0,r0,2
- mtctr r0
- beq 58f
-72: lwzu r9,4(r4) /* do some words */
-73: stwu r9,4(r6)
- bdnz 72b
-
-58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
- clrlwi r5,r5,32-LG_CACHELINE_BYTES
- li r11,4
- beq 63f
-
- /* Here we decide how far ahead to prefetch the source */
-#if MAX_COPY_PREFETCH > 1
- /* Heuristically, for large transfers we prefetch
- MAX_COPY_PREFETCH cachelines ahead. For small transfers
- we prefetch 1 cacheline ahead. */
- cmpwi r0,MAX_COPY_PREFETCH
- li r7,1
- li r3,4
- ble 111f
- li r7,MAX_COPY_PREFETCH
-111: mtctr r7
-112: dcbt r3,r4
- addi r3,r3,CACHELINE_BYTES
- bdnz 112b
-#else /* MAX_COPY_PREFETCH == 1 */
- li r3,CACHELINE_BYTES + 4
- dcbt r11,r4
-#endif /* MAX_COPY_PREFETCH */
-
- mtctr r0
- dcbt r3,r4
-53:
-54: dcbz r11,r6
-/* had to move these to keep extable in order */
- .section __ex_table,"a"
- .align 3
- .llong 70b,100f
- .llong 71b,101f
- .llong 72b,102f
- .llong 73b,103f
- .llong 54b,105f
- .text
-/* the main body of the cacheline loop */
- COPY_16_BYTES_WITHEX(0)
-#if CACHE_LINE_SIZE >= 32
- COPY_16_BYTES_WITHEX(1)
-#if CACHE_LINE_SIZE >= 64
- COPY_16_BYTES_WITHEX(2)
- COPY_16_BYTES_WITHEX(3)
-#if CACHE_LINE_SIZE >= 128
- COPY_16_BYTES_WITHEX(4)
- COPY_16_BYTES_WITHEX(5)
- COPY_16_BYTES_WITHEX(6)
- COPY_16_BYTES_WITHEX(7)
-#endif
-#endif
-#endif
- bdnz 53b
-
-63: srwi. r0,r5,2
- mtctr r0
- beq 64f
-30: lwzu r0,4(r4)
-31: stwu r0,4(r6)
- bdnz 30b
-
-64: andi. r0,r5,3
- mtctr r0
- beq+ 65f
-40: lbz r0,4(r4)
-41: stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 40b
-65: li r3,0
- blr
-
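The prefetch block in the loop above pairs dcbt on the source (touch it into the cache ahead of use) with dcbz on each destination line (pre-zero it so the stores never wait on a read-for-ownership). The heuristic behind the MAX_COPY_PREFETCH conditional: prefetch MAX_COPY_PREFETCH cache lines ahead for large copies, one line ahead for small ones; since MAX_COPY_PREFETCH is defined as 1 here, the #else branch is what actually assembles. As a sketch:

    /* MAX_COPY_PREFETCH is the constant removed at the top of this patch. */
    #define MAX_COPY_PREFETCH 1

    /* whole_lines = complete cache lines left to copy (r0 above). */
    static unsigned prefetch_depth(unsigned long whole_lines)
    {
        return whole_lines > MAX_COPY_PREFETCH ? MAX_COPY_PREFETCH : 1;
    }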
-/* read fault, initial single-byte copy */
-100: li r4,0
- b 90f
-/* write fault, initial single-byte copy */
-101: li r4,1
-90: subf r5,r8,r5
- li r3,0
- b 99f
-/* read fault, initial word copy */
-102: li r4,0
- b 91f
-/* write fault, initial word copy */
-103: li r4,1
-91: li r3,2
- b 99f
-
-/*
- * this stuff handles faults in the cacheline loop and branches to either
- * 104f (if in read part) or 105f (if in write part), after updating r5
- */
- COPY_16_BYTES_EXCODE(0)
-#if CACHE_LINE_SIZE >= 32
- COPY_16_BYTES_EXCODE(1)
-#if CACHE_LINE_SIZE >= 64
- COPY_16_BYTES_EXCODE(2)
- COPY_16_BYTES_EXCODE(3)
-#if CACHE_LINE_SIZE >= 128
- COPY_16_BYTES_EXCODE(4)
- COPY_16_BYTES_EXCODE(5)
- COPY_16_BYTES_EXCODE(6)
- COPY_16_BYTES_EXCODE(7)
-#endif
-#endif
-#endif
-
-/* read fault in cacheline loop */
-104: li r4,0
- b 92f
-/* fault on dcbz (effectively a write fault) */
-/* or write fault in cacheline loop */
-105: li r4,1
-92: li r3,LG_CACHELINE_BYTES
- b 99f
-/* read fault in final word loop */
-108: li r4,0
- b 93f
-/* write fault in final word loop */
-109: li r4,1
-93: andi. r5,r5,3
- li r3,2
- b 99f
-/* read fault in final byte loop */
-110: li r4,0
- b 94f
-/* write fault in final byte loop */
-111: li r4,1
-94: li r5,0
- li r3,0
-/*
- * At this stage the number of bytes not copied is
- * r5 + (ctr << r3), and r4 is 0 for read or 1 for write.
- */
-99: mfctr r0
- slw r3,r0,r3
- add r3,r3,r5
- cmpwi 0,r4,0
- bne 120f
-/* for read fault, clear out the destination: r3 bytes starting at 4(r6) */
- srwi. r0,r3,2
- li r9,0
- mtctr r0
- beq 113f
-112: stwu r9,4(r6)
- bdnz 112b
-113: andi. r0,r3,3
- mtctr r0
- beq 120f
-114: stb r9,4(r6)
- addi r6,r6,1
- bdnz 114b
-120: blr
-
- .section __ex_table,"a"
- .align 3
- .llong 30b,108b
- .llong 31b,109b
- .llong 40b,110b
- .llong 41b,111b
- .llong 112b,120b
- .llong 114b,120b
- .text
-
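The fixup code reached through label 99: encodes, at every fault site, how much was left to copy: r5 holds the stray tail bytes, ctr the loop iterations remaining, and r3 the log2 of the chunk size per iteration (0 in the byte loops, 2 in the word loops, LG_CACHELINE_BYTES in the cacheline loop). The uncopied count, which is also the routine's return value in r3, is therefore r5 + (ctr << r3). On a read fault the same count of destination bytes is then zeroed (the 112:/114: loops) so the caller never sees stale data. The bookkeeping as a C sketch:

    #include <stddef.h>
    #include <string.h>

    /* tail  = r5, bytes outside the interrupted loop
     * ctr   = iterations the interrupted loop had left
     * lg_sz = r3, log2 of bytes moved per iteration */
    static size_t bytes_not_copied(size_t tail, size_t ctr, unsigned lg_sz)
    {
        return tail + (ctr << lg_sz);
    }

    /* Read fault: zero the rest of the destination buffer. */
    static void fixup_read_fault(void *dest_pos, size_t remaining)
    {
        memset(dest_pos, 0, remaining);
    }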
_GLOBAL(__clear_user)
addi r6,r3,-4
li r3,0