patch-2.1.44 linux/arch/sparc64/lib/blockops.S
Next file: linux/arch/sparc64/lib/checksum.S
Previous file: linux/arch/sparc64/lib/VISmemset.S
Back to the patch index
Back to the overall index
- Lines: 199
- Date:
Mon Jul 7 08:18:55 1997
- Orig file:
v2.1.43/linux/arch/sparc64/lib/blockops.S
- Orig date:
Sat May 24 09:10:23 1997
diff -u --recursive --new-file v2.1.43/linux/arch/sparc64/lib/blockops.S linux/arch/sparc64/lib/blockops.S
@@ -1,138 +1,70 @@
-/* $Id: blockops.S,v 1.6 1997/05/18 04:16:49 davem Exp $
+/* $Id: blockops.S,v 1.10 1997/06/24 17:29:10 jj Exp $
* arch/sparc64/lib/blockops.S: UltraSparc block zero optimized routines.
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
-#include <asm/asi.h>
-
- /* Zero out 256 bytes of memory at (buf + offset). */
-#define BLAST_BLOCK(buf, offset) \
- stda %f48, [buf + offset + 0x00] %asi; \
- stda %f48, [buf + offset + 0x40] %asi; \
- stda %f48, [buf + offset + 0x80] %asi; \
- stda %f48, [buf + offset + 0xc0] %asi;
-
- /* Copy 256 bytes of memory at (src + offset) to
- * (dst + offset).
- */
-#define MIRROR_BLOCK(dst, src, offset, sync) \
- ldda [src + offset + 0x000] %asi, %f0; \
- ldda [src + offset + 0x040] %asi, %f16; \
- ldda [src + offset + 0x080] %asi, %f32; \
- ldda [src + offset + 0x0c0] %asi, %f48; \
- membar sync; \
- stda %f0, [dst + offset + 0x000] %asi; \
- stda %f16, [dst + offset + 0x040] %asi; \
- stda %f32, [dst + offset + 0x080] %asi; \
- stda %f48, [dst + offset + 0x0c0] %asi;
+#include "VIS.h"
.text
- .align 4
-
-#if 0
- .globl bzero_1page
-bzero_1page:
- /* %o0 = buf */
- mov %o0, %o1
- wr %g0, ASI_BLK_P, %asi
- mov 0x08, %g2
- membar #Sync|#StoreLoad
- fzero %f48
- fzero %f50
- fzero %f52
- fzero %f54
- fzero %f56
- fzero %f58
- fzero %f60
- fzero %f62
-1:
- BLAST_BLOCK(%o0, 0x000)
- BLAST_BLOCK(%o0, 0x100)
- BLAST_BLOCK(%o0, 0x200)
- BLAST_BLOCK(%o0, 0x300)
- subcc %g2, 1, %g2
- bne,pt %icc, 1b
- add %o0, 0x400, %o0
-
- membar #Sync|#LoadStore|#StoreStore
-
- retl
- mov %o1, %o0
-#endif
+ .align 32
.globl __bfill64
-__bfill64:
-#if 1
- /* %o0 = buf, %o1 = 64-bit pattern */
-#define FILL_BLOCK(buf, offset) \
- stx %o1, [buf + offset + 0x38]; \
- stx %o1, [buf + offset + 0x30]; \
- stx %o1, [buf + offset + 0x28]; \
- stx %o1, [buf + offset + 0x20]; \
- stx %o1, [buf + offset + 0x18]; \
- stx %o1, [buf + offset + 0x10]; \
- stx %o1, [buf + offset + 0x08]; \
- stx %o1, [buf + offset + 0x00];
-
- mov 0x20, %g2
-1:
- FILL_BLOCK(%o0, 0x00)
- FILL_BLOCK(%o0, 0x40)
- FILL_BLOCK(%o0, 0x80)
- FILL_BLOCK(%o0, 0xc0)
- subcc %g2, 1, %g2
- bne,pt %icc, 1b
- add %o0, 0x100, %o0
- retl
- nop
-#undef FILL_BLOCK
-
-#else
- /* %o0 = buf */
- stx %o1, [%sp + 0x7ff + 128]
- wr %g0, ASI_BLK_P, %asi
- mov 0x08, %g2
- ldd [%sp + 0x7ff + 128], %f48
- membar #Sync|#StoreLoad
- fmovd %f48, %f50
- fmovd %f48, %f52
- fmovd %f48, %f54
- fmovd %f48, %f56
- fmovd %f48, %f58
- fmovd %f48, %f60
- fmovd %f48, %f62
-1:
- BLAST_BLOCK(%o0, 0x000)
- BLAST_BLOCK(%o0, 0x100)
- BLAST_BLOCK(%o0, 0x200)
- BLAST_BLOCK(%o0, 0x300)
- subcc %g2, 1, %g2
- bne,pt %icc, 1b
- add %o0, 0x400, %o0
-
- retl
- membar #Sync|#LoadStore|#StoreStore
-#endif
-
-#if 0
- .globl __copy_1page
-__copy_1page:
- /* %o0 = dst, %o1 = src */
- or %g0, 0x08, %g1
- wr %g0, ASI_BLK_P, %asi
- membar #Sync|#StoreLoad
-1:
- MIRROR_BLOCK(%o0, %o1, 0x000, #Sync)
- MIRROR_BLOCK(%o0, %o1, 0x100, #Sync)
- MIRROR_BLOCK(%o0, %o1, 0x200, #Sync)
- MIRROR_BLOCK(%o0, %o1, 0x300, #Sync)
- subcc %g1, 1, %g1
- add %o0, 0x400, %o0
- bne,pt %icc, 1b
- add %o1, 0x400, %o1
-
- retl
- membar #Sync|#LoadStore|#StoreStore
-#endif
+__bfill64: /* %o0 = buf, %o1 = ptr to 64-bit pattern; fills 32 * 0x100 bytes at buf */
+ wr %g0, FPRS_FEF, %fprs ! FPU Group: write FPRS_FEF to %fprs before touching FP regs
+ ldd [%o1], %f48 ! Load Group: %f48 = doubleword pattern read from [%o1]
+ wr %g0, ASI_BLK_P, %asi ! LSU Group: %asi = ASI_BLK_P, used by the stda stores below
+ membar #StoreStore | #LoadStore ! LSU Group: order earlier loads/stores before the stores below
+ mov 32, %g2 ! IEU0 Group: %g2 = loop counter (32 passes)
+
+ /* Cannot perform real arithmetic on the pattern, that can
+ * lead to fp_exception_other ;-)  Replicate it with fmovd only.
+ */
+ fmovd %f48, %f50 ! FPA Group: copy pattern into %f50
+ fmovd %f48, %f52 ! FPA Group: copy pattern into %f52
+ fmovd %f48, %f54 ! FPA Group: copy pattern into %f54
+ fmovd %f48, %f56 ! FPA Group: copy pattern into %f56
+ fmovd %f48, %f58 ! FPA Group: copy pattern into %f58
+ fmovd %f48, %f60 ! FPA Group: copy pattern into %f60
+ fmovd %f48, %f62 ! FPA Group: copy pattern into %f62
+
+1: stda %f48, [%o0 + 0x00] %asi ! Store Group: presumably a 64-byte block store of %f48..%f62 (confirm ASI_BLK_P)
+ stda %f48, [%o0 + 0x40] %asi ! Store Group: same, at buf + 0x40
+ stda %f48, [%o0 + 0x80] %asi ! Store Group: same, at buf + 0x80
+ stda %f48, [%o0 + 0xc0] %asi ! Store Group: same, at buf + 0xc0
+ subcc %g2, 1, %g2 ! IEU1 Group: decrement counter, set condition codes
+ bne,pt %icc, 1b ! CTI: loop until the counter reaches zero
+ add %o0, 0x100, %o0 ! IEU0: delay slot, advance buf by 0x100 on every pass
+ membar #Sync ! LSU Group: synchronize after the stores, before returning
+
+ jmpl %o7 + 0x8, %g0 ! CTI Group brk forced: return to caller
+ wr %g0, 0, %fprs ! FPU Group: delay slot, write 0 to %fprs
+
+ .align 32
+ .globl __bzero_1page
+__bzero_1page: /* %o0 = buf; zeroes 32 * 0x100 bytes at buf (presumably one page, per the name) */
+ wr %g0, FPRS_FEF, %fprs ! FPU Group: write FPRS_FEF to %fprs before touching FP regs
+ fzero %f0 ! FPA Group: %f0 = 0
+ mov 32, %g1 ! IEU0: %g1 = loop counter (32 passes)
+ fzero %f2 ! FPA Group: %f2 = 0
+ faddd %f0, %f2, %f4 ! FPA Group: %f4 = 0 + 0, add and mul alternate per the FPA/FPM notes
+ fmuld %f0, %f2, %f6 ! FPM: %f6 = 0 * 0
+ faddd %f0, %f2, %f8 ! FPA Group: %f8 = 0 + 0
+ fmuld %f0, %f2, %f10 ! FPM: %f10 = 0 * 0
+
+ faddd %f0, %f2, %f12 ! FPA Group: %f12 = 0 + 0
+ fmuld %f0, %f2, %f14 ! FPM: %f14 = 0 * 0, so %f0..%f14 are all zero
+ wr %g0, ASI_BLK_P, %asi ! LSU Group: %asi = ASI_BLK_P, used by the stda stores below
+ membar #StoreStore | #LoadStore ! LSU Group: order earlier loads/stores before the stores below
+1: stda %f0, [%o0 + 0x00] %asi ! Store Group: presumably a 64-byte block store of %f0..%f14 (confirm ASI_BLK_P)
+ stda %f0, [%o0 + 0x40] %asi ! Store Group: same, at buf + 0x40
+ stda %f0, [%o0 + 0x80] %asi ! Store Group: same, at buf + 0x80
+ stda %f0, [%o0 + 0xc0] %asi ! Store Group: same, at buf + 0xc0
+
+ subcc %g1, 1, %g1 ! IEU1: decrement counter, set condition codes
+ bne,pt %icc, 1b ! CTI: loop until the counter reaches zero
+ add %o0, 0x100, %o0 ! IEU0 Group: delay slot, advance buf by 0x100 on every pass
+ membar #Sync ! LSU Group: synchronize after the stores, before returning
+ jmpl %o7 + 0x8, %g0 ! CTI Group brk forced: return to caller
+ wr %g0, 0, %fprs ! FPU Group: delay slot, write 0 to %fprs
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov