patch-2.1.44 linux/arch/sparc64/lib/VISmemset.S
Next file: linux/arch/sparc64/lib/blockops.S
Previous file: linux/arch/sparc64/lib/VIScopy.S
Back to the patch index
Back to the overall index
- Lines: 229
- Date: Mon Jul 7 08:18:55 1997
- Orig file: v2.1.43/linux/arch/sparc64/lib/VISmemset.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.1.43/linux/arch/sparc64/lib/VISmemset.S linux/arch/sparc64/lib/VISmemset.S
@@ -0,0 +1,228 @@
+/* $Id: VISmemset.S,v 1.4 1997/07/02 19:00:39 jj Exp $
+ * VISmemset.S: High speed memset operations utilizing the UltraSparc
+ * Visual Instruction Set.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include "VIS.h"
+
+/* SET_BLOCKS(base, offset, source) stores 32 bytes of the replicated
+ * pattern in `source' over [base - offset - 0x18, base - offset + 0x08):
+ * four 8-byte stx stores when 64-bit registers are usable, otherwise
+ * eight 4-byte stw stores covering the same range.  The two variants
+ * must keep these exact instruction counts: the computed jump in the
+ * tail of memset backs up by %g5/2 (stx) or %g5 (stw) instruction
+ * bytes into a run of these macros.
+ */
+#ifdef REGS_64BIT
+#define SET_BLOCKS(base, offset, source) \
+ stx source, [base - offset - 0x18]; \
+ stx source, [base - offset - 0x10]; \
+ stx source, [base - offset - 0x08]; \
+ stx source, [base - offset - 0x00];
+#else
+#define SET_BLOCKS(base, offset, source) \
+ stw source, [base - offset - 0x18]; \
+ stw source, [base - offset - 0x14]; \
+ stw source, [base - offset - 0x10]; \
+ stw source, [base - offset - 0x0c]; \
+ stw source, [base - offset - 0x08]; \
+ stw source, [base - offset - 0x04]; \
+ stw source, [base - offset - 0x00]; \
+ stw source, [base - offset + 0x04];
+#endif
+
+#ifndef __KERNEL__
+/* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */
+#include "VISbzero.S"
+#endif
+
+/* Return-value convention: the kernel variant clears %o0 on return,
+ * while the userland variant returns the original destination pointer
+ * saved in %g3 at entry.
+ */
+#ifdef __KERNEL__
+#define RETL clr %o0
+#else
+#define RETL mov %g3, %o0
+#endif
+
+ /* Well, memset is a lot easier to get right than bcopy... */
+ .text
+ .align 32
+#ifdef __KERNEL__
+ .globl __memset
+__memset:
+#endif
+ .globl memset
+memset:
+#ifndef __KERNEL__
+ /* Userland: a zero fill byte diverts to the local bzero (avoiding a
+  * PLT round trip); %g3 remembers the destination for the return value.
+  */
+ brz,a,pt %o1, bzero_private
+ mov %o2, %o1
+#ifndef REGS_64BIT
+ srl %o2, 0, %o2
+#endif
+ mov %o0, %g3
+#endif
+ /* Lengths of 7 bytes or less take the trivial byte loop at 17f.
+  * Otherwise store 0-3 bytes to make the destination 4-byte aligned.
+  */
+ cmp %o2, 7
+ bleu,pn %xcc, 17f
+ andcc %o0, 3, %g5
+ be,pt %xcc, 4f
+ and %o1, 0xff, %o1
+ cmp %g5, 3
+ be,pn %xcc, 2f
+ stb %o1, [%o0 + 0x00]
+ cmp %g5, 2
+ be,pt %xcc, 2f
+ stb %o1, [%o0 + 0x01]
+ stb %o1, [%o0 + 0x02]
+2: sub %g5, 4, %g5
+ sub %o0, %g5, %o0
+ add %o2, %g5, %o2
+ /* Replicate the fill byte into all 4 bytes of %o1 (and build the
+  * high 32 bits in %g1 for the 64-bit pattern), then if the pointer
+  * is only 4-byte aligned emit one stw to reach 8-byte alignment.
+  */
+4: sllx %o1, 8, %g1
+ andcc %o0, 4, %g0
+ or %o1, %g1, %o1
+ sllx %o1, 16, %g1
+ or %o1, %g1, %o1
+ be,pt %xcc, 2f
+#ifdef REGS_64BIT
+ sllx %o1, 32, %g1
+#else
+ cmp %o2, 128
+#endif
+ stw %o1, [%o0]
+ sub %o2, 4, %o2
+ add %o0, 4, %o0
+2:
+#ifdef REGS_64BIT
+ cmp %o2, 128
+ or %o1, %g1, %o1
+#endif
+ /* Under 128 bytes: skip VIS entirely, finish at 9f.  Otherwise walk
+  * up to a 64-byte boundary: %g5 = %o0 & 0x38, %o5 = 64 - %g5 bytes
+  * stored via the conditional 8/16/32 byte stores below (6f handles
+  * the already-aligned case).
+  */
+ blu,pn %xcc, 9f
+ andcc %o0, 0x38, %g5
+ be,pn %icc, 6f
+ mov 64, %o5
+ andcc %o0, 8, %g0
+ be,pn %icc, 1f
+ sub %o5, %g5, %o5
+#ifdef REGS_64BIT
+ stx %o1, [%o0]
+#else
+ stw %o1, [%o0]
+ stw %o1, [%o0 + 4]
+#endif
+ add %o0, 8, %o0
+1: andcc %o5, 16, %g0
+ be,pn %icc, 1f
+ sub %o2, %o5, %o2
+#ifdef REGS_64BIT
+ stx %o1, [%o0]
+ stx %o1, [%o0 + 8]
+#else
+ stw %o1, [%o0]
+ stw %o1, [%o0 + 4]
+ stw %o1, [%o0 + 8]
+ stw %o1, [%o0 + 12]
+#endif
+ add %o0, 16, %o0
+1: andcc %o5, 32, %g0
+ be,pn %icc, 7f
+ andncc %o2, 0x3f, %o3
+#ifdef REGS_64BIT
+ stx %o1, [%o0]
+ stx %o1, [%o0 + 8]
+ stx %o1, [%o0 + 16]
+ stx %o1, [%o0 + 24]
+#else
+ stw %o1, [%o0]
+ stw %o1, [%o0 + 4]
+ stw %o1, [%o0 + 8]
+ stw %o1, [%o0 + 12]
+ stw %o1, [%o0 + 16]
+ stw %o1, [%o0 + 20]
+ stw %o1, [%o0 + 24]
+ stw %o1, [%o0 + 28]
+#endif
+ add %o0, 32, %o0
+ /* Enable the FPU (kernel only) and seed %f0 with the 8-byte pattern
+  * just stored at [%o0 - 8].  18f is also entered from 6f below.
+  */
+7: be,pn %xcc, 9f
+#ifdef __KERNEL__
+ wr %g0, FPRS_FEF, %fprs
+#endif
+ ldd [%o0 - 8], %f0
+ /* VIS block-store section: spread the pattern across %f0-%f14 so
+  * each stda with ASI_BLK_P writes a 64-byte block.  %g5 holds the
+  * leading 64/128-byte remainder, %o3 the count of full 256-byte
+  * chunks for the 4-stda loop at 10f, %o2 the final sub-64-byte tail.
+  */
+18: wr %g0, ASI_BLK_P, %asi
+ membar #StoreStore | #LoadStore
+ andcc %o3, 0xc0, %g5
+ and %o2, 0x3f, %o2
+ fmovd %f0, %f2
+ fmovd %f0, %f4
+ andn %o3, 0xff, %o3
+ fmovd %f0, %f6
+ cmp %g5, 64
+ fmovd %f0, %f8
+ fmovd %f0, %f10
+ fmovd %f0, %f12
+ brz,pn %g5, 10f
+ fmovd %f0, %f14
+ be,pn %icc, 2f
+ stda %f0, [%o0 + 0x00] %asi
+ cmp %g5, 128
+ be,pn %icc, 2f
+ stda %f0, [%o0 + 0x40] %asi
+ stda %f0, [%o0 + 0x80] %asi
+2: brz,pn %o3, 12f
+ add %o0, %g5, %o0
+10: stda %f0, [%o0 + 0x00] %asi
+ stda %f0, [%o0 + 0x40] %asi
+ stda %f0, [%o0 + 0x80] %asi
+ stda %f0, [%o0 + 0xc0] %asi
+11: subcc %o3, 256, %o3
+ bne,pt %xcc, 10b
+ add %o0, 256, %o0
+12:
+#ifdef __KERNEL__
+ wr %g0, 0, %fprs
+#endif
+ membar #Sync
+ /* Tail: fewer than 128 bytes remain.  %g5 = %o2 & 0x78 bytes are
+  * written by jumping part-way into the SET_BLOCKS run below: one
+  * 4-byte stx covers 8 data bytes, so back up %g5/2 instruction
+  * bytes (stw variant: one stw per 4 bytes, back up %g5 bytes).
+  * Only comments may be placed between 14 and 13 -- the jmpl offset
+  * is computed from the assembled distance between those labels.
+  */
+9: andcc %o2, 0x78, %g5
+ be,pn %xcc, 13f
+ andcc %o2, 7, %o2
+14: rd %pc, %o4
+#ifdef REGS_64BIT
+ srl %g5, 1, %o3
+ sub %o4, %o3, %o4
+#else
+ sub %o4, %g5, %o4
+#endif
+ jmpl %o4 + (13f - 14b), %g0
+ add %o0, %g5, %o0
+12: SET_BLOCKS(%o0, 0x68, %o1)
+ SET_BLOCKS(%o0, 0x48, %o1)
+ SET_BLOCKS(%o0, 0x28, %o1)
+ SET_BLOCKS(%o0, 0x08, %o1)
+ /* Final 0-7 bytes: optional word, halfword and byte store driven by
+  * the low bits of %o2.
+  */
+13: be,pn %xcc, 8f
+ andcc %o2, 4, %g0
+ be,pn %xcc, 1f
+ andcc %o2, 2, %g0
+ stw %o1, [%o0]
+ add %o0, 4, %o0
+1: be,pn %xcc, 1f
+ andcc %o2, 1, %g0
+ sth %o1, [%o0]
+ add %o0, 2, %o0
+1: bne,a,pn %xcc, 8f
+ stb %o1, [%o0]
+8: retl
+ RETL
+ /* Short (< 8 byte) case: byte-at-a-time loop. */
+17: brz,pn %o2, 0f
+8: add %o0, 1, %o0
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 8b
+ stb %o1, [%o0 - 1]
+0: retl
+ RETL
+ /* Destination already 64-byte aligned: drop one 8-byte pattern at
+  * [%o0] so the ldd in the delay slot can seed %f0, then enter the
+  * block-store section at 18b (enabling the FPU first in the kernel).
+  */
+6:
+#ifdef REGS_64BIT
+ stx %o1, [%o0]
+#else
+ stw %o1, [%o0]
+ stw %o1, [%o0 + 4]
+#endif
+ andncc %o2, 0x3f, %o3
+ be,pn %xcc, 9b
+#ifdef __KERNEL__
+ wr %g0, FPRS_FEF, %fprs
+#else
+ nop
+#endif
+ ba,pt %xcc, 18b
+ ldd [%o0], %f0
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov