patch-2.4.15 linux/arch/ia64/lib/copy_user.S
Next file: linux/arch/ia64/lib/do_csum.S
Previous file: linux/arch/ia64/lib/copy_page.S
Back to the patch index
Back to the overall index
- Lines: 384
- Date: Fri Nov 9 14:26:17 2001
- Orig file: v2.4.14/linux/arch/ia64/lib/copy_user.S
- Orig date: Sun Aug 12 13:27:58 2001
diff -u --recursive --new-file v2.4.14/linux/arch/ia64/lib/copy_user.S linux/arch/ia64/lib/copy_user.S
@@ -19,8 +19,8 @@
* ret0 0 in case of success. The number of bytes NOT copied in
* case of error.
*
- * Copyright (C) 2000 Hewlett-Packard Co
- * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000-2001 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
*
* Fixme:
* - handle the case where we have more than 16 bytes and the alignment
@@ -85,7 +85,7 @@
cmp.eq p8,p0=r0,len // check for zero length
.save ar.lc, saved_lc
mov saved_lc=ar.lc // preserve ar.lc (slow)
-(p8) br.ret.spnt.few rp // empty mempcy()
+(p8) br.ret.spnt.many rp // empty mempcy()
;;
add enddst=dst,len // first byte after end of source
add endsrc=src,len // first byte after end of destination
@@ -103,26 +103,26 @@
cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy
xor tmp=src,dst // same alignment test prepare
-(p10) br.cond.dptk.few long_copy_user
+(p10) br.cond.dptk .long_copy_user
;; // RAW pr.rot/p16 ?
//
// Now we do the byte by byte loop with software pipeline
//
// p7 is necessarily false by now
1:
- EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
- EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
br.ctop.dptk.few 1b
;;
mov ar.lc=saved_lc
mov pr=saved_pr,0xffffffffffff0000
mov ar.pfs=saved_pfs // restore ar.ec
- br.ret.sptk.few rp // end of short memcpy
+ br.ret.sptk.many rp // end of short memcpy
//
// Not 8-byte aligned
//
-diff_align_copy_user:
+.diff_align_copy_user:
// At this point we know we have more than 16 bytes to copy
// and also that src and dest do _not_ have the same alignment.
and src2=0x7,src1 // src offset
@@ -153,7 +153,7 @@
// We know src1 is not 8-byte aligned in this case.
//
cmp.eq p14,p15=r0,dst2
-(p15) br.cond.spnt.few 1f
+(p15) br.cond.spnt 1f
;;
sub t1=8,src2
mov t2=src2
@@ -163,7 +163,7 @@
;;
sub lshift=64,rshift
;;
- br.cond.spnt.few word_copy_user
+ br.cond.spnt .word_copy_user
;;
1:
cmp.leu p14,p15=src2,dst2
@@ -192,15 +192,15 @@
mov ar.lc=cnt
;;
2:
- EX(failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
- EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
br.ctop.dptk.few 2b
;;
clrrrb
;;
-word_copy_user:
+.word_copy_user:
cmp.gtu p9,p0=16,len1
-(p9) br.cond.spnt.few 4f // if (16 > len1) skip 8-byte copy
+(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy
;;
shr.u cnt=len1,3 // number of 64-bit words
;;
@@ -232,24 +232,24 @@
#define EPI_1 p[PIPE_DEPTH-2]
#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift
#define CASE(pred, shift) \
- (pred) br.cond.spnt.few copy_user_bit##shift
+ (pred) br.cond.spnt .copy_user_bit##shift
#define BODY(rshift) \
-copy_user_bit##rshift: \
+.copy_user_bit##rshift: \
1: \
- EX(failure_out,(EPI) st8 [dst1]=tmp,8); \
+ EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \
(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \
EX(3f,(p16) ld8 val1[0]=[src1],8); \
- br.ctop.dptk.few 1b; \
+ br.ctop.dptk 1b; \
;; \
- br.cond.sptk.few .diff_align_do_tail; \
+ br.cond.sptk.many .diff_align_do_tail; \
2: \
(EPI) st8 [dst1]=tmp,8; \
(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \
3: \
(p16) mov val1[0]=r0; \
- br.ctop.dptk.few 2b; \
+ br.ctop.dptk 2b; \
;; \
- br.cond.sptk.few failure_in2
+ br.cond.sptk.many .failure_in2
//
// Since the instruction 'shrp' requires a fixed 128-bit value
@@ -301,25 +301,25 @@
mov ar.lc=len1
;;
5:
- EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
- EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+ EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
br.ctop.dptk.few 5b
;;
mov ar.lc=saved_lc
mov pr=saved_pr,0xffffffffffff0000
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
//
// Beginning of long mempcy (i.e. > 16 bytes)
//
-long_copy_user:
+.long_copy_user:
tbit.nz p6,p7=src1,0 // odd alignement
and tmp=7,tmp
;;
cmp.eq p10,p8=r0,tmp
mov len1=len // copy because of rotation
-(p8) br.cond.dpnt.few diff_align_copy_user
+(p8) br.cond.dpnt .diff_align_copy_user
;;
// At this point we know we have more than 16 bytes to copy
// and also that both src and dest have the same alignment
@@ -327,11 +327,11 @@
// forward slowly until we reach 16byte alignment: no need to
// worry about reaching the end of buffer.
//
- EX(failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned
+ EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned
(p6) adds len1=-1,len1;;
tbit.nz p7,p0=src1,1
;;
- EX(failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned
+ EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned
(p7) adds len1=-2,len1;;
tbit.nz p8,p0=src1,2
;;
@@ -339,28 +339,28 @@
// Stop bit not required after ld4 because if we fail on ld4
// we have never executed the ld1, therefore st1 is not executed.
//
- EX(failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned
+ EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned
;;
- EX(failure_out,(p6) st1 [dst1]=val1[0],1)
+ EX(.failure_out,(p6) st1 [dst1]=val1[0],1)
tbit.nz p9,p0=src1,3
;;
//
// Stop bit not required after ld8 because if we fail on ld8
// we have never executed the ld2, therefore st2 is not executed.
//
- EX(failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned
- EX(failure_out,(p7) st2 [dst1]=val1[1],2)
+ EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned
+ EX(.failure_out,(p7) st2 [dst1]=val1[1],2)
(p8) adds len1=-4,len1
;;
- EX(failure_out, (p8) st4 [dst1]=val2[0],4)
+ EX(.failure_out, (p8) st4 [dst1]=val2[0],4)
(p9) adds len1=-8,len1;;
shr.u cnt=len1,4 // number of 128-bit (2x64bit) words
;;
- EX(failure_out, (p9) st8 [dst1]=val2[1],8)
+ EX(.failure_out, (p9) st8 [dst1]=val2[1],8)
tbit.nz p6,p0=len1,3
cmp.eq p7,p0=r0,cnt
adds tmp=-1,cnt // br.ctop is repeat/until
-(p7) br.cond.dpnt.few .dotail // we have less than 16 bytes left
+(p7) br.cond.dpnt .dotail // we have less than 16 bytes left
;;
adds src2=8,src1
adds dst2=8,dst1
@@ -370,12 +370,12 @@
// 16bytes/iteration
//
2:
- EX(failure_in3,(p16) ld8 val1[0]=[src1],16)
+ EX(.failure_in3,(p16) ld8 val1[0]=[src1],16)
(p16) ld8 val2[0]=[src2],16
- EX(failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16)
+ EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16)
(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
- br.ctop.dptk.few 2b
+ br.ctop.dptk 2b
;; // RAW on src1 when fall through from loop
//
// Tail correction based on len only
@@ -384,29 +384,28 @@
// is 16 byte aligned AND we have less than 16 bytes to copy.
//
.dotail:
- EX(failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes
+ EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes
tbit.nz p7,p0=len1,2
;;
- EX(failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes
+ EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes
tbit.nz p8,p0=len1,1
;;
- EX(failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes
+ EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes
tbit.nz p9,p0=len1,0
;;
- EX(failure_out, (p6) st8 [dst1]=val1[0],8)
+ EX(.failure_out, (p6) st8 [dst1]=val1[0],8)
;;
- EX(failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left
+ EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left
mov ar.lc=saved_lc
;;
- EX(failure_out,(p7) st4 [dst1]=val1[1],4)
+ EX(.failure_out,(p7) st4 [dst1]=val1[1],4)
mov pr=saved_pr,0xffffffffffff0000
;;
- EX(failure_out, (p8) st2 [dst1]=val2[0],2)
+ EX(.failure_out, (p8) st2 [dst1]=val2[0],2)
mov ar.pfs=saved_pfs
;;
- EX(failure_out, (p9) st1 [dst1]=val2[1])
- br.ret.dptk.few rp
-
+ EX(.failure_out, (p9) st1 [dst1]=val2[1])
+ br.ret.sptk.many rp
//
@@ -433,32 +432,32 @@
// pipeline going. We can't really do this inline because
// p16 is always reset to 1 when lc > 0.
//
-failure_in_pipe1:
+.failure_in_pipe1:
sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
1:
(p16) mov val1[0]=r0
(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
- br.ctop.dptk.few 1b
+ br.ctop.dptk 1b
;;
mov pr=saved_pr,0xffffffffffff0000
mov ar.lc=saved_lc
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
//
// This is the case where the byte by byte copy fails on the load
// when we copy the head. We need to finish the pipeline and copy
// zeros for the rest of the destination. Since this happens
// at the top we still need to fill the body and tail.
-failure_in_pipe2:
+.failure_in_pipe2:
sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
2:
(p16) mov val1[0]=r0
(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
- br.ctop.dptk.few 2b
+ br.ctop.dptk 2b
;;
sub len=enddst,dst1,1 // precompute len
- br.cond.dptk.few failure_in1bis
+ br.cond.dptk.many .failure_in1bis
;;
//
@@ -533,9 +532,7 @@
// This means that we are in a situation similar the a fault in the
// head part. That's nice!
//
-failure_in1:
-// sub ret0=enddst,dst1 // number of bytes to zero, i.e. not copied
-// sub len=enddst,dst1,1
+.failure_in1:
sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
sub len=endsrc,src1,1
//
@@ -546,18 +543,17 @@
// calling side.
//
;;
-failure_in1bis: // from (failure_in3)
+.failure_in1bis: // from (.failure_in3)
mov ar.lc=len // Continue with a stupid byte store.
;;
5:
st1 [dst1]=r0,1
- br.cloop.dptk.few 5b
+ br.cloop.dptk 5b
;;
-skip_loop:
mov pr=saved_pr,0xffffffffffff0000
mov ar.lc=saved_lc
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
//
// Here we simply restart the loop but instead
@@ -569,7 +565,7 @@
// we MUST use src1/endsrc here and not dst1/enddst because
// of the pipeline effect.
//
-failure_in3:
+.failure_in3:
sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
;;
2:
@@ -577,36 +573,36 @@
(p16) mov val2[0]=r0
(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16
(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
- br.ctop.dptk.few 2b
+ br.ctop.dptk 2b
;;
cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
sub len=enddst,dst1,1 // precompute len
-(p6) br.cond.dptk.few failure_in1bis
+(p6) br.cond.dptk .failure_in1bis
;;
mov pr=saved_pr,0xffffffffffff0000
mov ar.lc=saved_lc
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
-failure_in2:
+.failure_in2:
sub ret0=endsrc,src1
cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
sub len=enddst,dst1,1 // precompute len
-(p6) br.cond.dptk.few failure_in1bis
+(p6) br.cond.dptk .failure_in1bis
;;
mov pr=saved_pr,0xffffffffffff0000
mov ar.lc=saved_lc
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
//
// handling of failures on stores: that's the easy part
//
-failure_out:
+.failure_out:
sub ret0=enddst,dst1
mov pr=saved_pr,0xffffffffffff0000
mov ar.lc=saved_lc
mov ar.pfs=saved_pfs
- br.ret.dptk.few rp
+ br.ret.sptk.many rp
END(__copy_user)
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)