patch-2.4.4 linux/arch/ia64/lib/strlen.S
Next file: linux/arch/ia64/lib/strlen_user.S
Previous file: linux/arch/ia64/lib/memset.S
Back to the patch index
Back to the overall index
- Lines: 147
- Date:
Thu Apr 5 12:51:47 2001
- Orig file:
v2.4.3/linux/arch/ia64/lib/strlen.S
- Orig date:
Tue Mar 6 19:44:35 2001
diff -u --recursive --new-file v2.4.3/linux/arch/ia64/lib/strlen.S linux/arch/ia64/lib/strlen.S
@@ -5,12 +5,12 @@
*
* Inputs:
* in0 address of string
- *
- * Outputs:
- * ret0 the number of characters in the string (0 if empty string)
- * does not count the \0
*
- * Copyright (C) 1999 Hewlett-Packard Co
+ * Outputs:
+ * ret0 the number of characters in the string (0 if empty string)
+ * does not count the \0
+ *
+ * Copyright (C) 1999, 2001 Hewlett-Packard Co
* Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
*
* 09/24/99 S.Eranian add speculation recovery code
@@ -30,7 +30,7 @@
// string may not be 8-byte aligned. In this case we load the 8byte
// quantity which includes the start of the string and mask the unused
// bytes with 0xff to avoid confusing czx.
-// We use speculative loads and software pipelining to hide memory
+// We use speculative loads and software pipelining to hide memory
// latency and do read ahead safely. This way we defer any exception.
//
// Because we don't want the kernel to be relying on particular
@@ -42,7 +42,7 @@
// The fact that speculation may fail can be caused, for instance, by
// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
// a NaT bit will be set if the translation is not present. The normal
-// load, on the other hand, will cause the translation to be inserted
+// load, on the other hand, will cause the translation to be inserted
// if the mapping exists.
//
// It should be noted that we execute recovery code only when we need
@@ -50,22 +50,22 @@
// recovery code on pure read ahead data.
//
// Remarks:
-// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
// register to 1. This is required to make sure that we get the parallel
// compare correct.
//
// - we don't use the epilogue counter to exit the loop but we need to set
// it to zero beforehand.
//
-// - after the loop we must test for Nat values because neither the
+// - after the loop we must test for Nat values because neither the
// czx nor cmp instruction raises a NaT consumption fault. We must be
-// careful not to look too far for a Nat for which we don't care.
+// careful not to look too far for a Nat for which we don't care.
// For instance we don't need to look at a NaT in val2 if the zero byte
// was in val1.
//
// - Clearly performance tuning is required.
//
-//
+//
//
#define saved_pfs r11
#define tmp r10
@@ -78,15 +78,9 @@
#define val1 r22
#define val2 r23
-
- .text
- .psr abi64
- .psr lsb
- .lsb
-
GLOBAL_ENTRY(strlen)
- UNW(.prologue)
- UNW(.save ar.pfs, saved_pfs)
+ .prologue
+ .save ar.pfs, saved_pfs
alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
.rotr v[2], w[2] // declares our 4 aliases
@@ -94,11 +88,11 @@
extr.u tmp=in0,0,3 // tmp=least significant 3 bits
mov orig=in0 // keep track of initial byte address
dep src=0,in0,0,3 // src=8byte-aligned in0 address
- UNW(.save pr, saved_pr)
+ .save pr, saved_pr
mov saved_pr=pr // preserve predicates (rotation)
;;
- UNW(.body)
+ .body
ld8 v[1]=[src],8 // must not speculate: can fail here
shl tmp=tmp,3 // multiply by 8bits/byte
@@ -115,8 +109,8 @@
or v[1]=v[1],mask // now we have a safe initial byte pattern
;;
1:
- ld8.s v[0]=[src],8 // speculatively load next
- czx1.r val1=v[1] // search 0 byte from right
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
czx1.r val2=w[1] // search 0 byte from right following 8bytes
;;
ld8.s w[0]=[src],8 // speculatively load next to next
@@ -132,11 +126,7 @@
// - there must be a better way of doing the test
//
cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
-#ifdef notyet
tnat.nz p6,p7=val1 // test NaT on val1
-#else
- tnat.z p7,p6=val1 // test NaT on val1
-#endif
(p6) br.cond.spnt.few recover// jump to recovery if val1 is NaT
;;
//
@@ -154,7 +144,7 @@
sub tmp=8,val1 // which byte in word
mov pr=saved_pr,0xffffffffffff0000
;;
- sub ret0=ret0,tmp // adjust
+ sub ret0=ret0,tmp // adjust
mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
br.ret.sptk.few rp // end of normal execution
@@ -167,8 +157,8 @@
//
// IMPORTANT:
// Please note that in the case of strlen() as opposed to strlen_user()
- // we don't use the exception mechanism, as this function is not
- // supposed to fail. If that happens it means we have a bug and the
+ // we don't use the exception mechanism, as this function is not
+ // supposed to fail. If that happens it means we have a bug and the
// code will cause a kernel fault.
//
// XXX Fixme
@@ -187,7 +177,7 @@
2:
(p6) ld8 val=[base],8 // will fail if unrecoverable fault
;;
- czx1.r val1=val // search 0 byte from right
+ czx1.r val1=val // search 0 byte from right
;;
cmp.eq p6,p0=8,val1 // val1==8 ?
(p6) br.wtop.dptk.few 2b // loop until p6 == 0
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)