patch-2.4.20 linux-2.4.20/include/asm-parisc/checksum.h
Next file: linux-2.4.20/include/asm-parisc/current.h
Previous file: linux-2.4.20/include/asm-parisc/cache.h
Back to the patch index
Back to the overall index
- Lines: 163
- Date:
Thu Nov 28 15:53:15 2002
- Orig file:
linux-2.4.19/include/asm-parisc/checksum.h
- Orig date:
Tue Dec 5 12:29:39 2000
diff -urN linux-2.4.19/include/asm-parisc/checksum.h linux-2.4.20/include/asm-parisc/checksum.h
@@ -61,32 +61,31 @@
unsigned int sum;
- __asm__ __volatile__ ("
- ldws,ma 4(%1), %0
- addi -4, %2, %2
- comib,>= 0, %2, 2f
-
- ldws,ma 4(%1), %%r19
- add %0, %%r19, %0
- ldws,ma 4(%1), %%r19
- addc %0, %%r19, %0
- ldws,ma 4(%1), %%r19
- addc %0, %%r19, %0
-1: ldws,ma 4(%1), %%r19
- addib,<> -1, %2, 1b
- addc %0, %%r19, %0
- addc %0, %%r0, %0
-
- zdepi -1, 31, 16, %%r19
- and %0, %%r19, %%r20
- extru %0, 15, 16, %%r21
- add %%r20, %%r21, %0
- and %0, %%r19, %%r20
- extru %0, 15, 16, %%r21
- add %%r20, %%r21, %0
- subi -1, %0, %0
-2:
- "
+ __asm__ __volatile__ (
+" ldws,ma 4(%1), %0\n"
+" addi -4, %2, %2\n"
+" comib,>= 0, %2, 2f\n"
+"\n"
+" ldws,ma 4(%1), %%r19\n"
+" add %0, %%r19, %0\n"
+" ldws,ma 4(%1), %%r19\n"
+" addc %0, %%r19, %0\n"
+" ldws,ma 4(%1), %%r19\n"
+" addc %0, %%r19, %0\n"
+"1: ldws,ma 4(%1), %%r19\n"
+" addib,<> -1, %2, 1b\n"
+" addc %0, %%r19, %0\n"
+" addc %0, %%r0, %0\n"
+"\n"
+" zdepi -1, 31, 16, %%r19\n"
+" and %0, %%r19, %%r20\n"
+" extru %0, 15, 16, %%r21\n"
+" add %%r20, %%r21, %0\n"
+" and %0, %%r19, %%r20\n"
+" extru %0, 15, 16, %%r21\n"
+" add %%r20, %%r21, %0\n"
+" subi -1, %0, %0\n"
+"2:\n"
: "=r" (sum), "=r" (iph), "=r" (ihl)
: "1" (iph), "2" (ihl)
: "r19", "r20", "r21" );
@@ -99,9 +98,12 @@
*/
static inline unsigned int csum_fold(unsigned int sum)
{
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return ~sum;
+ /* Add sum to a copy of itself with the two 16-bit halves swapped.
+ Any carry out of the low-half addition propagates into the upper
+ half, so the upper 16 bits end up holding the folded 16-bit sum;
+ complement it and shift it down to bits 0..15 for the result. */
+ sum += (sum << 16) + (sum >> 16);
+ return (~sum) >> 16;
}
static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
@@ -110,11 +112,11 @@
unsigned short proto,
unsigned int sum)
{
- __asm__("
- add %1, %0, %0
- addc %2, %0, %0
- addc %3, %0, %0
- addc %%r0, %0, %0 "
+ __asm__(
+ " add %1, %0, %0\n"
+ " addc %2, %0, %0\n"
+ " addc %3, %0, %0\n"
+ " addc %%r0, %0, %0\n"
: "=r" (sum)
: "r" (daddr), "r"(saddr), "r"((proto<<16)+len), "0"(sum));
return sum;
@@ -141,6 +143,7 @@
return csum_fold (csum_partial(buf, len, 0));
}
+
#define _HAVE_ARCH_IPV6_CSUM
static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
struct in6_addr *daddr,
@@ -148,7 +151,62 @@
unsigned short proto,
unsigned int sum)
{
- BUG();
+ __asm__ __volatile__ (
+
+#if BITS_PER_LONG > 32
+
+ /*
+ ** We can execute two loads and two adds per cycle on PA 8000.
+ ** But add insn's get serialized waiting for the carry bit.
+ ** Try to keep 4 registers with "live" values ahead of the ALU.
+ */
+
+" ldd,ma 8(%1), %%r19\n" /* get 1st saddr word */
+" ldd,ma 8(%2), %%r20\n" /* get 1st daddr word */
+" add %8, %3, %3\n"/* add 16-bit proto + len */
+" add %%r19, %0, %0\n"
+" ldd,ma 8(%1), %%r21\n" /* 2cd saddr */
+" ldd,ma 8(%2), %%r22\n" /* 2cd daddr */
+" add,dc %%r20, %0, %0\n"
+" add,dc %%r21, %0, %0\n"
+" add,dc %%r22, %0, %0\n"
+" add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */
+" extrd,u %0, 31, 32, %%r19\n" /* copy upper half down */
+" depdi 0, 31, 32, %0\n" /* clear upper half */
+" add %%r19, %0, %0\n" /* fold into 32-bits */
+" addc 0, %0, %0\n" /* add carry */
+
+#else
+
+ /*
+ ** For PA 1.x, the insn order doesn't matter as much.
+ ** The insn stream is serialized on the carry bit here too: each addc
+ ** needs the result from the previous operation (eg r0 + x).
+ */
+
+" ldw,ma 4(%1), %%r19\n" /* get 1st saddr word */
+" ldw,ma 4(%2), %%r20\n" /* get 1st daddr word */
+" add %8, %3, %3\n" /* add 16-bit proto + len */
+" add %%r19, %0, %0\n"
+" ldw,ma 4(%1), %%r21\n" /* 2cd saddr */
+" addc %%r20, %0, %0\n"
+" ldw,ma 4(%2), %%r22\n" /* 2cd daddr */
+" addc %%r21, %0, %0\n"
+" ldw,ma 4(%1), %%r19\n" /* 3rd saddr */
+" addc %%r22, %0, %0\n"
+" ldw,ma 4(%2), %%r20\n" /* 3rd daddr */
+" addc %%r19, %0, %0\n"
+" ldw,ma 4(%1), %%r21\n" /* 4th saddr */
+" addc %%r20, %0, %0\n"
+" ldw,ma 4(%2), %%r22\n" /* 4th daddr */
+" addc %%r21, %0, %0\n"
+" addc %%r22, %0, %0\n"
+" addc %3, %0, %0\n" /* fold in proto+len, catch carry */
+
+#endif
+ : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len)
+ : "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto)
+ : "r19", "r20", "r21", "r22");
return csum_fold(sum);
}
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)