patch-2.4.22 linux-2.4.22/arch/sh64/lib/page_copy.S
Next file: linux-2.4.22/arch/sh64/lib/panic.c
Previous file: linux-2.4.22/arch/sh64/lib/page_clear.S
Back to the patch index
Back to the overall index
- Lines: 78
- Date:
2003-08-25 04:44:40.000000000 -0700
- Orig file:
linux-2.4.21/arch/sh64/lib/page_copy.S
- Orig date:
1969-12-31 16:00:00.000000000 -0800
diff -urN linux-2.4.21/arch/sh64/lib/page_copy.S linux-2.4.22/arch/sh64/lib/page_copy.S
@@ -0,0 +1,77 @@
+/* Written by Richard P. Curnow, SuperH (UK) Ltd.
+
+ Tight version of memcpy for the case of just copying a page.
+ Prefetch strategy empirically optimised against RTL simulations
+ of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+ Parameters:
+ r2 : source effective address (start of page)
+ r3 : destination effective address (start of page)
+
+ Always copies 4096 bytes.
+
+ Points to review.
+ * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+ It seems like the prefetch needs to be at least 4 lines ahead to get
+ the data into the cache in time, and the allocos contend with outstanding
+ prefetches for the same cache set, so it's better to have the numbers
+ different.
+ */
+
+ .section .text..SHmedia32,"ax"
+ .little
+
+ .balign 8
+ .global sh64_page_copy
+sh64_page_copy:
+
+ /* Copy one 4096-byte page from r2 (source) to r3 (destination), one
+ 32-byte line (four quadwords) per loop pass. Source prefetches run 4
+ lines (0x80) ahead; destination allocos run 2 lines (0x40) ahead. */
+
+ pta 1f, tr1 ! tr1 = top of the copy loop
+ pta 2f, tr2 ! tr2 = loop entry that skips the prefetch
+ pta 3f, tr3 ! tr3 = loop entry that skips both prefetch and alloco
+ ptabs r18, tr0 ! tr0 = return target, taken from r18
+
+ ld.q r2, 0x00, r63 ! prime: touch the first four source lines; the
+ ld.q r2, 0x20, r63 ! loaded data lands in r63 and is never read
+ ld.q r2, 0x40, r63
+ ld.q r2, 0x60, r63
+ alloco r3, 0x00 ! prime: alloco the first two destination lines
+ alloco r3, 0x20
+
+ movi 3968, r6 ! 3968 = 4096 - 128 (page size less 4 lines)
+ add r3, r6, r6 ! r6 = dest + 3968: no prefetch once r3 reaches this
+ addi r6, 64, r7 ! r7 = dest + 4032: no alloco once r3 reaches this
+ addi r7, 64, r8 ! r8 = dest + 4096: one past the end of the page
+ sub r2, r3, r60 ! r60 = source - dest, so r3 + r60 addresses the source
+ addi r60, 8, r61 ! r61 = r60 + 8: source quadword 1 of the line
+ addi r61, 8, r62 ! r62 = r60 + 16: source quadword 2 of the line
+ addi r62, 8, r23 ! r23 = r60 + 24: source quadword 3 of the line
+ addi r60, 0x80, r22 ! r22 = r60 + 0x80: source line 4 lines ahead
+
+/* Minimal code size. The extra branches inside the loop don't cost much
+ because they overlap with the time spent waiting for prefetches to
+ complete. Loop variable: r3 = current destination line, +32 per pass. */
+1:
+ bge/u r3, r6, tr2 ! skip prefetch for last 4 lines
+ ldx.q r3, r22, r63 ! prefetch 4 lines hence
+2:
+ bge/u r3, r7, tr3 ! skip alloco for last 2 lines
+ alloco r3, 0x40 ! alloc destination line 2 lines ahead
+3:
+ ldx.q r3, r60, r36 ! read the four quadwords of the current source
+ ldx.q r3, r61, r37 ! line (r3 + source-dest offset) into r36..r39
+ ldx.q r3, r62, r38
+ ldx.q r3, r23, r39
+ st.q r3, 0, r36 ! write them to the current destination line
+ st.q r3, 8, r37
+ st.q r3, 16, r38
+ st.q r3, 24, r39
+ addi r3, 32, r3 ! advance the destination pointer by one line
+ bgt/l r8, r3, tr1 ! loop while r3 is still below the page end (r8 > r3)
+
+ blink tr0, r63 ! return
+
+
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)