patch-2.1.34 linux/arch/sparc64/kernel/dtlb_miss.S
- Lines: 167
- Date: Fri Apr 11 10:47:36 1997
- Orig file: v2.1.33/linux/arch/sparc64/kernel/dtlb_miss.S
- Orig date: Mon Mar 17 14:54:23 1997
diff -u --recursive --new-file v2.1.33/linux/arch/sparc64/kernel/dtlb_miss.S linux/arch/sparc64/kernel/dtlb_miss.S
@@ -1,93 +1,80 @@
-/* $Id: dtlb_miss.S,v 1.5 1997/02/25 20:00:02 jj Exp $
+/* $Id: dtlb_miss.S,v 1.11 1997/04/10 01:59:35 davem Exp $
* dtlb_miss.S: Data TLB miss code, this is included directly
* into the trap table.
*
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
- /* We are in the MMU globals, %g7 contains the physical
- * address of current->mm->pgd at all times.
- *
- * Many subtle things are done here. The high bits of
- * the virtual address missed are most easily obtained
- * from the tag target (it is at address zero in ASI_IMMU
- * so no address formation is necessary to get at this).
- * This is used to compute the pgd and pmd table offsets.
- *
- * Even more clever is that physical page zero is always
- * a page full of zeroes. This means we can just follow
- * through with all the page table traversals even if nothing
- * is mapped because we'll just do loads from page zero
- * and get yet another zero. We only need to do the check
- * for the valid bit being set in the final pte we obtain.
- *
- * Furthermore, we set the TSB base register to the address
- * zero, and we use the 8KB tsb ptr to calculate the pte
- * offset. Again it is at address zero in ASI_IMMU_TSB_8KB_PTR
- * so no address formation is necessary, saves more instructions.
- *
- * We use physical address accesses to get at the page
- * tables, and this is for two reasons. This makes it
- * impossible to take a fault while we are servicing the
- * miss. Also this physical bypass access only allocates
- * in the E-cache, and thus we prevent D-cache pollution
- * from the miss handlers probing the page tables.
- *
- * It looks very hairy and slow. But I take only 1 more
- * overhead of loads from ram than the Solaris version, and
- * my version is one instruction quicker for a true TLB miss.
- * And more importantly, all true TLB misses under Linux will be
- * serviced in _constant_ time. When using the TSB in the
- * manner it was intended to be used (like solaris does) the
- * overhead for a TLB miss is _indeterminate_ especially during
- * processes startup when the TSB is cold.
- *
- * XXX I think I can knock off two more instructions here...
- */
-
- /* I-cache line 0 */
- ldxa [%g0] ASI_DMMU, %g1 ! grab Tag Target either way
- brlz,pn %g1, 3f ! special kernel processing
- srlx %g1, 8, %g3 ! put high vaddr bits in place
-
-1:
- and %g3, %g2, %g3 ! get offset
- ldxa [%g7 + %g3] ASI_PHYS_USE_EC, %g5! load pgd
- sllx %g1, 2, %g4 ! begin pmd_offset formation
- and %g4, %g2, %g3 ! and now mask it
- ldxa [%g5 + %g3] ASI_PHYS_USE_EC, %g4! load pmd
- /* I-cache line 1 */
- ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! get 8KB pointer bits
- srlx %g1, 1, %g1 ! shift right to get pte_offset
- ldxa [%g4 + %g1] ASI_PHYS_USE_EC, %g3! load pte
- brlz,a,pt %g3, 2f ! is valid bit clear?
- stxa %g3, [%g0] ASI_DTLB_DATA_IN ! nope, load TTE into DTLB
-
- ba,a,pt %xcc, sparc64_dtlb_refbit_catch ! longer processing needed
-2:
- retry ! return from trap
-
-#define KTTE_HIGH_BITS _PAGE_VALID | _PAGE_SZ4MB
-#define KTTE_LOW_BITS _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W | _PAGE_G
-
- nop ! align next insn on cache line
-3:
- /* I-cache line 2 */
- srax %g1, 19, %g5 ! mask down high bits
- cmp %g5, -1 ! if -1 this is VMALLOC area
- be,pn %xcc, 1b ! yep
- sethi %uhi(KTTE_HIGH_BITS), %g4 ! begin pte formation
-
- sllx %g1, 23, %g1 ! begin masking for physpage
- sllx %g4, 32, %g4 ! high protection TTE bits
- or %g4, (KTTE_LOW_BITS), %g4 ! low protection TTE bits
- srlx %g1, 41, %g1 ! put physpage into place
- /* I-cache line 3 */
- or %g4, %g1, %g1 ! finish TTE computation
- stxa %g1, [%g0] ASI_DTLB_DATA_IN ! load TTE into DTLB
- retry ! return from trap
+/* The basic algorithm is:
+ *
+ * if(faulting_context != 0) {
+ * pgd = pgd_offset(current->mm.pgd, fault_address);
+ * page_table_walk_continue:
+ * pmd = pmd_offset(pgd, fault_address);
+ * pte = pte_offset(pmd, fault_address);
+ * if(pte & _PAGE_V) {
+ * tlb_load(pte, fault_address);
+ * return_from_trap();
+ * }
+ * goto longer_processing;
+ * } else {
+ * if(fault_address >= KERNBASE &&
+ * fault_address < VMALLOC_START) {
+ * tlb_load(__pa(fault_address) | PAGE_KERNEL);
+ * return_from_trap();
+ * } else {
+ * pgd = pgd_offset(swapper_pg_dir, fault_address);
+ * goto page_table_walk_continue;
+ * }
+ * }
+ *
+ * This is optimized for user TLB misses on purpose.
+ */
- nop; nop; nop; nop; nop;
+#define KERN_HIGHBITS (_PAGE_VALID | _PAGE_SZ4MB)
+#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
+#define KERN_LOWBITS_IO (_PAGE_E | _PAGE_P | _PAGE_W)
+
+ /* ICACHE line 1 */
+ /*0x00*/ ldxa [%g0] ASI_DMMU, %g1 ! Get TAG_TARGET
+ /*0x04*/ srlx %g1, 8, %g3 ! Position PGD offset
+ /*0x08*/ srlx %g1, 48, %g5 ! Shift down CONTEXT bits
+ /*0x0c*/ and %g3, %g2, %g3 ! Mask PGD offset
+ /*0x10*/ sllx %g1, 2, %g4 ! Position PMD offset
+ /*0x14*/ brz,pn %g5, 3f ! Context 0 == kernel
+ /*0x18*/ and %g4, %g2, %g4 ! Mask PMD offset
+ /*0x1c*/ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! For PTE offset
+
+ /* ICACHE line 2 */
+ /*0x20*/ ldxa [%g7 + %g3] ASI_PHYS_USE_EC, %g5 ! Load PGD
+ /*0x24*/ srlx %g1, 1, %g1 ! PTE offset
+2:/*0x28*/ ldxa [%g5 + %g4] ASI_PHYS_USE_EC, %g3 ! Load PMD
+ /*0x2c*/ ldxa [%g3 + %g1] ASI_PHYS_USE_EC, %g5 ! Load PTE
+ /*0x30*/ brlz,a,pt %g5, 1f ! Valid set?
+ /*0x34*/ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! TLB load
+ /*0x38*/ ba,a,pt %xcc, sparc64_dtlb_refbit_catch ! Nope...
+1:/*0x3c*/ retry ! Trap return
+
+3: /* ICACHE line 3 */
+ /*0x40*/ sllx %g1, 43, %g5 ! This gets >= VMALLOC_START...
+ /*0x44*/ brlz,pn %g5, 4f ! ...if now less than zero.
+ /*0x48*/ andncc %g1, 0x3ff, %g0 ! Slick trick...
+ /*0x4c*/ be,pn %xcc, 4f ! Yes, it is some PROM mapping
+ /*0x50*/ srlx %g5, 21, %g5 ! This is now physical page
+ /*0x54*/ sethi %uhi(KERN_HIGHBITS), %g1 ! Construct PTE
+ /*0x58*/ sllx %g1, 32, %g1 ! Move priv bits up
+ /*0x5c*/ or %g1, %g5, %g1 ! Or in the page
+
+ /* ICACHE line 4 */
+ /*0x60*/ or %g1, (KERN_LOWBITS), %g1 ! Set low priv bits
+ /*0x64*/ stxa %g1, [%g0] ASI_DTLB_DATA_IN ! TLB load
+ /*0x68*/ retry ! Trap return
+4:/*0x6c*/ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! For PTE offset
+ /*0x70*/ ldxa [%g6 + %g3] ASI_PHYS_USE_EC, %g5 ! Load kern PGD
+ /*0x74*/ ba,pt %xcc, 2b ! Go back up top
+ /*0x78*/ srlx %g1, 1, %g1
+ /*0x7c*/ nop
-#undef KTTE_HIGH_BITS
-#undef KTTE_LOW_BITS
+#undef KERN_HIGHBITS
+#undef KERN_LOWBITS
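
A minimal C model of the user-miss path (I-cache lines 1 and 2),
following the header comment's algorithm. All names here are
hypothetical stand-ins: read_phys() for the ASI_PHYS_USE_EC loads,
dtlb_load() for the ASI_DTLB_DATA_IN store, longer_processing() for
the branch to sparc64_dtlb_refbit_catch, and offmask for the table
offset mask the trap setup keeps in %g2.

#include <stdint.h>

#define PAGE_V (1UL << 63)  /* _PAGE_VALID is the sign bit, hence brlz */

extern uint64_t read_phys(uint64_t paddr);  /* stub: physical-bypass load */
extern void dtlb_load(uint64_t tte);        /* stub: write DTLB data-in */
extern void longer_processing(void);        /* stub: refbit catch */

/* tag_target holds CONTEXT in bits 63:48 and VA<63:22> below them;
 * it sits at address zero in ASI_DMMU, so no address formation is
 * needed to fetch it.  With the TSB base register set to zero, the
 * 8KB TSB pointer shifted right once is already the byte offset of
 * the 8-byte PTE within its table.  pgd_phys models %g7 (physical
 * address of current->mm->pgd), kpgd_phys models %g6 (swapper_pg_dir).
 */
void dtlb_miss(uint64_t tag_target, uint64_t tsb_8kb_ptr,
               uint64_t pgd_phys, uint64_t kpgd_phys, uint64_t offmask)
{
        uint64_t pgd_off = (tag_target >> 8) & offmask;  /* srlx 8, and */
        uint64_t pmd_off = (tag_target << 2) & offmask;  /* sllx 2, and */
        uint64_t pte_off = tsb_8kb_ptr >> 1;             /* srlx 1      */
        uint64_t pgd, pmd, pte;

        if (tag_target >> 48)       /* non-zero context: user miss */
                pgd = read_phys(pgd_phys + pgd_off);
        else                        /* context 0: VMALLOC/PROM walk */
                pgd = read_phys(kpgd_phys + pgd_off);

        /* Unmapped levels read as zero (physical page zero is a page
         * of zeroes), so the walk itself can never fault; only the
         * final valid-bit test matters.
         */
        pmd = read_phys(pgd + pmd_off);
        pte = read_phys(pmd + pte_off);

        if (pte & PAGE_V)
                dtlb_load(pte);     /* then retry the faulting access */
        else
                longer_processing();
}

The context-0 path in the real handler first tries the linear-map
shortcut of I-cache lines 3 and 4; that computation is sketched next.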
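
For a context-0 miss inside the kernel's linear mapping, I-cache
lines 3 and 4 compute the TTE instead of looking it up. A hedged C
model of that computation follows; kern_tte() is an illustrative
name, not a kernel symbol, and the bit positions assume the
UltraSPARC-I/II TTE layout (valid in bit 63, size code 3 for 4MB in
bits 62:61, CP/CV/P/W in the low byte) behind the _PAGE_* macros.

#include <stdint.h>

#define TTE_VALID (1UL << 63)  /* _PAGE_VALID */
#define TTE_SZ4MB (3UL << 61)  /* _PAGE_SZ4MB */
#define TTE_CP    (1UL << 5)   /* _PAGE_CP: cacheable physical */
#define TTE_CV    (1UL << 4)   /* _PAGE_CV: cacheable virtual */
#define TTE_P     (1UL << 2)   /* _PAGE_P: privileged */
#define TTE_W     (1UL << 1)   /* _PAGE_W: writable */

static uint64_t kern_tte(uint64_t tag_target)
{
        /* tag_target<41:0> is VA<63:22>.  The sllx-43/srlx-21 pair
         * isolates VA<42:22> as a 4MB-aligned physical page base,
         * which works because the linear map places physical page
         * zero at KERNBASE.
         */
        uint64_t pa_4mb = (tag_target << 43) >> 21;

        return TTE_VALID | TTE_SZ4MB | pa_4mb
               | TTE_CP | TTE_CV | TTE_P | TTE_W;
}

Note that KERN_LOWBITS drops the _PAGE_G bit the old KTTE_LOW_BITS
carried, and that KERN_LOWBITS_IO (_PAGE_E for side-effect pages) is
defined alongside but not used in this hunk.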
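
The dispatch tests at offsets 0x40 through 0x4c decide between that
computed TTE and a swapper_pg_dir walk. The reading below is
inferred from the shift constants and the in-line comments, so
treat it as a sketch; needs_table_walk() is a hypothetical name.

#include <stdint.h>

static int needs_table_walk(uint64_t tag_target)
{
        /* sllx 43 / brlz: the sign bit after the shift is VA<42>;
         * if it is set, the address is at or above VMALLOC_START,
         * so the kernel page tables must be walked.
         */
        if ((int64_t)(tag_target << 43) < 0)
                return 1;

        /* andncc 0x3ff / be: with context 0 the tag target is pure
         * VA<63:22>, so a zero result means VA<63:32> is all
         * zeroes, i.e. a low 32-bit address: some PROM mapping.
         */
        if ((tag_target & ~0x3ffUL) == 0)
                return 1;

        return 0;   /* linear map: load the computed TTE and retry */
}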