patch-2.4.19 linux-2.4.19/include/asm-ia64/sn/bte_copy.h
- Lines: 312
- Date: Fri Aug 2 17:39:45 2002
- Orig file: linux-2.4.18/include/asm-ia64/sn/bte_copy.h
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/include/asm-ia64/sn/bte_copy.h linux-2.4.19/include/asm-ia64/sn/bte_copy.h
@@ -0,0 +1,311 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#ifndef _ASM_IA64_SN_BTE_COPY_H
+#define _ASM_IA64_SN_BTE_COPY_H
+
+#ident "$Revision: 1.1 $"
+
+#include <asm/sn/bte.h>
+#include <asm/sn/sgi.h>
+#include <asm/sn/pda.h>
+#include <asm/delay.h>
+
+/*
+ * BTE_LOCKING support - Undefining the following line
+ * adapts the bte_copy code to support one BTE per CPU in
+ * synchronous mode.  Even if bte_copy is called with a
+ * notify address, the BTE will spin and wait for the transfer
+ * to complete.  With the following defined, spin_locks and
+ * busy checks are placed around the initiation of a BTE
+ * transfer, and multiple BTEs per CPU are supported.
+ */
+#define CONFIG_IA64_SGI_BTE_LOCKING 1
+
+/*
+ * Some macros to simplify reading.
+ *
+ * Start with macros to locate the BTE control registers.
+ */
+
+#define BTEREG_LNSTAT_ADDR (bte->bte_base_addr)
+#define BTEREG_SOURCE_ADDR (bte->bte_base_addr + IIO_IBSA0 - IIO_IBLS0)
+#define BTEREG_DEST_ADDR (bte->bte_base_addr + IIO_IBDA0 - IIO_IBLS0)
+#define BTEREG_CTRL_ADDR ((volatile char *)bte->bte_base_addr + IIO_IBCT0 - IIO_IBLS0)
+#define BTEREG_NOTIF_ADDR (bte->bte_base_addr + IIO_IBNA0 - IIO_IBLS0)
+
+/* Some macros to force the IBCT0 value to be valid. */
+
+#define BTE_VALID_MODES BTE_NOTIFY
+#define BTE_VLD_MODE(x) ((x) & BTE_VALID_MODES)
+
+// #define DEBUG_BTE
+// #define DEBUG_BTE_VERBOSE
+// #define DEBUG_TIME_BTE
+
+#ifdef DEBUG_BTE
+# define DPRINTK(x) printk x // Terse
+# ifdef DEBUG_BTE_VERBOSE
+# define DPRINTKV(x) printk x // Verbose
+# else
+# define DPRINTKV(x)
+# endif
+#else
+# define DPRINTK(x)
+# define DPRINTKV(x)
+#endif
+
+#ifdef DEBUG_TIME_BTE
+extern u64 BteSetupTime;
+extern u64 BteTransferTime;
+extern u64 BteTeardownTime;
+extern u64 BteExecuteTime;
+#endif
+
+/*
+ * bte_copy(src, dest, len, mode, notification)
+ *
+ * Use the block transfer engine to move kernel
+ * memory from src to dest using the assigned mode.
+ *
+ * Parameters:
+ * src - physical address of the transfer source.
+ * dest - physical address of the transfer destination.
+ * len - number of bytes to transfer from source to dest.
+ * mode - hardware defined. See reference information
+ *          for IBCT0/1 in the SHUB Programmer's Reference.
+ * notification - kernel virtual address of the notification cache
+ * line. If NULL, the default is used and
+ * the bte_copy is synchronous.
+ *
+ * NOTE: This function requires src, dest, and len to
+ * be cache line aligned.
+ */
+extern __inline__ bte_result_t
+bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+{
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+ int bte_to_use;
+#endif
+
+#ifdef DEBUG_TIME_BTE
+ u64 invokeTime = 0;
+ u64 completeTime = 0;
+ u64 xferStartTime = 0;
+ u64 xferCompleteTime = 0;
+#endif
+ u64 transferSize;
+ bteinfo_t *bte;
+
+#ifdef DEBUG_TIME_BTE
+ invokeTime = ia64_get_itc();
+#endif
+
+ DPRINTK(("bte_copy (0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+ src, dest, len, mode, notification));
+
+ if (len == 0) {
+ return (BTE_SUCCESS);
+ }
+
+ ASSERT(!((len & L1_CACHE_MASK) ||
+ (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)));
+
+ ASSERT(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT));
+
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+	do {
+ bte_to_use = 0;
+
+		/* Attempt to lock one of the BTE interfaces */
+		while ((bte_to_use < BTES_PER_NODE)
+		       && (*pda.cpubte[bte_to_use]->
+			   mostRecentNotification & IBLS_BUSY)
+		       &&
+		       (!(spin_trylock
+			  (&(pda.cpubte[bte_to_use]->spinlock))))) {
+			bte_to_use++;
+		}
+
+ if ((bte_to_use >= BTES_PER_NODE) &&
+ !(mode & BTE_WACQUIRE)) {
+ return (BTEFAIL_NOTAVAIL);
+ }
+
+ /* Wait until a bte is available. */
+ }
+ while (bte_to_use >= BTES_PER_NODE);
+
+ bte = pda.cpubte[bte_to_use];
+ DPRINTKV(("Got a lock on bte %d\n", bte_to_use));
+#else
+ /* Assuming one BTE per CPU. */
+ bte = pda.cpubte[0];
+#endif
+
+ /*
+	 * The following are removed for optimization but are
+ * available in the event that the SHUB exhibits
+ * notification problems similar to the hub, bedrock et al.
+ *
+ * bte->mostRecentSrc = src;
+ * bte->mostRecentDest = dest;
+ * bte->mostRecentLen = len;
+ * bte->mostRecentMode = mode;
+ */
+ if (notification == NULL) {
+ /* User does not want to be notified. */
+ bte->mostRecentNotification = &bte->notify;
+ } else {
+ bte->mostRecentNotification = notification;
+ }
+
+ /* Calculate the number of cache lines to transfer. */
+ transferSize = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
+
+	DPRINTKV(("Calculated transfer size of %lu cache lines\n",
+ transferSize));
+
+ /* Initialize the notification to a known value. */
+ *bte->mostRecentNotification = -1L;
+
+
+ DPRINTKV(("Before, status is 0x%lx and notify is 0x%lx\n",
+ HUB_L(BTEREG_LNSTAT_ADDR),
+ *bte->mostRecentNotification));
+
+ /* Set the status reg busy bit and transfer length */
+ DPRINTKV(("IBLS - HUB_S(0x%lx, 0x%lx)\n",
+ BTEREG_LNSTAT_ADDR, IBLS_BUSY | transferSize));
+ HUB_S(BTEREG_LNSTAT_ADDR, IBLS_BUSY | transferSize);
+
+
+	DPRINTKV(("After setting status, status is 0x%lx and notify is 0x%lx\n",
+		  HUB_L(BTEREG_LNSTAT_ADDR), *bte->mostRecentNotification));
+ /* Set the source and destination registers */
+ DPRINTKV(("IBSA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_SOURCE_ADDR,
+ src));
+ HUB_S(BTEREG_SOURCE_ADDR, src);
+ DPRINTKV(("IBDA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_DEST_ADDR, dest));
+ HUB_S(BTEREG_DEST_ADDR, dest);
+
+
+ /* Set the notification register */
+ DPRINTKV(("IBNA - HUB_S(0x%lx, 0x%lx)\n", BTEREG_NOTIF_ADDR,
+ __pa(bte->mostRecentNotification)));
+ HUB_S(BTEREG_NOTIF_ADDR, (__pa(bte->mostRecentNotification)));
+
+
+ DPRINTKV(("Set Notify, status is 0x%lx and notify is 0x%lx\n",
+ HUB_L(BTEREG_LNSTAT_ADDR),
+ *bte->mostRecentNotification));
+
+ /* Initiate the transfer */
+ DPRINTKV(("IBCT - HUB_S(0x%lx, 0x%lx)\n", BTEREG_CTRL_ADDR, mode));
+#ifdef DEBUG_TIME_BTE
+ xferStartTime = ia64_get_itc();
+#endif
+ HUB_S(BTEREG_CTRL_ADDR, BTE_VLD_MODE(mode));
+
+ DPRINTKV(("Initiated, status is 0x%lx and notify is 0x%lx\n",
+ HUB_L(BTEREG_LNSTAT_ADDR),
+ *bte->mostRecentNotification));
+
+ // >>> Temporarily work around not getting a notification
+ // from medusa.
+ // *bte->mostRecentNotification = HUB_L(bte->bte_base_addr);
+
+ if (notification == NULL) {
+ /*
+ * Calculate our timeout
+ *
+ * What are we doing here? We are trying to determine
+		 * the fastest time the BTE could have transferred our
+		 * block of data.  By taking the clock frequency (ticks/sec)
+		 * divided by the BTE MaxT transfer rate (lines/sec)
+		 * times the transfer size (lines), we get a tick
+		 * offset from the current time at which the transfer
+		 * should complete.
+ *
+ * Why do this? We are watching for a notification
+ * failure from the BTE. This behaviour has been
+		 * seen in the SN0 and SN1 hardware under rare circumstances
+ * and is expected in SN2. By checking at the
+ * ideal transfer timeout, we minimize our time
+ * delay from hardware completing our request and
+ * our detecting the failure.
+ */
+ bte->idealTransferTimeout = jiffies +
+		    (transferSize * HZ / BTE_MAXT_LINES_PER_SECOND);
+
+ while ((IBLS_BUSY & bte->notify)) {
+ /*
+ * Notification Workaround: When the max
+ * theoretical time has elapsed, read the hub
+ * status register into the notification area.
+ * This fakes the shub performing the copy.
+ */
+ if (jiffies > bte->idealTransferTimeout) {
+ bte->notify = HUB_L(bte->bte_base_addr);
+ bte->idealTransferTimeoutReached++;
+ bte->idealTransferTimeout = jiffies +
+				    ((bte->notify & BTE_LEN_MASK) *
+				     HZ / BTE_MAXT_LINES_PER_SECOND);
+ }
+ }
+#ifdef DEBUG_TIME_BTE
+ xferCompleteTime = ia64_get_itc();
+#endif
+ if (bte->notify & IBLS_ERROR) {
+ /* >>> Need to do real error checking. */
+ transferSize = 0;
+
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+ spin_unlock(&(bte->spinlock));
+#endif
+ return (BTEFAIL_ERROR);
+ }
+
+ }
+#ifdef CONFIG_IA64_SGI_BTE_LOCKING
+ spin_unlock(&(bte->spinlock));
+#endif
+#ifdef DEBUG_TIME_BTE
+ completeTime = ia64_get_itc();
+
+ BteSetupTime = xferStartTime - invokeTime;
+ BteTransferTime = xferCompleteTime - xferStartTime;
+ BteTeardownTime = completeTime - xferCompleteTime;
+ BteExecuteTime = completeTime - invokeTime;
+#endif
+ return (BTE_SUCCESS);
+}
+
+/*
+ * Define the bte_unaligned_copy as an extern.
+ */
+extern bte_result_t bte_unaligned_copy(u64, u64, u64, u64, char *);
+
+/*
+ * The following is the preferred way of calling bte_unaligned_copy.
+ * If the copy is fully cache line aligned, then bte_copy is
+ * used instead.  Since bte_copy is inlined, this saves a
+ * function call.  NOTE: bte_copy is called synchronously and
+ * blocks until the transfer is complete.  To get the asynchronous
+ * behavior of bte_copy, you must perform this check yourself.
+ */
+#define BTE_UNALIGNED_COPY(src, dest, len, mode, bteBlock) \
+	do { if (((len) & L1_CACHE_MASK) || \
+		 ((src) & L1_CACHE_MASK) || \
+		 ((dest) & L1_CACHE_MASK)) { \
+		bte_unaligned_copy((src), (dest), (len), (mode), (bteBlock)); \
+	} else { \
+		bte_copy((src), (dest), (len), (mode), NULL); \
+	} } while (0)
+
+#endif /* _ASM_IA64_SN_BTE_COPY_H */
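
For illustration, a minimal synchronous caller of bte_copy might look like the sketch below. The function and buffer names are hypothetical; the header only requires that src, dest, and len be L1 cache line aligned, that the addresses be physical, and that len fit within BTE_LEN_MASK cache lines. A NULL notification address selects the synchronous path, in which bte_copy itself spins until IBLS_BUSY clears.

	#include <linux/errno.h>
	#include <asm/page.h>		/* __pa(), PAGE_SIZE */
	#include <asm/sn/bte_copy.h>

	/*
	 * Hypothetical caller: copy one page of kernel memory through
	 * the BTE.  Page-aligned kernel virtual addresses translate to
	 * cache line aligned physical addresses, satisfying the
	 * ASSERTs in bte_copy.
	 */
	static int
	copy_page_via_bte(void *src_kva, void *dst_kva)
	{
		bte_result_t rc;

		/* NULL notification => bte_copy blocks until done. */
		rc = bte_copy(__pa(src_kva), __pa(dst_kva), PAGE_SIZE,
			      BTE_NOTIFY, NULL);

		return (rc == BTE_SUCCESS) ? 0 : -EIO;
	}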
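With a non-NULL notification address, bte_copy returns as soon as the transfer registers have been loaded, and completion detection becomes the caller's responsibility (see the comment above BTE_UNALIGNED_COPY). A sketch of that pattern, again with hypothetical names: the notification cache line must stay resident until the BTE clears IBLS_BUSY in it, and IBLS_ERROR indicates a failed transfer, mirroring the spin loop inside bte_copy itself.

	/*
	 * Hypothetical asynchronous caller.  The notification line is
	 * static so that it remains resident for the lifetime of the
	 * transfer.
	 */
	static volatile u64 bte_note
			__attribute__ ((__aligned__ (L1_CACHE_BYTES)));

	static bte_result_t
	start_bte_copy(u64 src_pa, u64 dst_pa, u64 len)
	{
		/* Non-NULL notification => bte_copy returns at once. */
		return bte_copy(src_pa, dst_pa, len, BTE_NOTIFY,
				(void *) &bte_note);
	}

	static int
	bte_copy_done(void)
	{
		if (bte_note & IBLS_ERROR)
			return -EIO;		/* transfer failed */
		return !(bte_note & IBLS_BUSY);	/* 1 when complete */
	}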
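Finally, a sketch of BTE_UNALIGNED_COPY in use. The header leaves bteBlock undocumented; it is assumed here to be a cache line aligned scratch buffer that bte_unaligned_copy can use to stage the misaligned head and tail of the transfer, and the page-sized allocation below is a guess. The aligned arm of the macro calls bte_copy with a NULL notification and therefore blocks; bte_unaligned_copy takes no notification argument and is presumably synchronous as well.

	/*
	 * Hypothetical wrapper.  "scratch" is an assumed bounce buffer
	 * for bte_unaligned_copy; its required size is not specified in
	 * this header, so one page is used here as a guess.
	 */
	static char scratch[PAGE_SIZE]
			__attribute__ ((__aligned__ (L1_CACHE_BYTES)));

	static void
	copy_any_alignment(u64 src_pa, u64 dst_pa, u64 len)
	{
		/* Misaligned transfers fall back to bte_unaligned_copy. */
		BTE_UNALIGNED_COPY(src_pa, dst_pa, len, BTE_NOTIFY, scratch);
	}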