patch-2.4.20 linux-2.4.20/arch/x86_64/kernel/mtrr.c
- Lines: 1315
- Date: Thu Nov 28 15:53:12 2002
- Orig file: linux-2.4.19/arch/x86_64/kernel/mtrr.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.19/arch/x86_64/kernel/mtrr.c linux-2.4.20/arch/x86_64/kernel/mtrr.c
@@ -0,0 +1,1314 @@
+/* x86-64 MTRR (Memory Type Range Register) driver.
+ Based largely upon arch/i386/kernel/mtrr.c
+
+ Copyright (C) 1997-2000 Richard Gooch
+ Copyright (C) 2002 Dave Jones.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ (For earlier history, see arch/i386/kernel/mtrr.c)
+ v2.00 September 2001 Dave Jones <davej@suse.de>
+ Initial rewrite for x86-64.
+ Removal of non-Intel style MTRR code.
+ v2.01 June 2002 Dave Jones <davej@suse.de>
+ Removal of redundant abstraction layer.
+ 64-bit fixes.
+ v2.02 July 2002 Dave Jones <davej@suse.de>
+ Fix gentry inconsistencies between kernel/userspace.
+ More casts to clean up warnings.
+*/
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/proc_fs.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#define MTRR_NEED_STRINGS
+#include <asm/mtrr.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/agp_backend.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/segment.h>
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/msr.h>
+
+#include <asm/hardirq.h>
+#include <linux/irq.h>
+
+#define MTRR_VERSION "2.02 (20020716)"
+
+#undef Dprintk
+
+#define Dprintk(...)
+
+#define TRUE 1
+#define FALSE 0
+
+#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
+#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+
+#define MTRR_CHANGE_MASK_FIXED 0x01
+#define MTRR_CHANGE_MASK_VARIABLE 0x02
+#define MTRR_CHANGE_MASK_DEFTYPE 0x04
+
+typedef u8 mtrr_type;
+
+#define LINE_SIZE 80
+
+#ifdef CONFIG_SMP
+#define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type)
+#else
+#define set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type, TRUE)
+#endif
+
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_DEVFS_FS)
+#define USERSPACE_INTERFACE
+#endif
+
+#ifdef USERSPACE_INTERFACE
+static char *ascii_buffer;
+static unsigned int ascii_buf_bytes;
+static void compute_ascii (void);
+#else
+#define compute_ascii() while (0)
+#endif
+
+static unsigned int *usage_table;
+static DECLARE_MUTEX (mtrr_lock);
+
+struct set_mtrr_context {
+ u32 deftype_lo;
+ u32 deftype_hi;
+ unsigned long flags;
+ u64 cr4val;
+};
+
+
+/* Put the processor into a state where MTRRs can be safely set */
+static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
+{
+ u64 cr0;
+
+ /* Disable interrupts locally */
+ __save_flags(ctxt->flags);
+ __cli();
+
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) {
+ ctxt->cr4val = read_cr4();
+ write_cr4(ctxt->cr4val & ~(1UL << 7));
+ }
+
+ /* Disable and flush caches. Note that wbinvd flushes the TLBs as
+ a side-effect */
+ cr0 = read_cr0() | 0x40000000;
+ wbinvd();
+ write_cr0(cr0);
+ wbinvd();
+
+ /* Disable MTRRs, and set the default type to uncached */
+ rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
+ wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi);
+}
+
+
+/* Restore the processor after a set_mtrr_prepare */
+static void set_mtrr_done (struct set_mtrr_context *ctxt)
+{
+ /* Flush caches and TLBs */
+ wbinvd();
+
+ /* Restore MTRRdefType */
+ wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
+
+ /* Enable caches */
+ write_cr0(read_cr0() & 0xbfffffff);
+
+ /* Restore value of CR4 */
+ if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+ write_cr4 (ctxt->cr4val);
+
+ /* Re-enable interrupts locally (if enabled previously) */
+ __restore_flags(ctxt->flags);
+}
+
+
+/* This function returns the number of variable MTRRs */
+static unsigned int get_num_var_ranges (void)
+{
+ u32 config, dummy;
+
+ rdmsr (MSR_MTRRcap, config, dummy);
+ return (config & 0xff);
+}
+
+
+/* Returns non-zero if we have the write-combining memory type */
+static int have_wrcomb (void)
+{
+ u32 config, dummy;
+
+ rdmsr (MSR_MTRRcap, config, dummy);
+ return (config & (1 << 10));
+}
+
+
+static u64 size_or_mask, size_and_mask;
+
+static void get_mtrr (unsigned int reg, u64 *base, u32 *size, mtrr_type * type)
+{
+ u32 mask_lo, mask_hi, base_lo, base_hi;
+ u64 newsize;
+
+ rdmsr (MSR_MTRRphysMask(reg), mask_lo, mask_hi);
+ if ((mask_lo & 0x800) == 0) {
+ /* Invalid (i.e. free) range */
+ *base = 0;
+ *size = 0;
+ *type = 0;
+ return;
+ }
+
+ rdmsr (MSR_MTRRphysBase(reg), base_lo, base_hi);
+
+ /* Work out the shifted address mask. */
+ newsize = (u64) mask_hi << 32 | (mask_lo & ~0x800);
+ newsize = ~newsize+1;
+ *size = (u32) newsize >> PAGE_SHIFT;
+ *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *type = base_lo & 0xff;
+}
+
+
+
+/*
+ * Set variable MTRR register on the local CPU.
+ * <reg> The register to set.
+ * <base> The base address of the region.
+ * <size> The size of the region. If this is 0 the region is disabled.
+ * <type> The type of the region.
+ * <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+ * be done externally.
+ */
+static void set_mtrr_up (unsigned int reg, u64 base,
+ u32 size, mtrr_type type, int do_safe)
+{
+ struct set_mtrr_context ctxt;
+ u64 base64;
+ u64 size64;
+
+ if (do_safe)
+ set_mtrr_prepare (&ctxt);
+
+ if (size == 0) {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ wrmsr (MSR_MTRRphysMask(reg), 0, 0);
+ } else {
+ base64 = (base << PAGE_SHIFT) & size_and_mask;
+ wrmsr (MSR_MTRRphysBase(reg), base64 | type, base64 >> 32);
+
+ size64 = ~((size << PAGE_SHIFT) - 1);
+ size64 = size64 & size_and_mask;
+ wrmsr (MSR_MTRRphysMask(reg), (u32) (size64 | 0x800), (u32) (size64 >> 32));
+ }
+ if (do_safe)
+ set_mtrr_done (&ctxt);
+}
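
As a worked illustration of the arithmetic above, a minimal standalone sketch (not part of the patch; the region and the 40-bit physical address width are hypothetical) that prints the MTRRphysBase/MTRRphysMask pair set_mtrr_up() would write for a 64 MB write-combining region at 0xe8000000:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* Hypothetical inputs: 64 MB write-combining region at 0xe8000000,
	   size_and_mask as mtrr_setup() would compute it for 40 physical bits. */
	uint64_t size_and_mask = 0x000000fffffff000ULL;
	uint64_t base = 0xe8000000ULL >> PAGE_SHIFT;	/* in pages */
	uint64_t size = 0x4000;				/* 64 MB in pages */
	unsigned int type = 1;				/* MTRR_TYPE_WRCOMB */

	/* Same arithmetic as set_mtrr_up(): the base goes into PhysBase with
	   the type in the low byte; ~(size - 1) goes into PhysMask with the
	   valid bit (bit 11) set. */
	uint64_t physbase = ((base << PAGE_SHIFT) & size_and_mask) | type;
	uint64_t physmask = (~((size << PAGE_SHIFT) - 1) & size_and_mask) | 0x800;

	/* Prints 00000000e8000001 and 000000fffc000800 */
	printf("MTRRphysBase = %016llx\n", (unsigned long long) physbase);
	printf("MTRRphysMask = %016llx\n", (unsigned long long) physmask);
	return 0;
}
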
+
+
+#ifdef CONFIG_SMP
+
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
+/* Get the MSR pair relating to a var range */
+static void __init get_mtrr_var_range (unsigned int index,
+ struct mtrr_var_range *vr)
+{
+ rdmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
+ rdmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
+}
+
+
+/* Set the MSR pair relating to a var range. Returns TRUE if
+ changes are made */
+static int __init set_mtrr_var_range_testing (unsigned int index,
+ struct mtrr_var_range *vr)
+{
+ u32 lo, hi;
+ int changed = FALSE;
+
+ rdmsr (MSR_MTRRphysBase(index), lo, hi);
+ if ((vr->base_lo & 0xfffff0ff) != (lo & 0xfffff0ff) ||
+ (vr->base_hi & 0x000fffff) != (hi & 0x000fffff)) {
+ wrmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
+ changed = TRUE;
+ }
+
+ rdmsr (MSR_MTRRphysMask(index), lo, hi);
+ if ((vr->mask_lo & 0xfffff800) != (lo & 0xfffff800) ||
+ (vr->mask_hi & 0x000fffff) != (hi & 0x000fffff)) {
+ wrmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
+ changed = TRUE;
+ }
+ return changed;
+}
+
+
+static void __init get_fixed_ranges (mtrr_type * frs)
+{
+ u32 *p = (u32 *) frs;
+ int i;
+
+ rdmsr (MSR_MTRRfix64K_00000, p[0], p[1]);
+
+ for (i = 0; i < 2; i++)
+ rdmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
+ for (i = 0; i < 8; i++)
+ rdmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
+}
+
+
+static int __init set_fixed_ranges_testing (mtrr_type * frs)
+{
+ u32 *p = (u32 *) frs;
+ int changed = FALSE;
+ int i;
+ u32 lo, hi;
+
+ Dprintk (KERN_INFO "mtrr: rdmsr 64K_00000\n");
+ rdmsr (MSR_MTRRfix64K_00000, lo, hi);
+ if (p[0] != lo || p[1] != hi) {
+ Dprintk (KERN_INFO "mtrr: Writing %x:%x to 64K MSR. lohi were %x:%x\n", p[0], p[1], lo, hi);
+ wrmsr (MSR_MTRRfix64K_00000, p[0], p[1]);
+ changed = TRUE;
+ }
+
+ Dprintk (KERN_INFO "mtrr: rdmsr 16K_80000\n");
+ for (i = 0; i < 2; i++) {
+ rdmsr (MSR_MTRRfix16K_80000 + i, lo, hi);
+ if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
+ Dprintk (KERN_INFO "mtrr: Writing %x:%x to 16K MSR%d. lohi were %x:%x\n", p[2 + i * 2], p[3 + i * 2], i, lo, hi );
+ wrmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
+ changed = TRUE;
+ }
+ }
+
+ Dprintk (KERN_INFO "mtrr: rdmsr 4K_C0000\n");
+ for (i = 0; i < 8; i++) {
+ rdmsr (MSR_MTRRfix4K_C0000 + i, lo, hi);
+ Dprintk (KERN_INFO "mtrr: MTRRfix4K_C0000+%d = %x:%x\n", i, lo, hi);
+ if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
+ Dprintk (KERN_INFO "mtrr: Writing %x:%x to 4K MSR%d. lohi were %x:%x\n", p[6 + i * 2], p[7 + i * 2], i, lo, hi);
+ wrmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
+ changed = TRUE;
+ }
+ }
+ return changed;
+}
+
+
+struct mtrr_state {
+ unsigned int num_var_ranges;
+ struct mtrr_var_range *var_ranges;
+ mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+ mtrr_type def_type;
+ unsigned char enabled;
+};
+
+
+/* Grab all of the MTRR state for this CPU into *state */
+static void __init get_mtrr_state (struct mtrr_state *state)
+{
+ unsigned int nvrs, i;
+ struct mtrr_var_range *vrs;
+ u32 lo, dummy;
+
+ nvrs = state->num_var_ranges = get_num_var_ranges();
+ vrs = state->var_ranges
+ = kmalloc (nvrs * sizeof (struct mtrr_var_range), GFP_KERNEL);
+ if (vrs == NULL)
+ nvrs = state->num_var_ranges = 0;
+
+ for (i = 0; i < nvrs; i++)
+ get_mtrr_var_range (i, &vrs[i]);
+ get_fixed_ranges (state->fixed_ranges);
+
+ rdmsr (MSR_MTRRdefType, lo, dummy);
+ state->def_type = (lo & 0xff);
+ state->enabled = (lo & 0xc00) >> 10;
+}
+
+
+/* Free resources associated with a struct mtrr_state */
+static void __init finalize_mtrr_state (struct mtrr_state *state)
+{
+ if (state->var_ranges)
+ kfree (state->var_ranges);
+}
+
+
+/*
+ * Set the MTRR state for this CPU.
+ * <state> The MTRR state information to read.
+ * <ctxt> Some relevant CPU context.
+ * [NOTE] The CPU must already be in a safe state for MTRR changes.
+ * [RETURNS] 0 if no changes made, else a mask indicating what was changed.
+ */
+static u64 __init set_mtrr_state (struct mtrr_state *state,
+ struct set_mtrr_context *ctxt)
+{
+ unsigned int i;
+ u64 change_mask = 0;
+
+ for (i = 0; i < state->num_var_ranges; i++)
+ if (set_mtrr_var_range_testing (i, &state->var_ranges[i]))
+ change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+ if (set_fixed_ranges_testing (state->fixed_ranges))
+ change_mask |= MTRR_CHANGE_MASK_FIXED;
+ /* set_mtrr_done() restores the old value of MTRRdefType,
+ so to set it we fiddle with the saved value */
+ if ((ctxt->deftype_lo & 0xff) != state->def_type
+ || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled) {
+ ctxt->deftype_lo |= (state->def_type | state->enabled << 10);
+ change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+ }
+
+ return change_mask;
+}
+
+
+static atomic_t undone_count;
+static volatile int wait_barrier_execute = FALSE;
+static volatile int wait_barrier_cache_enable = FALSE;
+
+struct set_mtrr_data {
+ u64 smp_base;
+ u32 smp_size;
+ unsigned int smp_reg;
+ mtrr_type smp_type;
+};
+
+/*
+ * Synchronisation handler. Executed by "other" CPUs.
+ */
+static void ipi_handler (void *info)
+{
+ struct set_mtrr_data *data = info;
+ struct set_mtrr_context ctxt;
+
+ set_mtrr_prepare (&ctxt);
+ /* Notify master that I've flushed and disabled my cache */
+ atomic_dec (&undone_count);
+ while (wait_barrier_execute)
+ barrier ();
+
+ /* The master has cleared me to execute */
+ set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size,
+ data->smp_type, FALSE);
+
+ /* Notify master CPU that I've executed the function */
+ atomic_dec (&undone_count);
+
+ /* Wait for master to clear me to enable cache and return */
+ while (wait_barrier_cache_enable)
+ barrier ();
+ set_mtrr_done (&ctxt);
+}
+
+
+static void set_mtrr_smp (unsigned int reg, u64 base, u32 size, mtrr_type type)
+{
+ struct set_mtrr_data data;
+ struct set_mtrr_context ctxt;
+
+ data.smp_reg = reg;
+ data.smp_base = base;
+ data.smp_size = size;
+ data.smp_type = type;
+ wait_barrier_execute = TRUE;
+ wait_barrier_cache_enable = TRUE;
+ atomic_set (&undone_count, smp_num_cpus - 1);
+
+ /* Start the ball rolling on other CPUs */
+ if (smp_call_function (ipi_handler, &data, 1, 0) != 0)
+ panic ("mtrr: timed out waiting for other CPUs\n");
+
+ /* Flush and disable the local CPU's cache */
+ set_mtrr_prepare (&ctxt);
+
+ /* Wait for all other CPUs to flush and disable their caches */
+ while (atomic_read (&undone_count) > 0)
+ barrier ();
+
+ /* Set up for completion wait and then release other CPUs to change MTRRs */
+ atomic_set (&undone_count, smp_num_cpus - 1);
+ wait_barrier_execute = FALSE;
+ set_mtrr_up (reg, base, size, type, FALSE);
+
+ /* Now wait for other CPUs to complete the function */
+ while (atomic_read (&undone_count) > 0)
+ barrier ();
+
+ /* Now all CPUs should have finished the function. Release the barrier to
+ allow them to re-enable their caches and return from their interrupt,
+ then enable the local cache and return */
+ wait_barrier_cache_enable = FALSE;
+ set_mtrr_done (&ctxt);
+}
+
+
+/* Some broken BIOSes don't set all MTRRs the same! */
+static void __init mtrr_state_warn (u32 mask)
+{
+ if (!mask)
+ return;
+ if (mask & MTRR_CHANGE_MASK_FIXED)
+ printk (KERN_INFO "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+ if (mask & MTRR_CHANGE_MASK_VARIABLE)
+ printk (KERN_INFO "mtrr: your CPUs had inconsistent variable MTRR settings\n");
+ if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+ printk (KERN_INFO "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+ printk (KERN_INFO "mtrr: probably your BIOS does not setup all CPUs\n");
+}
+
+#endif /* CONFIG_SMP */
+
+
+static inline char * attrib_to_str (int x)
+{
+ return (x <= 6) ? mtrr_strings[x] : "?";
+}
+
+
+static void __init init_table (void)
+{
+ int i, max;
+
+ max = get_num_var_ranges ();
+ if ((usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL))==NULL) {
+ printk ("mtrr: could not allocate\n");
+ return;
+ }
+
+ for (i = 0; i < max; i++)
+ usage_table[i] = 1;
+
+#ifdef USERSPACE_INTERFACE
+ if ((ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL)) == NULL) {
+ printk ("mtrr: could not allocate\n");
+ return;
+ }
+ ascii_buf_bytes = 0;
+ compute_ascii ();
+#endif
+}
+
+
+/*
+ * Get a free MTRR.
+ * returns the index of the region on success, else -1 on error.
+*/
+static int get_free_region(void)
+{
+ int i, max;
+ mtrr_type ltype;
+ u64 lbase;
+ u32 lsize;
+
+ max = get_num_var_ranges ();
+ for (i = 0; i < max; ++i) {
+ get_mtrr (i, &lbase, &lsize, <ype);
+ if (lsize == 0)
+ return i;
+ }
+ return -ENOSPC;
+}
+
+
+/**
+ * mtrr_add_page - Add a memory type region
+ * @base: Physical base address of region in pages (4 KB)
+ * @size: Physical size of region in pages (4 KB)
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ * Returns the MTRR register on success, else a negative number
+ * indicating the error code.
+ *
+ * Memory type region registers control the caching on newer
+ * processors. This function allows drivers to request that an MTRR be added.
+ * The caller should expect to need to provide a power of two size on
+ * an equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
+{
+ int i, max;
+ mtrr_type ltype;
+ u64 lbase, last;
+ u32 lsize;
+
+ if (base + size < 0x100) {
+ printk (KERN_WARNING
+ "mtrr: cannot set region below 1 MiB (0x%Lx000,0x%x000)\n",
+ base, size);
+ return -EINVAL;
+ }
+
+#if 0 && defined(__x86_64__) && defined(CONFIG_AGP)
+ {
+ agp_kern_info info;
+ if (type != MTRR_TYPE_UNCACHABLE && agp_copy_info(&info) >= 0 &&
+ base<<PAGE_SHIFT >= info.aper_base &&
+ (base<<PAGE_SHIFT)+(size<<PAGE_SHIFT) >=
+ info.aper_base+info.aper_size*1024*1024)
+ printk(KERN_INFO "%s[%d] setting conflicting mtrr into agp aperture\n",current->comm,current->pid);
+ }
+#endif
+
+ /* Check upper bits of base and last are equal and lower bits are 0
+ for base and 1 for last */
+ last = base + size - 1;
+ for (lbase = base; !(lbase & 1) && (last & 1);
+ lbase = lbase >> 1, last = last >> 1) ;
+
+ if (lbase != last) {
+ printk (KERN_WARNING
+ "mtrr: base(0x%Lx000) is not aligned on a size(0x%x000) boundary\n",
+ base, size);
+ return -EINVAL;
+ }
+
+ if (type >= MTRR_NUM_TYPES) {
+ printk ("mtrr: type: %u illegal\n", type);
+ return -EINVAL;
+ }
+
+ /* If the type is WC, check that this processor supports it */
+ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
+ printk (KERN_WARNING
+ "mtrr: your processor doesn't support write-combining\n");
+ return -ENOSYS;
+ }
+
+ if (base & (size_or_mask>>PAGE_SHIFT)) {
+ printk (KERN_WARNING "mtrr: base(%Lx) exceeds the MTRR width(%Lx)\n",
+ base, (size_or_mask>>PAGE_SHIFT));
+ return -EINVAL;
+ }
+
+ if (size & (size_or_mask>>PAGE_SHIFT)) {
+ printk (KERN_WARNING "mtrr: size exceeds the MTRR width\n");
+ return -EINVAL;
+ }
+
+ increment = increment ? 1 : 0;
+ max = get_num_var_ranges ();
+ /* Search for existing MTRR */
+ down (&mtrr_lock);
+ for (i = 0; i < max; ++i) {
+ get_mtrr (i, &lbase, &lsize, <ype);
+ if (base >= lbase + lsize)
+ continue;
+ if ((base < lbase) && (base + size <= lbase))
+ continue;
+
+ /* At this point we know there is some kind of overlap/enclosure */
+ if ((base < lbase) || (base + size > lbase + lsize)) {
+ up (&mtrr_lock);
+ printk (KERN_WARNING
+ "mtrr: 0x%Lx000,0x%x000 overlaps existing"
+ " 0x%Lx000,0x%x000\n", base, size, lbase, lsize);
+ return -EINVAL;
+ }
+ /* New region is enclosed by an existing region */
+ if (ltype != type) {
+ if (type == MTRR_TYPE_UNCACHABLE)
+ continue;
+ up (&mtrr_lock);
+ printk
+ ("mtrr: type mismatch for %Lx000,%x000 old: %s new: %s\n",
+ base, size,
+ attrib_to_str (ltype),
+ attrib_to_str (type));
+ return -EINVAL;
+ }
+ if (increment)
+ ++usage_table[i];
+ compute_ascii ();
+ up (&mtrr_lock);
+ return i;
+ }
+ /* Search for an empty MTRR */
+ i = get_free_region();
+ if (i < 0) {
+ up (&mtrr_lock);
+ printk ("mtrr: no more MTRRs available\n");
+ return i;
+ }
+ set_mtrr (i, base, size, type);
+ usage_table[i] = 1;
+ compute_ascii ();
+ up (&mtrr_lock);
+ return i;
+}
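
The "power of two size on an equivalent power of two boundary" rule from the comment above is exactly what the shift loop over base and last enforces: both are shifted right together as long as base keeps producing 0 bits and last keeps producing 1 bits, and they only meet if size is a power of two and base is aligned to it. A small standalone sketch of that check, with made-up example regions (not part of the patch):

#include <stdio.h>
#include <stdint.h>

/* Same trick as the loop in mtrr_add_page(): returns 1 if size (in pages)
   is a power of two and base (in pages) is aligned on a size boundary. */
static int region_is_aligned(uint64_t base, uint32_t size)
{
	uint64_t lbase = base, last = base + size - 1;

	for (; !(lbase & 1) && (last & 1); lbase >>= 1, last >>= 1)
		;
	return lbase == last;
}

int main(void)
{
	/* 64 MB at 0xe8000000 is aligned; 64 MB at 0xe9000000 is not. */
	printf("%d\n", region_is_aligned(0xe8000000u >> 12, 0x4000));	/* 1 */
	printf("%d\n", region_is_aligned(0xe9000000u >> 12, 0x4000));	/* 0 */
	return 0;
}
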
+
+
+/**
+ * mtrr_add - Add a memory type region
+ * @base: Physical base address of region
+ * @size: Physical size of region
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ * Returns the MTRR register on success, else a negative number
+ * indicating the error code.
+ *
+ * Memory type region registers control the caching on newer processors.
+ * This function allows drivers to request that an MTRR be added.
+ * The caller should expect to need to provide a power of two size on
+ * an equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add (u64 base, u32 size, unsigned int type, char increment)
+{
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk ("mtrr: size and base must be multiples of 4 kiB\n");
+ printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
+ return -EINVAL;
+ }
+ return mtrr_add_page (base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+ increment);
+}
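
A sketch of typical driver usage of this interface (the framebuffer address and size are hypothetical, and in a real 2.4 driver this would sit in the probe and remove paths):

#include <linux/kernel.h>
#include <asm/mtrr.h>

static int fb_mtrr = -1;	/* cookie returned by mtrr_add() */

static void example_enable_wc(void)
{
	/* Map a hypothetical 32 MB framebuffer at 0xe8000000 write-combining.
	   A negative return only means no MTRR could be set up; the device
	   still works, just without burst writes. */
	fb_mtrr = mtrr_add(0xe8000000, 32 * 1024 * 1024, MTRR_TYPE_WRCOMB, 1);
	if (fb_mtrr < 0)
		printk(KERN_INFO "example: no write-combining MTRR (%d)\n",
		       fb_mtrr);
}

static void example_disable_wc(void)
{
	/* Base and size are ignored when a valid register cookie is given. */
	if (fb_mtrr >= 0)
		mtrr_del(fb_mtrr, 0, 0);
}
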
+
+
+/**
+ * mtrr_del_page - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+
+int mtrr_del_page (int reg, u64 base, u32 size)
+{
+ int i, max;
+ mtrr_type ltype;
+ u64 lbase;
+ u32 lsize;
+
+ max = get_num_var_ranges ();
+ down (&mtrr_lock);
+ if (reg < 0) {
+ /* Search for existing MTRR */
+ for (i = 0; i < max; ++i) {
+ get_mtrr (i, &lbase, &lsize, <ype);
+ if (lbase == base && lsize == size) {
+ reg = i;
+ break;
+ }
+ }
+ if (reg < 0) {
+ up (&mtrr_lock);
+ printk ("mtrr: no MTRR for %Lx000,%x000 found\n", base, size);
+ return -EINVAL;
+ }
+ }
+
+ if (reg >= max) {
+ up (&mtrr_lock);
+ printk ("mtrr: register: %d too big\n", reg);
+ return -EINVAL;
+ }
+ get_mtrr (reg, &lbase, &lsize, <ype);
+
+ if (lsize < 1) {
+ up (&mtrr_lock);
+ printk ("mtrr: MTRR %d not used\n", reg);
+ return -EINVAL;
+ }
+
+ if (usage_table[reg] < 1) {
+ up (&mtrr_lock);
+ printk ("mtrr: reg: %d has count=0\n", reg);
+ return -EINVAL;
+ }
+
+ if (--usage_table[reg] < 1)
+ set_mtrr (reg, 0, 0, 0);
+ compute_ascii ();
+ up (&mtrr_lock);
+ return reg;
+}
+
+
+/**
+ * mtrr_del - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+
+int mtrr_del (int reg, u64 base, u32 size)
+{
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk ("mtrr: size and base must be multiples of 4 kiB\n");
+ printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
+ return -EINVAL;
+ }
+ return mtrr_del_page (reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+
+#ifdef USERSPACE_INTERFACE
+
+static int mtrr_file_add (u64 base, u32 size, unsigned int type,
+ struct file *file, int page)
+{
+ int reg, max;
+ unsigned int *fcount = file->private_data;
+
+ max = get_num_var_ranges ();
+ if (fcount == NULL) {
+ if ((fcount =
+ kmalloc (max * sizeof *fcount, GFP_KERNEL)) == NULL) {
+ printk ("mtrr: could not allocate\n");
+ return -ENOMEM;
+ }
+ memset (fcount, 0, max * sizeof *fcount);
+ file->private_data = fcount;
+ }
+
+ if (!page) {
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk
+ (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
+ printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base);
+ return -EINVAL;
+ }
+ base >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+ }
+
+ reg = mtrr_add_page (base, size, type, 1);
+
+ if (reg >= 0)
+ ++fcount[reg];
+ return reg;
+}
+
+
+static int mtrr_file_del (u64 base, u32 size,
+ struct file *file, int page)
+{
+ int reg;
+ unsigned int *fcount = file->private_data;
+
+ if (!page) {
+ if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+ printk
+ (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
+ printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base);
+ return -EINVAL;
+ }
+ base >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+ }
+ reg = mtrr_del_page (-1, base, size);
+ if (reg < 0)
+ return reg;
+ if (fcount == NULL)
+ return reg;
+ if (fcount[reg] < 1)
+ return -EINVAL;
+ --fcount[reg];
+ return reg;
+}
+
+
+static ssize_t mtrr_read (struct file *file, char *buf, size_t len,
+ loff_t * ppos)
+{
+ if (*ppos >= ascii_buf_bytes)
+ return 0;
+
+ if (*ppos + len > ascii_buf_bytes)
+ len = ascii_buf_bytes - *ppos;
+
+ if (copy_to_user (buf, ascii_buffer + *ppos, len))
+ return -EFAULT;
+
+ *ppos += len;
+ return len;
+}
+
+
+static ssize_t mtrr_write (struct file *file, const char *buf,
+ size_t len, loff_t * ppos)
+/* Format of control line:
+ "base=%Lx size=%Lx type=%s" OR:
+ "disable=%d"
+*/
+{
+ int i, err, reg;
+ u64 base;
+ u32 size;
+ char *ptr;
+ char line[LINE_SIZE];
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* Can't seek (pwrite) on this device */
+ if (ppos != &file->f_pos)
+ return -ESPIPE;
+ memset (line, 0, LINE_SIZE);
+
+ if (len > LINE_SIZE)
+ len = LINE_SIZE;
+
+ if (copy_from_user (line, buf, len - 1))
+ return -EFAULT;
+ ptr = line + strlen (line) - 1;
+
+ if (*ptr == '\n')
+ *ptr = '\0';
+
+ if (!strncmp (line, "disable=", 8)) {
+ reg = simple_strtoul (line + 8, &ptr, 0);
+ err = mtrr_del_page (reg, 0, 0);
+ if (err < 0)
+ return err;
+ return len;
+ }
+
+ if (strncmp (line, "base=", 5)) {
+ printk (KERN_INFO "mtrr: no \"base=\" in line: \"%s\"\n", line);
+ return -EINVAL;
+ }
+
+ base = simple_strtoull (line + 5, &ptr, 0);
+
+ for (; isspace (*ptr); ++ptr) ;
+
+ if (strncmp (ptr, "size=", 5)) {
+ printk (KERN_INFO "mtrr: no \"size=\" in line: \"%s\"\n", line);
+ return -EINVAL;
+ }
+
+ size = simple_strtoull (ptr + 5, &ptr, 0);
+
+ if ((base & 0xfff) || (size & 0xfff)) {
+ printk (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
+ printk (KERN_INFO "mtrr: size: 0x%x base: 0x%Lx\n", size, base);
+ return -EINVAL;
+ }
+
+ for (; isspace (*ptr); ++ptr) ;
+
+ if (strncmp (ptr, "type=", 5)) {
+ printk (KERN_INFO "mtrr: no \"type=\" in line: \"%s\"\n", line);
+ return -EINVAL;
+ }
+ ptr += 5;
+
+ for (; isspace (*ptr); ++ptr) ;
+
+ for (i = 0; i < MTRR_NUM_TYPES; ++i) {
+ if (strcmp (ptr, mtrr_strings[i]))
+ continue;
+ base >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+ err = mtrr_add_page ((u64) base, size, i, 1);
+ if (err < 0)
+ return err;
+ return len;
+ }
+ printk (KERN_INFO "mtrr: illegal type: \"%s\"\n", ptr);
+ return -EINVAL;
+}
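
The same control line can be exercised from userspace; a minimal sketch (not part of the patch, must be run as root, and the base/size values are made up) that asks for a 32 MB write-combining region via /proc/mtrr:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Parsed by mtrr_write() above; the type string must match one of
	   the mtrr_strings[] entries exactly. */
	const char *line = "base=0xe8000000 size=0x2000000 type=write-combining\n";
	int fd = open("/proc/mtrr", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/mtrr");
		return 1;
	}
	if (write(fd, line, strlen(line)) < 0)
		perror("write");
	close(fd);
	return 0;
}
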
+
+
+static int mtrr_ioctl (struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ int err;
+ mtrr_type type;
+ struct mtrr_sentry sentry;
+ struct mtrr_gentry gentry;
+
+ switch (cmd) {
+ default:
+ return -ENOIOCTLCMD;
+
+ case MTRRIOC_ADD_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_file_add (sentry.base, sentry.size, sentry.type,
+ file, 0);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_SET_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_add (sentry.base, sentry.size, sentry.type, 0);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_DEL_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_file_del (sentry.base, sentry.size, file, 0);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_KILL_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_del (-1, sentry.base, sentry.size);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_GET_ENTRY:
+ if (copy_from_user (&gentry, (void *) arg, sizeof gentry))
+ return -EFAULT;
+ if (gentry.regnum >= get_num_var_ranges ())
+ return -EINVAL;
+ get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);
+
+ /* Hide entries that go above 4GB */
+ if (gentry.base + gentry.size > 0x100000
+ || gentry.size == 0x100000)
+ gentry.base = gentry.size = gentry.type = 0;
+ else {
+ gentry.base <<= PAGE_SHIFT;
+ gentry.size <<= PAGE_SHIFT;
+ gentry.type = type;
+ }
+
+ if (copy_to_user ((void *) arg, &gentry, sizeof gentry))
+ return -EFAULT;
+ break;
+
+ case MTRRIOC_ADD_PAGE_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_file_add (sentry.base, sentry.size, sentry.type, file, 1);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_SET_PAGE_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_add_page (sentry.base, sentry.size, sentry.type, 0);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_DEL_PAGE_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_file_del (sentry.base, sentry.size, file, 1);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_KILL_PAGE_ENTRY:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
+ return -EFAULT;
+ err = mtrr_del_page (-1, sentry.base, sentry.size);
+ if (err < 0)
+ return err;
+ break;
+
+ case MTRRIOC_GET_PAGE_ENTRY:
+ if (copy_from_user (&gentry, (void *) arg, sizeof gentry))
+ return -EFAULT;
+ if (gentry.regnum >= get_num_var_ranges ())
+ return -EINVAL;
+ get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);
+ gentry.type = type;
+
+ if (copy_to_user ((void *) arg, &gentry, sizeof gentry))
+ return -EFAULT;
+ break;
+ }
+ return 0;
+}
+
+
+static int mtrr_close (struct inode *ino, struct file *file)
+{
+ int i, max;
+ unsigned int *fcount = file->private_data;
+
+ if (fcount == NULL)
+ return 0;
+
+ lock_kernel ();
+ max = get_num_var_ranges ();
+ for (i = 0; i < max; ++i) {
+ while (fcount[i] > 0) {
+ if (mtrr_del (i, 0, 0) < 0)
+ printk ("mtrr: reg %d not used\n", i);
+ --fcount[i];
+ }
+ }
+ unlock_kernel ();
+ kfree (fcount);
+ file->private_data = NULL;
+ return 0;
+}
+
+
+static struct file_operations mtrr_fops = {
+ owner: THIS_MODULE,
+ read: mtrr_read,
+ write: mtrr_write,
+ ioctl: mtrr_ioctl,
+ release:mtrr_close,
+};
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_root_mtrr;
+#endif
+
+static devfs_handle_t devfs_handle;
+
+static void compute_ascii (void)
+{
+ char factor;
+ int i, max;
+ mtrr_type type;
+ u64 base;
+ u32 size;
+
+ ascii_buf_bytes = 0;
+ max = get_num_var_ranges ();
+ for (i = 0; i < max; i++) {
+ get_mtrr (i, &base, &size, &type);
+ if (size == 0)
+ usage_table[i] = 0;
+ else {
+ if (size < (0x100000 >> PAGE_SHIFT)) {
+ /* less than 1MB */
+ factor = 'K';
+ size <<= PAGE_SHIFT - 10;
+ } else {
+ factor = 'M';
+ size >>= 20 - PAGE_SHIFT;
+ }
+ sprintf (ascii_buffer + ascii_buf_bytes,
+ "reg%02i: base=0x%05Lx000 (%4iMB), size=%4i%cB: %s, count=%d\n",
+ i, base, (u32) base >> (20 - PAGE_SHIFT), size, factor,
+ attrib_to_str (type), usage_table[i]);
+ ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
+ }
+ }
+ devfs_set_file_size (devfs_handle, ascii_buf_bytes);
+#ifdef CONFIG_PROC_FS
+ if (proc_root_mtrr)
+ proc_root_mtrr->size = ascii_buf_bytes;
+#endif
+}
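
For a hypothetical 64 MB write-combining region at 0xe8000000 with a usage count of 1, the sprintf() format above produces a /proc/mtrr line of the form:

reg00: base=0xe8000000 (3712MB), size=  64MB: write-combining, count=1
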
+
+#endif /* USERSPACE_INTERFACE */
+
+EXPORT_SYMBOL (mtrr_add);
+EXPORT_SYMBOL (mtrr_del);
+
+
+static void __init mtrr_setup (void)
+{
+ printk ("mtrr: v%s)\n", MTRR_VERSION);
+
+ if (test_bit (X86_FEATURE_MTRR, boot_cpu_data.x86_capability)) {
+ /* Query the width (in bits) of the physical
+ addressable memory on the Hammer family. */
+ if ((cpuid_eax (0x80000000) >= 0x80000008)) {
+ u32 phys_addr;
+ phys_addr = cpuid_eax (0x80000008) & 0xff;
+ size_or_mask = ~((1L << phys_addr) - 1);
+ /*
+ * top bits MBZ as they're beyond the addressable range.
+ * bottom bits MBZ as we don't care about lower 12 bits of addr.
+ */
+ size_and_mask = (~size_or_mask) & 0x000ffffffffff000L;
+ }
+ }
+}
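
A worked example of the mask computation above, as a standalone sketch (not part of the patch; the 40-bit physical address width is a hypothetical CPUID 0x80000008 result):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int phys_addr = 40;	/* hypothetical CPUID[0x80000008].EAX & 0xff */
	uint64_t size_or_mask = ~((1ULL << phys_addr) - 1);
	uint64_t size_and_mask = (~size_or_mask) & 0x000ffffffffff000ULL;

	/* Prints ffffff0000000000 and 000000fffffff000: bits above the
	   physical width and the low 12 address bits are masked off. */
	printf("size_or_mask  = %016llx\n", (unsigned long long) size_or_mask);
	printf("size_and_mask = %016llx\n", (unsigned long long) size_and_mask);
	return 0;
}
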
+
+#ifdef CONFIG_SMP
+
+static volatile u32 smp_changes_mask __initdata = 0;
+static struct mtrr_state smp_mtrr_state __initdata = { 0, 0 };
+
+void __init mtrr_init_boot_cpu (void)
+{
+ mtrr_setup();
+ get_mtrr_state (&smp_mtrr_state);
+}
+
+
+void __init mtrr_init_secondary_cpu (void)
+{
+ u64 mask;
+ int count;
+ struct set_mtrr_context ctxt;
+
+ /* Note that this is not ideal, since the cache is only flushed/disabled
+ for this CPU while the MTRRs are changed, but changing this requires
+ more invasive changes to the way the kernel boots */
+ set_mtrr_prepare (&ctxt);
+ mask = set_mtrr_state (&smp_mtrr_state, &ctxt);
+ set_mtrr_done (&ctxt);
+
+ /* Use the atomic bitops to update the global mask */
+ for (count = 0; count < sizeof mask * 8; ++count) {
+ if (mask & 0x01)
+ set_bit (count, &smp_changes_mask);
+ mask >>= 1;
+ }
+}
+
+#endif /* CONFIG_SMP */
+
+
+int __init mtrr_init (void)
+{
+#ifdef CONFIG_SMP
+ /* mtrr_setup() should already have been called from mtrr_init_boot_cpu() */
+
+ finalize_mtrr_state (&smp_mtrr_state);
+ mtrr_state_warn (smp_changes_mask);
+#else
+ mtrr_setup();
+#endif
+
+#ifdef CONFIG_PROC_FS
+ proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+ if (proc_root_mtrr) {
+ proc_root_mtrr->owner = THIS_MODULE;
+ proc_root_mtrr->proc_fops = &mtrr_fops;
+ }
+#endif
+#ifdef CONFIG_DEVFS_FS
+ devfs_handle = devfs_register (NULL, "cpu/mtrr", DEVFS_FL_DEFAULT, 0, 0,
+ S_IFREG | S_IRUGO | S_IWUSR,
+ &mtrr_fops, NULL);
+#endif
+ init_table ();
+ return 0;
+}