patch-2.1.38 linux/mm/slab.c
Next file: linux/net/TUNABLE
Previous file: linux/mm/mmap.c
Back to the patch index
Back to the overall index
- Lines: 2878
- Date:
Wed May 14 15:01:21 1997
- Orig file:
v2.1.37/linux/mm/slab.c
- Orig date:
Tue May 13 22:41:20 1997
diff -u --recursive --new-file v2.1.37/linux/mm/slab.c linux/mm/slab.c
@@ -1,8 +1,81 @@
/*
* linux/mm/slab.c
- * Written by Mark Hemment, 1996.
+ * Written by Mark Hemment, 1996/97.
* (markhe@nextd.demon.co.uk)
+ *
+ * 11 April '97. Started multi-threading - markhe
+ * The global cache-chain is protected by the semaphore 'cache_chain_sem'.
+ * The sem is only needed when accessing/extending the cache-chain, which
+ * can never happen inside an interrupt (kmem_cache_create(),
+ * kmem_cache_shrink() and kmem_cache_reap()).
+ * This is a medium-term exclusion lock.
+ *
+ * Each cache has its own lock; 'c_spinlock'. This lock is needed only
+ * when accessing non-constant members of a cache-struct.
+ * Note: 'constant members' are assigned a value in kmem_cache_create() before
+ * the cache is linked into the cache-chain. The values never change, so not
+ * even a multi-reader lock is needed for these members.
+ * The c_spinlock is only ever held for a few cycles.
+ *
+ * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
+ * may be sleeping and therefore not holding the semaphore/lock), the
+ * c_growing field is used. This also prevents reaping from a cache.
+ *
+ * Note, caches can _never_ be destroyed. When a sub-system (eg module) has
+ * finished with a cache, it can only be shrunk. This leaves the cache empty,
+ * but already enabled for re-use, eg. during a module re-load.
+ *
+ * Notes:
+ * o Constructors/deconstructors are called while the cache-lock
+ * is _not_ held. Therefore they _must_ be threaded.
+ * o Constructors must not attempt to allocate memory from the
+ * same cache that they are a constructor for - infinite loop!
+ * (There is no easy way to trap this.)
+ * o The per-cache locks must be obtained with local-interrupts disabled.
+ * o When compiled with debug support, and an object-verify (upon release)
+ * is requested for a cache, the verify-function is called with the cache
+ * lock held. This helps debugging.
+ * o The functions called from try_to_free_page() must not attempt
+ * to allocate memory from a cache which is being grown.
+ * The buffer sub-system might try to allocate memory, via buffer_cachep.
+ * As this pri is passed to the SLAB, and then (if necessary) onto the
+ * gfp() funcs (which avoid calling try_to_free_page()), no deadlock
+ * should happen.
+ *
+ * The positioning of the per-cache lock is tricky. If the lock is
+ * placed on the same h/w cache line as commonly accessed members
+ * the number of L1 cache-line faults is reduced. However, this can
+ * lead to the cache-line ping-ponging between processors when the
+ * lock is in contention (and the common members are being accessed).
+ * Decided to keep it away from common members.
+ *
+ * More fine-graining is possible, with per-slab locks...but this might be
+ * taking fine graining too far, but would have the advantage;
+ * During most allocs/frees no writes occur to the cache-struct.
+ * Therefore a multi-reader/one writer lock could be used (the writer
+ * needed when the slab chain is being linked/unlinked).
+ * As we would not have an exclusion lock for the cache-structure, one
+ * would be needed per-slab (for updating s_free ptr, and/or the contents
+ * of s_index).
+ * The above locking would allow parallel operations to different slabs within
+ * the same cache with reduced spinning.
+ *
+ * Per-engine slab caches, backed by a global cache (as in Mach's Zone allocator),
+ * would allow most allocations from the same cache to execute in parallel.
+ *
+ * At present, each engine can be growing a cache. This should be blocked.
+ *
+ * It is not currently 100% safe to examine the page_struct outside of a kernel
+ * or global cli lock. The risk is v. small, and non-fatal.
+ *
+ * Calls to printk() are not 100% safe (the function is not threaded). However,
+ * printk() is only used under an error condition, and the risk is v. small (not
+ * sure if the console write functions 'enjoy' executing multiple contexts in
+ * parallel. I guess they don't...).
+ * Note, for most calls to printk() any held cache-lock is dropped. This is not
+ * always done for text size reasons - having *_unlock() everywhere is bloat.
*/
+
/*
* An implementation of the Slab Allocator as described in outline in;
* UNIX Internals: The New Frontiers by Uresh Vahalia
@@ -10,157 +83,251 @@
* or with a little more detail in;
* The Slab Allocator: An Object-Caching Kernel Memory Allocator
* Jeff Bonwick (Sun Microsystems).
- * Presented at: USENIX Summer 1994 Technical Conference
+ * Presented at: USENIX Summer 1994 Technical Conference
+ */
+
+/*
+ * This implementation deviates from Bonwick's paper as it
+ * does not use a hash-table for large objects, but rather a per slab
+ * index to hold the bufctls. This allows the bufctl structure to
+ * be small (one word), but limits the number of objects a slab (not
+ * a cache) can contain when off-slab bufctls are used. The limit is the
+ * size of the largest general-cache that does not use off-slab bufctls,
+ * divided by the size of a bufctl. For 32bit archs, this is 256/4 = 64.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general-cache whose size
+ * is less than 512 (PAGE_SIZE>>3), but greater than 256.
*/
-#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/config.h>
#include <linux/init.h>
-#include <asm/system.h>
-#include <asm/cache.h>
-/* SLAB_MGMT_CHECKS - define to enable extra checks in
- * kmem_cache_[create|destroy|shrink].
- * If you're not messing around with these funcs, then undef this.
- * SLAB_HIGH_PACK - define to allow 'bufctl's to be stored within objs that do not
- * have a state. This allows more objs per slab, but removes the
- * ability to sanity check an addr on release (if the addr is
- * within any slab, anywhere, kmem_cache_free() will accept it!).
- * SLAB_DEBUG_SUPPORT - when defined, kmem_cache_create() will honour; SLAB_DEBUG_FREE,
- * SLAB_DEBUG_INITIAL and SLAB_RED_ZONE.
- */
-#define SLAB_MGMT_CHECKS
-#undef SLAB_HIGH_PACK
-#define SLAB_DEBUG_SUPPORT /* undef this when your cache is stable */
-
-#define BYTES_PER_WORD sizeof(void *)
-
-/* legal flag mask for kmem_cache_create() */
-#if defined(SLAB_DEBUG_SUPPORT)
-#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_HWCACHE_ALIGN|SLAB_RED_ZONE)
-#else
-#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN)
-#endif /* SLAB_DEBUG_SUPPORT */
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/smp_lock.h>
+#include <asm/spinlock.h>
-/* Magic num for red zoning.
- * Placed in the first word after the end of an obj
+/* If there is a different PAGE_SIZE around, and it works with this allocator,
+ * then change the following.
*/
-#define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */
-#define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
+#if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
+#error Your page size is probably not correctly supported - please check
+#endif
-/* Used for linking objs within a slab. How much of the struct is
- * used, and where its placed, depends on the packing used in a cache.
- * Don't mess with the order!
- */
-typedef struct kmem_bufctl_s {
- struct kmem_bufctl_s *buf_nextp;
- struct kmem_slab_s *buf_slabp;
- void *buf_objp; /* start of obj */
- struct kmem_bufctl_s *buf_hnextp;
- struct kmem_bufctl_s **buf_hashp;
-} kmem_bufctl_t;
+/* SLAB_MGMT_CHECKS - 1 to enable extra checks in kmem_cache_create().
+ * 0 if you wish to reduce memory usage.
+ *
+ * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to honour; SLAB_DEBUG_FREE,
+ * SLAB_DEBUG_INITIAL, SLAB_RED_ZONE & SLAB_POISION.
+ * 0 for faster, smaller, code (especially in the critical paths).
+ *
+ * SLAB_STATS - 1 to collect stats for /proc/slabinfo.
+ * 0 for faster, smaller, code (especially in the critical paths).
+ *
+ * SLAB_SELFTEST - 1 to perform a few tests, mainly for development.
+ */
+#define SLAB_MGMT_CHECKS 1
+#define SLAB_DEBUG_SUPPORT 0
+#define SLAB_STATS 0
+#define SLAB_SELFTEST 0
-/* different portions of the bufctl are used - so need some macros */
-#define kmem_bufctl_offset(x) ((unsigned long)&((kmem_bufctl_t *)0)->x)
-#define kmem_bufctl_short_size (kmem_bufctl_offset(buf_objp))
-#define kmem_bufctl_very_short_size (kmem_bufctl_offset(buf_slabp))
+/* Shouldn't this be in a header file somewhere? */
+#define BYTES_PER_WORD sizeof(void *)
+
+/* Legal flag mask for kmem_cache_create(). */
+#if SLAB_DEBUG_SUPPORT
+#if 0
+#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
+ SLAB_POISION|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
+ SLAB_HIGH_PACK)
+#endif
+#define SLAB_C_MASK (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
+ SLAB_POISION|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
+#else
+#if 0
+#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
+#endif
+#define SLAB_C_MASK (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
+#endif /* SLAB_DEBUG_SUPPORT */
/* Slab management struct.
* Manages the objs in a slab. Placed either at the end of mem allocated
- * for the slab, or from an internal obj cache (SLAB_CFLGS_OFF_SLAB).
- * Slabs are chain into a partially ordered list. The linking ptrs must
- * be first in the struct!
- * The size of the struct is important(ish); it should align well on
- * cache line(s)
- */
+ * for a slab, or from an internal obj cache (cache_slabp).
+ * Slabs are chained into a partially ordered list; fully used first, partial
+ * next, and then fully free slabs.
+ * The first 4 members are referenced during an alloc/free operation, and
+ * should always appear on the same cache line.
+ * Note: The offset between some members _must_ match offsets within
+ * the kmem_cache_t - see kmem_cache_init() for the checks. */
+
+#define SLAB_OFFSET_BITS 16 /* could make this larger for 64bit archs */
+
typedef struct kmem_slab_s {
- struct kmem_slab_s *s_nextp;
- struct kmem_slab_s *s_prevp;
- void *s_mem; /* addr of mem allocated for slab */
- unsigned long s_jiffies;
- kmem_bufctl_t *s_freep; /* ptr to first inactive obj in slab */
- unsigned long s_flags;
- unsigned long s_magic;
- unsigned long s_inuse; /* num of objs active in slab */
+ struct kmem_bufctl_s *s_freep; /* ptr to first inactive obj in slab */
+ struct kmem_bufctl_s *s_index;
+ unsigned long s_magic;
+ unsigned long s_inuse; /* num of objs active in slab */
+
+ struct kmem_slab_s *s_nextp;
+ struct kmem_slab_s *s_prevp;
+ void *s_mem; /* addr of first obj in slab */
+ unsigned long s_offset:SLAB_OFFSET_BITS,
+ s_dma:1;
} kmem_slab_t;
-/* to test for end of slab chain */
-#define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_firstp))
+/* When the slab mgmt is on-slab, this gives the size to use. */
+#define slab_align_size (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
+
+/* Test for end of slab chain. */
+#define kmem_slab_end(x) ((kmem_slab_t*)&((x)->c_offset))
/* s_magic */
-#define SLAB_MAGIC_ALLOC 0xA5C32F2BUL
-#define SLAB_MAGIC_UNALLOC 0xB2F23C5AUL
+#define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* slab is alive */
+#define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* slab has been destroyed */
-/* s_flags */
-#define SLAB_SFLGS_DMA 0x000001UL /* slab's mem can do DMA */
+/* Bufctl's are used for linking objs within a slab, identifying what slab an obj
+ * is in, and the address of the associated obj (for sanity checking with off-slab
+ * bufctls). What a bufctl contains depends upon the state of the obj and
+ * the organisation of the cache.
+ */
+typedef struct kmem_bufctl_s {
+ union {
+ struct kmem_bufctl_s *buf_nextp;
+ kmem_slab_t *buf_slabp; /* slab for obj */
+ void * buf_objp;
+ } u;
+} kmem_bufctl_t;
-/* cache struct - manages a cache.
- * c_lastp must appear immediately after c_firstp!
+/* ...shorthand... */
+#define buf_nextp u.buf_nextp
+#define buf_slabp u.buf_slabp
+#define buf_objp u.buf_objp
+
+#if SLAB_DEBUG_SUPPORT
+/* Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define SLAB_RED_MAGIC1 0x5A2CF071UL /* when obj is active */
+#define SLAB_RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
+
+/* ...and for poisioning */
+#define SLAB_POISION_BYTE 0x5a /* byte value for poisioning */
+#define SLAB_POISION_END 0xa5 /* end-byte of poisioning */
+
+#endif /* SLAB_DEBUG_SUPPORT */
+
+/* Cache struct - manages a cache.
+ * First four members are commonly referenced during an alloc/free operation.
*/
struct kmem_cache_s {
kmem_slab_t *c_freep; /* first slab w. free objs */
- unsigned long c_flags;
+ unsigned long c_flags; /* constant flags */
unsigned long c_offset;
- struct kmem_bufctl_s **c_hashp; /* ptr for off-slab bufctls */
- kmem_slab_t *c_firstp; /* first slab in chain */
- kmem_slab_t *c_lastp; /* last slab in chain */
- unsigned long c_hashbits;
unsigned long c_num; /* # of objs per slab */
- unsigned long c_gfporder; /* order of pgs per slab (2^n) */
- unsigned long c_org_size;
+
unsigned long c_magic;
unsigned long c_inuse; /* kept at zero */
- void (*c_ctor)(void *, int, unsigned long); /* constructor func */
- void (*c_dtor)(void *, int, unsigned long); /* de-constructor func */
+ kmem_slab_t *c_firstp; /* first slab in chain */
+ kmem_slab_t *c_lastp; /* last slab in chain */
+
+ spinlock_t c_spinlock;
+ unsigned long c_growing;
+ unsigned long c_dflags; /* dynamic flags */
+ size_t c_org_size;
+ unsigned long c_gfporder; /* order of pgs per slab (2^n) */
+ void (*c_ctor)(void *, kmem_cache_t *, unsigned long); /* constructor func */
+ void (*c_dtor)(void *, kmem_cache_t *, unsigned long); /* de-constructor func */
unsigned long c_align; /* alignment of objs */
- unsigned long c_colour; /* cache colouring range */
- unsigned long c_colour_next;/* cache colouring */
+ size_t c_colour; /* cache colouring range */
+ size_t c_colour_next;/* cache colouring */
+ unsigned long c_failures;
const char *c_name;
struct kmem_cache_s *c_nextp;
+ kmem_cache_t *c_index_cachep;
+#if SLAB_STATS
+ unsigned long c_num_active;
+ unsigned long c_num_allocations;
+ unsigned long c_high_mark;
+ unsigned long c_grown;
+ unsigned long c_reaped;
+ atomic_t c_errors;
+#endif /* SLAB_STATS */
};
-/* magic # for c_magic - used to detect out-of-slabs in __kmem_cache_alloc() */
-#define SLAB_C_MAGIC 0x4F17A36DUL
-
/* internal c_flags */
#define SLAB_CFLGS_OFF_SLAB 0x010000UL /* slab mgmt in own cache */
#define SLAB_CFLGS_BUFCTL 0x020000UL /* bufctls in own cache */
-#define SLAB_CFLGS_RELEASED 0x040000UL /* cache is/being destroyed */
+#define SLAB_CFLGS_GENERAL 0x080000UL /* a general-cache */
-#if defined(SLAB_HIGH_PACK)
-#define SLAB_CFLGS_PTR_IN_OBJ 0x080000UL /* free ptr in obj */
-#endif
+/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
+#define SLAB_CFLGS_GROWN 0x000002UL /* don't reap a recently grown */
#define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB)
#define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL)
-#define SLAB_RELEASED(x) ((x) & SLAB_CFLGS_RELEASED)
-#if defined(SLAB_HIGH_PACK)
-#define SLAB_PTR_IN_OBJ(x) ((x) & SLAB_CFLGS_PTR_IN_OBJ)
+#define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN)
+
+#if SLAB_STATS
+#define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++)
+#define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--)
+#define SLAB_STATS_INC_ALLOCED(x) ((x)->c_num_allocations++)
+#define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++)
+#define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++)
+#define SLAB_STATS_SET_HIGH(x) do { if ((x)->c_num_active > (x)->c_high_mark) \
+ (x)->c_high_mark = (x)->c_num_active; \
+ } while (0)
+#define SLAB_STATS_INC_ERR(x) (atomic_inc(&(x)->c_errors))
#else
-#define SLAB_PTR_IN_OBJ(x) (0)
+#define SLAB_STATS_INC_ACTIVE(x)
+#define SLAB_STATS_DEC_ACTIVE(x)
+#define SLAB_STATS_INC_ALLOCED(x)
+#define SLAB_STATS_INC_GROWN(x)
+#define SLAB_STATS_INC_REAPED(x)
+#define SLAB_STATS_SET_HIGH(x)
+#define SLAB_STATS_INC_ERR(x)
+#endif /* SLAB_STATS */
+
+#if SLAB_SELFTEST
+#if !SLAB_DEBUG_SUPPORT
+#error Debug support needed for self-test
#endif
+static void kmem_self_test(void);
+#endif /* SLAB_SELFTEST */
+
+/* c_magic - used to detect 'out of slabs' in __kmem_cache_alloc() */
+#define SLAB_C_MAGIC 0x4F17A36DUL
/* maximum size of an obj (in 2^order pages) */
#define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */
-/* maximum num of pages for a slab (avoids trying to ask for too may contigious pages) */
+/* maximum num of pages for a slab (prevents large requests to the VM layer) */
#define SLAB_MAX_GFP_ORDER 5 /* 32 pages */
/* the 'prefered' minimum num of objs per slab - maybe less for large objs */
#define SLAB_MIN_OBJS_PER_SLAB 4
-/* if the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
- * then the page order must be less than this before trying the next order
+/* If the num of objs per slab is <= SLAB_MIN_OBJS_PER_SLAB,
+ * then the page order must be less than this before trying the next order.
*/
#define SLAB_BREAK_GFP_ORDER 2
-/* size of hash tables for caches which use off-slab bufctls (SLAB_CFLGS_BUFCTL) */
-#define KMEM_HASH_SIZE 128
+/* Macros for storing/retrieving the cachep and or slab from the
+ * global 'mem_map'. With off-slab bufctls, these are used to find the
+ * slab an obj belongs to. With kmalloc(), and kfree(), these are used
+ * to find the cache which an obj belongs to.
+ */
+#define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x))
+#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next)
+#define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x))
+#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev)
-/* size description struct for general-caches */
+/* Size description struct for general-caches. */
typedef struct cache_sizes {
- unsigned long cs_size;
+ size_t cs_size;
kmem_cache_t *cs_cachep;
} cache_sizes_t;
@@ -176,177 +343,177 @@
{2048, NULL},
{4096, NULL},
{8192, NULL},
-#if PAGE_SIZE == 8192
{16384, NULL},
-#endif
+ {32768, NULL},
+ {65536, NULL},
+ {131072, NULL},
{0, NULL}
};
-/* Names for the general-caches.
- * Not placed into the sizes struct for a good reason; the
- * string ptr is not needed while searching in kmem_alloc()/
- * kmem_free(), and would 'get-in-the-way' - think about it.
+/* Names for the general-caches. Not placed into the sizes struct for
+ * a good reason; the string ptr is not needed while searching in kmalloc(),
+ * and would 'get-in-the-way' in the h/w cache.
*/
static char *cache_sizes_name[] = {
#if PAGE_SIZE == 4096
- "cache-32",
+ "size-32",
#endif
- "cache-64",
- "cache-128",
- "cache-256",
- "cache-512",
- "cache-1024",
- "cache-2048",
- "cache-4096",
-#if PAGE_SIZE == 4096
- "cache-8192"
-#elif PAGE_SIZE == 8192
- "cache-8192",
- "cache-16384"
-#else
-#error Your page size is not supported for the general-caches - please fix
-#endif
-};
-
-static void kmem_hash_ctor(void *ptr, int , unsigned long); /* fwd ref */
-extern kmem_cache_t cache_cache; /* fwd ref */
-
-/* internal cache of hash objs, only used when bufctls are off-slab */
-static kmem_cache_t cache_hash = {
-/* freep, flags */ kmem_slab_end(&cache_hash), 0,
-/* offset, hashp */ sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE, NULL,
-/* firstp, lastp */ kmem_slab_end(&cache_hash), kmem_slab_end(&cache_hash),
-/* hashbits, num, gfporder */ 0, 0, 0,
-/* org_size, magic */ sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE, SLAB_C_MAGIC,
-/* inuse, ctor, dtor, align */ 0, kmem_hash_ctor, NULL, L1_CACHE_BYTES,
-/* colour, colour_next */ 0, 0,
-/* name, nextp */ "hash_cache", &cache_cache
-};
-
-/* internal cache of freelist mgmnt objs, only use when bufctls are off-slab */
-static kmem_cache_t cache_bufctl = {
-/* freep, flags */ kmem_slab_end(&cache_bufctl), 0,
-/* offset, hashp */ sizeof(kmem_bufctl_t), NULL,
-/* firstp, lastp */ kmem_slab_end(&cache_bufctl), kmem_slab_end(&cache_bufctl),
-/* hashbits, num, gfporder */ 0, 0, 0,
-/* org_size, magic */ sizeof(kmem_bufctl_t), SLAB_C_MAGIC,
-/* inuse, ctor, dtor, align */ 0, NULL, NULL, BYTES_PER_WORD*2,
-/* colour, colour_next */ 0, 0,
-/* name, nextp */ "bufctl_cache", &cache_hash
-};
-
-/* internal cache of slab mngmnt objs, only used when slab mgmt is off-slab */
-static kmem_cache_t cache_slab = {
-/* freep, flags */ kmem_slab_end(&cache_slab), 0,
-/* offset, hashp */ sizeof(kmem_slab_t), NULL,
-/* firstp, lastp */ kmem_slab_end(&cache_slab), kmem_slab_end(&cache_slab),
-/* hashbits, num, gfporder */ 0, 0, 0,
-/* org_size, magic */ sizeof(kmem_slab_t), SLAB_C_MAGIC,
-/* inuse, ctor, dtor, align */ 0, NULL, NULL, L1_CACHE_BYTES,
-/* colour, colour_next */ 0, 0,
-/* name, nextp */ "slab_cache", &cache_bufctl
+ "size-64",
+ "size-128",
+ "size-256",
+ "size-512",
+ "size-1024",
+ "size-2048",
+ "size-4096",
+ "size-8192",
+ "size-16384",
+ "size-32768",
+ "size-65536",
+ "size-131072"
};
/* internal cache of cache description objs */
static kmem_cache_t cache_cache = {
-/* freep, flags */ kmem_slab_end(&cache_cache), 0,
-/* offset, hashp */ sizeof(kmem_cache_t), NULL,
+/* freep, flags */ kmem_slab_end(&cache_cache), SLAB_NO_REAP,
+/* offset, num */ sizeof(kmem_cache_t), 0,
+/* c_magic, c_inuse */ SLAB_C_MAGIC, 0,
/* firstp, lastp */ kmem_slab_end(&cache_cache), kmem_slab_end(&cache_cache),
-/* hashbits, num, gfporder */ 0, 0, 0,
-/* org_size, magic */ sizeof(kmem_cache_t), SLAB_C_MAGIC,
-/* inuse, ctor, dtor, align */ 0, NULL, NULL, L1_CACHE_BYTES,
+/* spinlock */ SPIN_LOCK_UNLOCKED,
+/* growing */ 0,
+/* dflags */ 0,
+/* org_size, gfp */ 0, 0,
+/* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES,
/* colour, colour_next */ 0, 0,
+/* failures */ 0,
/* name */ "kmem_cache",
-/* nextp */ &cache_slab
+/* nextp */ &cache_cache,
+/* index */ NULL,
};
-/* constructor for hash tables */
-static void kmem_hash_ctor(void *ptr, int size, unsigned long flags)
-{
- memset(ptr, 0, sizeof(kmem_bufctl_t*)*KMEM_HASH_SIZE);
-}
+/* Guard access to the cache-chain. */
+static struct semaphore cache_chain_sem;
-/* place maintainer for reaping */
+/* Place maintainer for reaping. */
static kmem_cache_t *clock_searchp = &cache_cache;
-/* Init an internal cache */
-__initfunc(static void
-kmem_own_cache_init(kmem_cache_t *cachep))
-{
- unsigned long size, i;
-
- if (cachep->c_inuse || cachep->c_magic != SLAB_C_MAGIC) {
- panic("Bad init of internal cache %s", cachep->c_name);
- /* NOTREACHED */
- }
- size = cachep->c_offset + kmem_bufctl_short_size;
- i = size % cachep->c_align;
- if (i)
- size += (cachep->c_align-i);
- cachep->c_offset = size-kmem_bufctl_short_size;
-
- i = ((PAGE_SIZE<<cachep->c_gfporder)-sizeof(kmem_slab_t));
- cachep->c_num = i / size; /* num of objs per slab */
+/* Internal slab mgmt cache, for when slab mgmt is off-slab. */
+static kmem_cache_t *cache_slabp = NULL;
- /* cache colouring */
- cachep->c_colour = 1 + (i-(cachep->c_num*size))/cachep->c_align;
- cachep->c_colour_next = cachep->c_colour;
-}
+/* Max number of objs-per-slab for caches which use bufctl's.
+ * Needed to avoid a possible looping condition in kmem_cache_grow().
+ */
+static unsigned long bufctl_limit = 0;
-/* Initialisation - setup all internal caches */
-__initfunc(long
-kmem_cache_init(long start, long end))
+/* Initialisation - setup the `cache' cache. */
+__initfunc(long kmem_cache_init(long start, long end))
{
- /* sanity */
+ size_t size, i;
+
+#define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x)
+#define kmem_slab_diff(a,b) (kmem_slab_offset(a) - kmem_slab_offset(b))
#define kmem_cache_offset(x) ((unsigned long)&((kmem_cache_t *)0)->x)
-#define kmem_slab_offset(x) ((unsigned long)&((kmem_slab_t *)0)->x)
- if (((kmem_cache_offset(c_magic)-kmem_cache_offset(c_firstp)) != kmem_slab_offset(s_magic)) ||
- ((kmem_cache_offset(c_inuse)-kmem_cache_offset(c_firstp)) != kmem_slab_offset(s_inuse))) {
+#define kmem_cache_diff(a,b) (kmem_cache_offset(a) - kmem_cache_offset(b))
+
+ /* Sanity checks... */
+ if (kmem_cache_diff(c_firstp, c_magic) != kmem_slab_diff(s_nextp, s_magic) ||
+ kmem_cache_diff(c_firstp, c_inuse) != kmem_slab_diff(s_nextp, s_inuse) ||
+ ((kmem_cache_offset(c_lastp) -
+ ((unsigned long) kmem_slab_end((kmem_cache_t*)NULL))) !=
+ kmem_slab_offset(s_prevp)) ||
+ kmem_cache_diff(c_lastp, c_firstp) != kmem_slab_diff(s_prevp, s_nextp)) {
/* Offsets to the magic are incorrect, either the structures have
* been incorrectly changed, or adjustments are needed for your
* architecture.
*/
- panic("kmem_cache_init(): Offsets are different - been messed with!\n");
+ panic("kmem_cache_init(): Offsets are wrong - I've been messed with!");
/* NOTREACHED */
}
#undef kmem_cache_offset
+#undef kmem_cache_diff
#undef kmem_slab_offset
+#undef kmem_slab_diff
+
+ cache_chain_sem = MUTEX;
+
+ size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
+ size += (L1_CACHE_BYTES-1);
+ size &= ~(L1_CACHE_BYTES-1);
+ cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
+
+ i = (PAGE_SIZE<<cache_cache.c_gfporder)-slab_align_size;
+ cache_cache.c_num = i / size; /* num of objs per slab */
+
+ /* Cache colouring. */
+ cache_cache.c_colour = (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
+ cache_cache.c_colour_next = cache_cache.c_colour;
- kmem_own_cache_init(&cache_cache);
- kmem_own_cache_init(&cache_slab);
- kmem_own_cache_init(&cache_bufctl);
- kmem_own_cache_init(&cache_hash);
return start;
}
-/* Initialisation - setup general caches */
-__initfunc(void
-kmem_cache_sizes_init(void))
+/* Initialisation - setup remaining internal and general caches.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+ */
+__initfunc(void kmem_cache_sizes_init(void))
{
- unsigned long i;
+ unsigned int found = 0;
- i = sizeof(cache_sizes)/sizeof(cache_sizes[0])-1;
- while (i--)
- cache_sizes[i].cs_cachep = kmem_cache_create(cache_sizes_name[i],
- cache_sizes[i].cs_size,
- 0, 0, NULL, NULL);
+ cache_slabp = kmem_cache_create("slab_cache", sizeof(kmem_slab_t),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (cache_slabp) {
+ char **names = cache_sizes_name;
+ cache_sizes_t *sizes = cache_sizes;
+ do {
+ /* For performance, all the general-caches are L1 aligned.
+ * This should be particularly beneficial on SMP boxes, as it
+ * eliminates "false sharing".
+ * Note for systems short on memory removing the alignment will
+ * allow tighter packing of the smaller caches. */
+ if (!(sizes->cs_cachep =
+ kmem_cache_create(*names++, sizes->cs_size,
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
+ goto panic_time;
+ if (!found) {
+ /* Inc off-slab bufctl limit until the ceiling is hit. */
+ if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
+ found++;
+ else
+ bufctl_limit =
+ (sizes->cs_size/sizeof(kmem_bufctl_t));
+ }
+ sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
+ sizes++;
+ } while (sizes->cs_size);
+#if SLAB_SELFTEST
+ kmem_self_test();
+#endif /* SLAB_SELFTEST */
+ return;
+ }
+panic_time:
+ panic("kmem_cache_sizes_init: Error creating caches");
+ /* NOTREACHED */
}
-/* Interface to system's page allocator.
- * dma pts to non-zero if all of the mem is suitable for DMA
+/* Interface to system's page allocator. Dma pts to non-zero if all
+ * of memory is DMAable. No need to hold the cache-lock.
*/
static inline void *
-kmem_getpages(const kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
+kmem_getpages(kmem_cache_t *cachep, unsigned long flags, unsigned int *dma)
{
- struct page *page;
void *addr;
- addr = (void*) __get_free_pages(flags & SLAB_LEVEL_MASK, \
- cachep->c_gfporder, flags & SLAB_DMA);
- *dma = 1<<cachep->c_gfporder;
- if (!(flags & SLAB_DMA) && addr) {
- /* need to check if can dma */
- page = mem_map + MAP_NR(addr);
+ *dma = flags & SLAB_DMA;
+ addr = (void*) __get_free_pages(flags & SLAB_LEVEL_MASK,
+ cachep->c_gfporder, *dma);
+ /* Assume that now we have the pages no one else can legally
+ * mess with the 'struct page's.
+ * However vm_scan() might try to test the structure to see if
+ * it is a named-page or buffer-page. The members it tests are
+ * of no interest here.....
+ */
+ if (!*dma && addr) {
+ /* Need to check if can dma. */
+ struct page *page = mem_map + MAP_NR(addr);
+ *dma = 1<<cachep->c_gfporder;
while ((*dma)--) {
if (!PageDMA(page)) {
*dma = 0;
@@ -358,58 +525,52 @@
return addr;
}
-/* Interface to system's page release */
+/* Interface to system's page release. */
static inline void
kmem_freepages(kmem_cache_t *cachep, void *addr)
{
+ unsigned long i = (1<<cachep->c_gfporder);
+ struct page *page = &mem_map[MAP_NR(addr)];
+
+ /* free_pages() does not clear the type bit - we do that.
+ * The pages have been unlinked from their cache-slab,
+ * but their 'struct page's might be accessed in
+ * vm_scan(). Shouldn't be a worry.
+ */
+ while (i--) {
+ PageClearSlab(page);
+ page++;
+ }
free_pages((unsigned long)addr, cachep->c_gfporder);
}
-/* Hashing function - used for caches with off-slab bufctls */
-static inline int
-kmem_hash(const kmem_cache_t *cachep, const void *objp)
+#if SLAB_DEBUG_SUPPORT
+static inline void
+kmem_poision_obj(kmem_cache_t *cachep, void *addr)
{
- return (((unsigned long)objp >> cachep->c_hashbits) & (KMEM_HASH_SIZE-1));
+ memset(addr, SLAB_POISION_BYTE, cachep->c_org_size);
+ *(unsigned char *)(addr+cachep->c_org_size-1) = SLAB_POISION_END;
}
-/* Link bufctl into a hash table - used for caches with off-slab bufctls
- * - called with ints disabled
- */
-static inline void *
-kmem_add_to_hash(kmem_cache_t *cachep, kmem_bufctl_t *bufp)
+static inline int
+kmem_check_poision_obj(kmem_cache_t *cachep, void *addr)
{
- kmem_bufctl_t **bufpp = bufp->buf_hashp;
-
- bufp->buf_hnextp = *bufpp;
- return (*bufpp = bufp)->buf_objp;
+ void *end;
+ end = memchr(addr, SLAB_POISION_END, cachep->c_org_size);
+ if (end != (addr+cachep->c_org_size-1))
+ return 1;
+ return 0;
}
+#endif /* SLAB_DEBUG_SUPPORT */
-/* Find bufcntl for given obj addr, and unlink.
- * - called with ints disabled
+/* Three slab chain funcs - all called with ints disabled and the appropriate
+ * cache-lock held.
*/
-static inline kmem_bufctl_t *
-kmem_remove_from_hash(kmem_cache_t *cachep, const void *objp)
-{
- kmem_bufctl_t *bufp;
- kmem_bufctl_t **bufpp = &cachep->c_hashp[kmem_hash(cachep, objp)];
-
- for (;*bufpp; bufpp = &(*bufpp)->buf_hnextp) {
- if ((*bufpp)->buf_objp != objp)
- continue;
- bufp = *bufpp;
- *bufpp = bufp->buf_hnextp;
- return bufp;
- }
- return NULL;
-}
-
-/* Three slab chain funcs - all called with ints disabled */
static inline void
kmem_slab_unlink(kmem_slab_t *slabp)
{
kmem_slab_t *prevp = slabp->s_prevp;
kmem_slab_t *nextp = slabp->s_nextp;
-
prevp->s_nextp = nextp;
nextp->s_prevp = prevp;
}
@@ -417,781 +578,881 @@
static inline void
kmem_slab_link_end(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
+ kmem_slab_t *lastp = cachep->c_lastp;
slabp->s_nextp = kmem_slab_end(cachep);
- slabp->s_prevp = cachep->c_lastp;
- kmem_slab_end(cachep)->s_prevp = slabp;
- slabp->s_prevp->s_nextp = slabp;
+ slabp->s_prevp = lastp;
+ cachep->c_lastp = slabp;
+ lastp->s_nextp = slabp;
}
static inline void
kmem_slab_link_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
+ /* Insert slabp into the chain immediately in front of the slab that
+ * c_freep points to.  c_freep itself is not modified here; callers
+ * that want it to reference slabp assign it themselves.
+ */
kmem_slab_t *nextp = cachep->c_freep;
-
+ kmem_slab_t *prevp = nextp->s_prevp;
slabp->s_nextp = nextp;
- cachep->c_freep = slabp;
- slabp->s_prevp = nextp->s_prevp;
+ slabp->s_prevp = prevp;
nextp->s_prevp = slabp;
slabp->s_prevp->s_nextp = slabp;
}
-/* Cal the num objs, wastage, and bytes left over for a given slab size */
-static int
-kmem_cache_cal_waste(unsigned long gfporder, unsigned long size,
- unsigned long extra, unsigned long flags,
- unsigned long *left_over, unsigned long *num)
-{
- unsigned long wastage;
-
- wastage = PAGE_SIZE << gfporder;
- gfporder = 0;
- if (!SLAB_OFF_SLAB(flags))
- gfporder = sizeof(kmem_slab_t);
+/* Destroy all the objs in a slab, and release the mem back to the system.
+ * Before calling the slab must have been unlinked from the cache.
+ * The cache-lock is not held/needed.
+ *
+ * Every object is visited in address order (the bufctl free-list is not
+ * used).  With debug support compiled in, the red-zone words are verified
+ * and, for caches without a destructor, the poison pattern is checked.
+ * The destructor (if any) is run on each object.  Afterwards the slab's
+ * pages, an off-slab bufctl index (s_index) and an off-slab slab-management
+ * struct are released.
+ */
+static void
+kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp)
+{
+	/* The debug-flag mask must be built with a bitwise OR.  A logical OR
+	 * collapses the mask to the value 1, so the checks below would be
+	 * skipped for red-zoned/poisoned caches that have no destructor.
+	 */
+	if (cachep->c_dtor
+#if	SLAB_DEBUG_SUPPORT
+	    || (cachep->c_flags & (SLAB_POISION | SLAB_RED_ZONE))
+#endif	/*SLAB_DEBUG_SUPPORT*/
+	) {
+		/* Doesn't use the bufctl ptrs to find objs. */
+		unsigned long num = cachep->c_num;
+		void *objp = slabp->s_mem;
+		do {
+#if	SLAB_DEBUG_SUPPORT
+			if (cachep->c_flags & SLAB_RED_ZONE) {
+				if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
+					printk(KERN_ERR "kmem_slab_destroy: "
+					       "Bad front redzone - %s\n",
+					       cachep->c_name);
+				/* Step over the front red-zone word to the obj proper. */
+				objp += BYTES_PER_WORD;
+				if (*((unsigned long*)(objp+cachep->c_org_size)) !=
+				    SLAB_RED_MAGIC1)
+					printk(KERN_ERR "kmem_slab_destroy: "
+					       "Bad rear redzone - %s\n",
+					       cachep->c_name);
+			}
+			if (cachep->c_dtor)
+#endif	/*SLAB_DEBUG_SUPPORT*/
+				(cachep->c_dtor)(objp, cachep, 0);
+#if	SLAB_DEBUG_SUPPORT
+			else if (cachep->c_flags & SLAB_POISION) {
+				if (kmem_check_poision_obj(cachep, objp))
+					printk(KERN_ERR "kmem_slab_destroy: "
+					       "Bad poision - %s\n", cachep->c_name);
+			}
+			/* Undo the red-zone adjustment before stepping to the next obj. */
+			if (cachep->c_flags & SLAB_RED_ZONE)
+				objp -= BYTES_PER_WORD;
+#endif	/* SLAB_DEBUG_SUPPORT */
+			objp += cachep->c_offset;
+			/* On-slab bufctls sit directly behind each obj - skip them. */
+			if (!slabp->s_index)
+				objp += sizeof(kmem_bufctl_t);
+		} while (--num);
+	}
+
+	slabp->s_magic = SLAB_MAGIC_DESTROYED;
+	kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
+	if (slabp->s_index)
+		kmem_cache_free(cachep->c_index_cachep, slabp->s_index);
+	if (SLAB_OFF_SLAB(cachep->c_flags))
+		kmem_cache_free(cache_slabp, slabp);
+}
+
+/* Cal the num objs, wastage, and bytes left over for a given slab size.
+ * Stores the number of objs that fit in '*num' and the unused bytes in
+ * '*left_over'.  The return value is the unused bytes plus the on-slab
+ * management overhead plus 'extra' bytes for each obj.
+ */
+static inline size_t
+kmem_cache_cal_waste(unsigned long gfporder, size_t size, size_t extra,
+ unsigned long flags, size_t *left_over, unsigned long *num)
+{
+ size_t wastage = PAGE_SIZE<<gfporder;
+
+ /* From here on 'gfporder' is reused to hold the bytes taken by an
+ * on-slab management struct (zero when the struct is kept off-slab).
+ */
+ if (SLAB_OFF_SLAB(flags))
+ gfporder = 0;
+ else
+ gfporder = slab_align_size;
wastage -= gfporder;
*num = wastage / size;
wastage -= (*num * size);
*left_over = wastage;
- wastage += (extra * *num);
- wastage += gfporder;
-
- return wastage;
+ return (wastage + gfporder + (extra * *num));
}
-/* Create a cache
+/* Create a cache:
* Returns a ptr to the cache on success, NULL on failure.
* Cannot be called within a int, but can be interrupted.
* NOTE: The 'name' is assumed to be memory that is _not_ going to disappear.
+ * 'size' is the object size in bytes (rounded up to a whole number of
+ * words), 'offset' the requested slab colouring offset, 'flags' the cache
+ * creation flags and 'ctor'/'dtor' the optional constructor/destructor.
+ * The new cache is linked into the cache-chain under 'cache_chain_sem'.
*/
kmem_cache_t *
-kmem_cache_create(const char *name, unsigned long size, unsigned long align,
- unsigned long flags, void (*ctor)(void*, int, unsigned long),
- void (*dtor)(void*, int, unsigned long))
-{
- const char *func_nm="kmem_create: ";
- kmem_cache_t *searchp, *cachep;
- unsigned long words, i;
- unsigned long num, left_over;
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+ void (*dtor)(void*, kmem_cache_t *, unsigned long))
+{
+ const char *func_nm= KERN_ERR "kmem_create: ";
+ kmem_cache_t *searchp;
+ kmem_cache_t *cachep=NULL;
+ size_t extra;
+ size_t left_over;
+ size_t align;
- /* sanity checks */
-#if defined(SLAB_MGMT_CHECKS)
+ /* Sanity checks... */
+#if SLAB_MGMT_CHECKS
if (!name) {
- printk(KERN_ERR "%sNULL ptr\n", func_nm);
- return NULL;
+ printk("%sNULL ptr\n", func_nm);
+ goto opps;
}
if (in_interrupt()) {
- printk(KERN_ERR "%sCalled during int - %s\n", func_nm, name);
- return NULL;
+ printk("%sCalled during int - %s\n", func_nm, name);
+ goto opps;
}
- if (size < kmem_bufctl_very_short_size) {
- printk(KERN_WARNING "%sSize too small %lu - %s\n", func_nm, size, name);
- size = kmem_bufctl_very_short_size;
+ if (size < BYTES_PER_WORD) {
+ printk("%sSize too small %d - %s\n", func_nm, (int) size, name);
+ size = BYTES_PER_WORD;
}
if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
- printk(KERN_ERR "%sSize too large %lu - %s\n", func_nm, size, name);
- return NULL;
- }
-#endif /* SLAB_MGMT_CHECKS */
-
- /* always checks flags, a caller might be expecting debug support which
- * isn't available
- */
- if (flags & ~SLAB_C_MASK) {
- /* Illegal flags */
- printk(KERN_WARNING "%sIllgl flg %lX - %s\n", func_nm, flags, name);
- flags &= SLAB_C_MASK;
+ printk("%sSize too large %d - %s\n", func_nm, (int) size, name);
+ goto opps;
}
-#if defined(SLAB_MGMT_CHECKS)
- if (align < 0 || align >= size) {
- printk(KERN_WARNING "%sAlign weired %lu - %s\n", func_nm, align, name);
- align = 0;
+ if (dtor && !ctor) {
+ /* Decon, but no con - doesn't make sense */
+ printk("%sDecon but no con - %s\n", func_nm, name);
+ goto opps;
}
- if (dtor && !ctor) {
- /* Descon, but no con - doesn't make sense */
- printk(KERN_ERR "%sDecon but no con - %s\n", func_nm, name);
- return NULL;
+ if (offset < 0 || offset > size) {
+ printk("%sOffset weired %d - %s\n", func_nm, (int) offset, name);
+ offset = 0;
}
+#if SLAB_DEBUG_SUPPORT
if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
/* No constructor, but inital state check requested */
- printk(KERN_WARNING "%sNo con, but init state check requested - %s\n",
- func_nm, name);
+ printk("%sNo con, but init state check requested - %s\n", func_nm, name);
flags &= ~SLAB_DEBUG_INITIAL;
}
+
+ if ((flags & SLAB_POISION) && ctor) {
+ /* request for poisioning, but we can't do that with a constructor */
+ printk("%sPoisioning requested, but con given - %s\n", func_nm, name);
+ flags &= ~SLAB_POISION;
+ }
+#if 0
+ if ((flags & SLAB_HIGH_PACK) && ctor) {
+ printk("%sHigh pack requested, but con given - %s\n", func_nm, name);
+ flags &= ~SLAB_HIGH_PACK;
+ }
+ if ((flags & SLAB_HIGH_PACK) && (flags & (SLAB_POISION|SLAB_RED_ZONE))) {
+ printk("%sHigh pack requested, but with poisioning/red-zoning - %s\n",
+ func_nm, name);
+ flags &= ~SLAB_HIGH_PACK;
+ }
+#endif
+#endif /* SLAB_DEBUG_SUPPORT */
#endif /* SLAB_MGMT_CHECKS */
- /* get cache's description obj */
+ /* Always checks flags, a caller might be expecting debug
+ * support which isn't available.
+ */
+ if (flags & ~SLAB_C_MASK) {
+ printk("%sIllgl flg %lX - %s\n", func_nm, flags, name);
+ flags &= SLAB_C_MASK;
+ }
+
+ /* Get cache's description obj. */
cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
if (!cachep)
goto opps;
+ memset(cachep, 0, sizeof(kmem_cache_t));
- /* remember original size, so can be passed to a constructor or decon.
- * Allows the same con/decon to be used for caches of similar objs
- * that have a different size data buffer assoicated with them
+ /* Check that size is in terms of words. This is needed to avoid
+ * unaligned accesses for some archs when redzoning is used, and makes
+ * sure any on-slab bufctl's are also correctly aligned.
*/
- cachep->c_org_size = size;
+ if (size & (BYTES_PER_WORD-1)) {
+ size += (BYTES_PER_WORD-1);
+ size &= ~(BYTES_PER_WORD-1);
+ printk("%sForcing size word alignment - %s\n", func_nm, name);
+ }
+ /* Record the (word-aligned) object size _before_ the red-zone words
+ * are added.  The poisoning code and the rear red-zone address
+ * (objp + c_org_size) both treat c_org_size as the size of the
+ * caller's object alone; including the red-zone words here would
+ * push the rear red-zone and the poison range past the object.
+ */
+ cachep->c_org_size = size;
-#if defined(SLAB_DEBUG_SUPPORT)
- if (flags & SLAB_RED_ZONE)
- size += BYTES_PER_WORD; /* word for redzone */
+#if SLAB_DEBUG_SUPPORT
+ if (flags & SLAB_RED_ZONE) {
+ /* There is no point trying to honour cache alignment when redzoning. */
+ flags &= ~SLAB_HWCACHE_ALIGN;
+ size += 2*BYTES_PER_WORD; /* words for redzone */
+ }
#endif /* SLAB_DEBUG_SUPPORT */
- /* Make a guess if slab mngmnt obj and/or bufctls are 'on' or 'off' slab */
- i = kmem_bufctl_short_size;
+ align = BYTES_PER_WORD;
+ if (flags & SLAB_HWCACHE_ALIGN)
+ align = L1_CACHE_BYTES;
+
+ /* Determine if the slab mgmt and/or bufctls are 'on' or 'off' slab. */
+ extra = sizeof(kmem_bufctl_t);
if (size < (PAGE_SIZE>>3)) {
- /* Size is small(ish). Use format where bufctl size per
- * obj is low, and slab mngmnt is on-slab
+ /* Size is small(ish). Use packing where bufctl size per
+ * obj is low, and slab mngmnt is on-slab.
*/
- if (!ctor && !dtor && !(flags & SLAB_RED_ZONE)) {
- /* the objs in this cache have no state - can store
- * store freelist ptr within obj. (redzoning is a state)
+#if 0
+ if ((flags & SLAB_HIGH_PACK)) {
+ /* Special high packing for small objects
+ * (mainly for vm_mapping structs, but
+ * others can use it).
*/
-#if defined(SLAB_HIGH_PACK)
- i=0;
- flags |= SLAB_CFLGS_PTR_IN_OBJ;
-#else
- i = kmem_bufctl_very_short_size;
-#endif
+ if (size == (L1_CACHE_BYTES/4) || size == (L1_CACHE_BYTES/2) ||
+ size == L1_CACHE_BYTES) {
+ /* The bufctl is stored with the object. */
+ extra = 0;
+ } else
+ flags &= ~SLAB_HIGH_PACK;
}
+#endif
} else {
/* Size is large, assume best to place the slab mngmnt obj
- * off-slab (should allow better packing of objs)
+ * off-slab (should allow better packing of objs).
*/
flags |= SLAB_CFLGS_OFF_SLAB;
- if (!(size & ~PAGE_MASK) ||
- size == (PAGE_SIZE+PAGE_SIZE/2) ||
- size == (PAGE_SIZE/2) ||
- size == (PAGE_SIZE/4) ||
- size == (PAGE_SIZE/8)) {
- /* to avoid waste the bufctls are off-slab */
+ if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2)
+ || size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
+ /* To avoid waste the bufctls are off-slab... */
flags |= SLAB_CFLGS_BUFCTL;
- /* get hash table for cache */
- cachep->c_hashp = kmem_cache_alloc(&cache_hash, SLAB_KERNEL);
- if (cachep->c_hashp == NULL) {
- kmem_cache_free(&cache_cache, cachep);
- goto opps;
- }
- i = 0;
- cachep->c_hashbits = PAGE_SHIFT;
- if (size <= (PAGE_SIZE/2)) {
- cachep->c_hashbits--;
- if (size <= (PAGE_SIZE/4)) cachep->c_hashbits--;
- if (size <= (PAGE_SIZE/8)) cachep->c_hashbits -= 2;
- }
- } /* else slab mngmnt is off-slab, but freelist ptrs are on */
+ extra = 0;
+ } /* else slab mngmnt is off-slab, but freelist ptrs are on. */
}
- size += i;
-
- /* Adjust the mem used for objs so they will align correctly.
- * Force objs to start on word boundaries, but caller may specify
- * h/w cache line boundaries. This 'alignment' is slightly different
- * to the 'align' argument. Objs may be requested to start on h/w
- * lines (as that is how the members of the obj have been organised),
- * but the 'align' may be quite high (say 64) as the first 64 bytes
- * are commonly accessed/modified within a loop (stops h/w line
- * thrashing). The 'align' is the slab colouring.
- */
- words = BYTES_PER_WORD;
- if (flags & SLAB_HWCACHE_ALIGN)
- words = L1_CACHE_BYTES;
- words--;
- size += words;
- size = size & ~words;
- /* alignment might not be a factor of the boundary alignment - fix-up */
- align += words;
- align = align & ~words;
+ size += extra;
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ /* Need to adjust size so that objs are cache aligned. */
+ if (size > (L1_CACHE_BYTES/2)) {
+ size_t words = size % L1_CACHE_BYTES;
+ if (words)
+ size += (L1_CACHE_BYTES-words);
+ } else {
+ /* Small obj size, can get at least two per cache line. */
+ int num_per_line = L1_CACHE_BYTES/size;
+ left_over = L1_CACHE_BYTES - (num_per_line*size);
+ if (left_over) {
+ /* Need to adjust size so objs cache align. */
+ if (left_over%num_per_line) {
+ /* Odd num of objs per line - fixup. */
+ num_per_line--;
+ left_over += size;
+ }
+ size += (left_over/num_per_line);
+ }
+ }
+ } else if (!(size%L1_CACHE_BYTES)) {
+ /* Size happens to cache align... */
+ flags |= SLAB_HWCACHE_ALIGN;
+ align = L1_CACHE_BYTES;
+ }
/* Cal size (in pages) of slabs, and the num of objs per slab.
- * This could be made much more intelligent. */
- cachep->c_gfporder=0;
+ * This could be made much more intelligent. For now, try to avoid
+ * using high page-orders for slabs. When the gfp() funcs are more
+ * friendly towards high-order requests, this should be changed.
+ */
do {
- unsigned long wastage;
- wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, i,
- flags, &left_over, &num);
- if (!num)
+ size_t wastage;
+ unsigned int break_flag = 0;
+cal_wastage:
+ wastage = kmem_cache_cal_waste(cachep->c_gfporder, size, extra,
+ flags, &left_over, &cachep->c_num);
+ if (!cachep->c_num)
goto next;
- if (SLAB_PTR_IN_OBJ(flags))
+ if (break_flag)
break;
+ if (SLAB_BUFCTL(flags) && cachep->c_num > bufctl_limit) {
+ /* Oops, this num of objs will cause problems. */
+ cachep->c_gfporder--;
+ break_flag++;
+ goto cal_wastage;
+ }
if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
break;
- /* large num of objs is good, but v. large slabs are bad for the
- * VM sub-system
+
+ /* Large num of objs is good, but v. large slabs are currently
+ * bad for the gfp()s.
*/
- if (num <= SLAB_MIN_OBJS_PER_SLAB) {
+ if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
if (cachep->c_gfporder < SLAB_BREAK_GFP_ORDER)
goto next;
}
- /* stop caches with small objs having a large num of pages */
- if (left_over <= sizeof(kmem_slab_t))
+
+ /* Stop caches with small objs having a large num of pages. */
+ if (left_over <= slab_align_size)
break;
if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
- break; /* acceptable wastage */
+ break; /* Acceptable internal fragmentation. */
next:
cachep->c_gfporder++;
} while (1);
- cachep->c_num = num;
- /* try with requested alignment, but reduce it if that will
- * allow at least some alignment words
+ /* If the slab has been placed off-slab, and we have enough space then
+ * move it on-slab. This is at the expense of any extra colouring.
*/
- words++;
- if (left_over < align)
- align = (left_over / words) * words;
- else if (!align && words <= left_over) {
- /* no alignment given, but space enough - give one */
- align = words;
- if (words == BYTES_PER_WORD) {
- if (BYTES_PER_WORD*4 <= left_over)
- align += align;
- if (BYTES_PER_WORD*8 <= left_over)
- align += align;
+ if ((flags & SLAB_CFLGS_OFF_SLAB) && !SLAB_BUFCTL(flags) &&
+ left_over >= slab_align_size) {
+ flags &= ~SLAB_CFLGS_OFF_SLAB;
+ left_over -= slab_align_size;
+ }
+
+ /* Offset must be a factor of the alignment. */
+ offset += (align-1);
+ offset &= ~(align-1);
+
+ /* Mess around with the offset alignment. */
+ if (!left_over) {
+ offset = 0;
+ } else if (left_over < offset) {
+ offset = align;
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ if (left_over < offset)
+ offset = 0;
+ } else {
+ /* Offset is BYTES_PER_WORD, and left_over is at
+ * least BYTES_PER_WORD.
+ */
+ if (left_over >= (BYTES_PER_WORD*2)) {
+ offset >>= 1;
+ if (left_over >= (BYTES_PER_WORD*4))
+ offset >>= 1;
+ }
+ }
+ } else if (!offset) {
+ /* No offset requested, but space enough - give one. */
+ offset = left_over/align;
+ if (flags & SLAB_HWCACHE_ALIGN) {
+ if (offset >= 8) {
+ /* A large number of colours - use a larger alignment. */
+ align <<= 1;
+ }
+ } else {
+ if (offset >= 10) {
+ align <<= 1;
+ if (offset >= 16)
+ align <<= 1;
+ }
}
+ offset = align;
}
- cachep->c_align = align;
#if 0
- printk("Size:%lu Orig:%lu Left:%lu Align %lu Pages:%d - %s\n",
- size, cachep->c_org_size, left_over, align, 1<<cachep->c_gfporder, name);
- if (SLAB_OFF_SLAB(flags)) printk("OFF SLAB\n");
- if (SLAB_BUFCTL(flags)) printk("BUFCTL PTRS\n");
+printk("%s: Left_over:%d Align:%d Size:%d\n", name, left_over, offset, size);
#endif
- /* if the bufctl's are on-slab, c_offset does not inc the size of the bufctl */
+ if ((cachep->c_align = (unsigned long) offset))
+ cachep->c_colour = (left_over/offset);
+ cachep->c_colour_next = cachep->c_colour;
+
+ /* If the bufctl's are on-slab, c_offset does not include the size of bufctl. */
if (!SLAB_BUFCTL(flags))
- size -= kmem_bufctl_short_size;
+ size -= sizeof(kmem_bufctl_t);
+ else
+ cachep->c_index_cachep =
+ kmem_find_general_cachep(cachep->c_num*sizeof(kmem_bufctl_t));
+ cachep->c_offset = (unsigned long) size;
cachep->c_freep = kmem_slab_end(cachep);
- cachep->c_flags = flags;
- cachep->c_offset = size;
cachep->c_firstp = kmem_slab_end(cachep);
cachep->c_lastp = kmem_slab_end(cachep);
+ cachep->c_flags = flags;
cachep->c_ctor = ctor;
cachep->c_dtor = dtor;
cachep->c_magic = SLAB_C_MAGIC;
- cachep->c_inuse = 0; /* always zero */
- cachep->c_name = name; /* simply point to the name */
-
- cachep->c_colour = 1;
- if (align)
- cachep->c_colour += (left_over/align);
- cachep->c_colour_next = cachep->c_colour;
+ cachep->c_name = name; /* Simply point to the name. */
+ spin_lock_init(&cachep->c_spinlock);
- /* warn on dup cache names */
+ /* Need the semaphore to access the chain. */
+ down(&cache_chain_sem);
searchp = &cache_cache;
do {
+ /* The name field is constant - no lock needed. */
if (!strcmp(searchp->c_name, name)) {
- printk(KERN_WARNING "%sDup name - %s\n", func_nm, name);
+ printk("%sDup name - %s\n", func_nm, name);
break;
}
searchp = searchp->c_nextp;
} while (searchp != &cache_cache);
+
+ /* There is no reason to lock our new cache before we
+ * link it in - no one knows about it yet...
+ */
cachep->c_nextp = cache_cache.c_nextp;
cache_cache.c_nextp = cachep;
- return cachep;
+ up(&cache_chain_sem);
opps:
- printk(KERN_WARNING "%sOut of mem creating cache %s\n", func_nm, name);
- return NULL;
-}
-
-/* Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked
- */
-static void
-kmem_slab_destroy(kmem_cache_t *cachep, kmem_slab_t *slabp, unsigned long flags)
-{
- if (cachep->c_dtor || SLAB_BUFCTL(cachep->c_flags)) {
- kmem_bufctl_t *bufp = slabp->s_freep;
-
- /* for each obj in slab... */
- while (bufp) {
- kmem_bufctl_t *freep;
- if (cachep->c_dtor) {
- void *objp = ((void*)bufp)-cachep->c_offset;
- if (SLAB_BUFCTL(cachep->c_flags))
- objp = bufp->buf_objp;
- (cachep->c_dtor)(objp, cachep->c_org_size, flags);
- }
- freep = bufp;
- bufp = bufp->buf_nextp;
- if (SLAB_BUFCTL(cachep->c_flags))
- kmem_cache_free(&cache_bufctl, freep);
- }
- }
-
- slabp->s_magic = SLAB_MAGIC_UNALLOC;
- kmem_freepages(cachep, slabp->s_mem);
- if (SLAB_OFF_SLAB(cachep->c_flags))
- kmem_cache_free(&cache_slab, slabp);
-}
-
-/* Destroy (remove) a cache.
- * All objs in the cache should be inactive
- */
-int
-kmem_cache_destroy(kmem_cache_t *cachep)
-{
- kmem_cache_t **searchp;
- kmem_slab_t *slabp;
- unsigned long save_flags;
-
-#if defined(SLAB_MGMT_CHECKS)
- if (!cachep) {
- printk(KERN_ERR "kmem_dest: NULL ptr\n");
- goto err_end;
- }
-
- if (in_interrupt()) {
- printk(KERN_ERR "kmem_dest: Called during int - %s\n", cachep->c_name);
-err_end:
- return 1;
- }
-#endif /* SLAB_MGMT_CHECKS */
-
- /* unlink the cache from the chain of active caches.
- * Note: the chain is never modified during an int
- */
- searchp = &(cache_cache.c_nextp);
- for (;*searchp != &cache_cache; searchp = &((*searchp)->c_nextp)) {
- if (*searchp != cachep)
- continue;
- goto good_cache;
- }
- printk(KERN_ERR "kmem_dest: Invalid cache addr %p\n", cachep);
- return 1;
-good_cache:
- /* disable cache so attempts to allocated from an int can
- * be caught.
- */
- save_flags(save_flags);
- cli();
- if (cachep->c_freep != kmem_slab_end(cachep)) {
- restore_flags(save_flags);
- printk(KERN_ERR "kmem_dest: active cache - %s\n", cachep->c_name);
- return 2;
- }
- *searchp = cachep->c_nextp; /* remove from cache chain */
- cachep->c_flags |= SLAB_CFLGS_RELEASED;
- cachep->c_freep = kmem_slab_end(cachep);
- if (cachep == clock_searchp)
- clock_searchp = cachep->c_nextp;
- restore_flags(save_flags);
-
- while ((slabp = cachep->c_firstp) != kmem_slab_end(cachep)) {
- kmem_slab_unlink(slabp);
- kmem_slab_destroy(cachep, slabp, 0);
- }
-
- if (SLAB_BUFCTL(cachep->c_flags))
- kmem_cache_free(&cache_hash, cachep->c_hashp);
- kmem_cache_free(&cache_cache, cachep);
- return 0;
+ /* 'cachep' is still NULL if we arrived here from an error path. */
+ return cachep;
}
-/* Shrink a cache, ie. remove _all_ inactive slabs.
- * Can be called when a user of a cache knows they are not going to be
- * needing any new objs for a while.
- * NOTE: This func is probably going to disappear - let me know if you
- * are using it!
+/* Shrink a cache. Releases as many slabs as possible for a cache.
+ * It is expected this function will be called by a module when it is
+ * unloaded. The cache is _not_ removed, this creates too many problems and
+ * the cache-structure does not take up much room. A module should keep its
+ * cache pointer(s) in unloaded memory, so when reloaded it knows the cache
+ * is available. To help debugging, a zero exit status indicates all slabs
+ * were released.
+ * Returns 0 if the cache is now empty, 1 if slabs remain, and 2 if the
+ * call was invalid (NULL ptr, called during an int, or unknown cache).
*/
int
-kmem_cache_shrink(kmem_cache_t *cachep, int wait)
+kmem_cache_shrink(kmem_cache_t *cachep)
{
+ kmem_cache_t *searchp;
kmem_slab_t *slabp;
- unsigned long dtor_flags;
- unsigned long save_flags, num_freed=0;
+ int ret;
-#if defined(SLAB_MGMT_CHECKS)
if (!cachep) {
printk(KERN_ERR "kmem_shrink: NULL ptr\n");
- goto end;
+ return 2;
}
-
if (in_interrupt()) {
printk(KERN_ERR "kmem_shrink: Called during int - %s\n", cachep->c_name);
- goto end;
+ return 2;
}
-#endif /* SLAB_MGMT_CHECKS */
- dtor_flags = 0;
- if (!wait) /* not allowed to wait */
- dtor_flags = SLAB_DTOR_ATOMIC;
-
- save_flags(save_flags);
- while (0) {
- cli();
- slabp = cachep->c_lastp;
- if (slabp == kmem_slab_end(cachep) || slabp->s_inuse) {
- restore_flags(save_flags);
- goto end;
- }
- kmem_slab_unlink(slabp);
- if (cachep->c_freep == slabp)
- cachep->c_freep = kmem_slab_end(cachep);
- restore_flags(save_flags);
- num_freed++;
- kmem_slab_destroy(cachep, slabp, dtor_flags);
- }
-end:
- return num_freed;
-}
+ /* Find the cache in the chain of caches. */
+ down(&cache_chain_sem); /* Semaphore is needed. */
+ searchp = &cache_cache;
+ for (;searchp->c_nextp != &cache_cache; searchp = searchp->c_nextp) {
+ if (searchp->c_nextp != cachep)
+ continue;
-/* Search for a slab whose objs are suitable for DMA.
- * Note: since testing the first free slab (in __kmem_cache_alloc()),
- * ints must not have been enabled!
- */
-static inline kmem_slab_t *
-kmem_cache_search_dma(kmem_cache_t *cachep)
-{
- kmem_slab_t *slabp = cachep->c_freep->s_nextp;
+ /* Accessing clock_searchp is safe - we hold the mutex. */
+ if (cachep == clock_searchp)
+ clock_searchp = cachep->c_nextp;
+ goto found;
+ }
+ up(&cache_chain_sem);
+ printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n", cachep);
+ return 2;
+found:
+ /* Release the semaphore before getting the cache-lock. This could
+ * mean multiple engines are shrinking the cache, but so what...
+ */
+ up(&cache_chain_sem);
+ spin_lock_irq(&cachep->c_spinlock);
- for (; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
- if (!(slabp->s_flags & SLAB_SFLGS_DMA))
- continue;
+ /* If the cache is growing, stop shrinking. */
+ while (!cachep->c_growing) {
+ slabp = cachep->c_lastp;
+ if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
+ break;
+ /* The slab being released is the last one in the chain. If it is
+ * also the slab c_freep points at, no slab with free objs remains
+ * once it has gone, so c_freep must be reset to the chain
+ * terminator - otherwise it would reference a destroyed slab.
+ */
+ if (cachep->c_freep == slabp)
+ cachep->c_freep = kmem_slab_end(cachep);
kmem_slab_unlink(slabp);
- kmem_slab_link_free(cachep, slabp);
- return slabp;
- }
- return NULL;
+ /* Destroying a slab runs destructors and frees pages - do it unlocked. */
+ spin_unlock_irq(&cachep->c_spinlock);
+ kmem_slab_destroy(cachep, slabp);
+ spin_lock_irq(&cachep->c_spinlock);
+ }
+ ret = 1;
+ if (cachep->c_lastp == kmem_slab_end(cachep))
+ ret--; /* Cache is empty. */
+ spin_unlock_irq(&cachep->c_spinlock);
+ return ret;
}
-/* get the mem for a slab mgmt obj */
+/* Get the mem for a slab mgmt obj.
+ * For off-slab caches the struct is allocated from 'cache_slabp' and NULL
+ * is returned if that allocation fails. Otherwise it is placed inside the
+ * slab, behind the last obj (and its on-slab bufctl), at the next address
+ * produced by L1_CACHE_ALIGN(). 'objp' is used as the start of the objs.
+ * s_inuse, s_dma and s_index of a returned struct are cleared.
+ */
static inline kmem_slab_t *
-kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, unsigned long local_flags, unsigned long offset)
+kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp, int local_flags)
{
kmem_slab_t *slabp;
if (SLAB_OFF_SLAB(cachep->c_flags)) {
- /* slab mngmnt obj is off-slab */
- if (!(slabp = kmem_cache_alloc(&cache_slab, local_flags)))
- return NULL;
+ /* Slab mgmt obj is off-slab. */
+ slabp = kmem_cache_alloc(cache_slabp, local_flags);
} else {
- /* slab mngmnt at end of slab mem */
- slabp = objp + (PAGE_SIZE << cachep->c_gfporder);
- slabp--;
- if (!SLAB_PTR_IN_OBJ(cachep->c_flags)) {
- /* A bit of extra help for the L1 cache; try to position the slab
- * mgmnt struct at different offsets within the gap at the end
- * of a slab. This helps avoid thrashing the h/w cache lines,
- * that map to the end of a page, too much...
- */
- unsigned long gap = cachep->c_offset;
- if (!SLAB_BUFCTL(cachep->c_flags))
- gap += kmem_bufctl_short_size;
- gap = (PAGE_SIZE << cachep->c_gfporder)-((gap*cachep->c_num)+offset+sizeof(*slabp));
- gap /= (sizeof(*slabp)/2);
- gap *= (sizeof(*slabp)/2);
- slabp = (((void*)slabp)-gap);
- }
+ /* Slab mgmnt at end of slab mem, placed so that
+ * the position is 'coloured'.
+ */
+ void *end;
+ end = objp + (cachep->c_num * cachep->c_offset);
+ /* With on-slab bufctls each obj is followed by its bufctl. */
+ if (!SLAB_BUFCTL(cachep->c_flags))
+ end += (cachep->c_num * sizeof(kmem_bufctl_t));
+ slabp = (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
}
- slabp->s_flags = slabp->s_inuse = slabp->s_jiffies = 0;
+ if (slabp) {
+ slabp->s_inuse = 0;
+ slabp->s_dma = 0;
+ slabp->s_index = NULL;
+ }
return slabp;
}
-static inline int
-kmem_cache_init_objs(kmem_cache_t *cachep, kmem_slab_t *slabp, void *objp,
- unsigned long local_flags, unsigned long ctor_flags)
+/* Initialise every obj of a new slab and build the slab's free-list.
+ * 'objp' is the address of the first obj. For each of the c_num objs the
+ * red-zone words are written (debug only), the constructor is run or the
+ * obj is poisoned, and the obj's bufctl is appended to the list that starts
+ * at slabp->s_freep. The list is NULL terminated.
+ */
+static inline void
+kmem_cache_init_objs(kmem_cache_t * cachep, kmem_slab_t * slabp, void *objp,
+ unsigned long ctor_flags)
{
kmem_bufctl_t **bufpp = &slabp->s_freep;
- unsigned long num = cachep->c_num;
+ unsigned long num = cachep->c_num-1;
do {
- if (SLAB_BUFCTL(cachep->c_flags)) {
- if (!(*bufpp = kmem_cache_alloc(&cache_bufctl, local_flags))) {
- kmem_slab_destroy(cachep, slabp, 0);
- return 1;
- }
- (*bufpp)->buf_objp = objp;
- (*bufpp)->buf_hashp = &cachep->c_hashp[kmem_hash(cachep, objp)];
+#if SLAB_DEBUG_SUPPORT
+ if (cachep->c_flags & SLAB_RED_ZONE) {
+ *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
+ objp += BYTES_PER_WORD;
+ *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
}
+#endif /* SLAB_DEBUG_SUPPORT */
+ /* Constructors are not allowed to allocate memory from the same cache
+ * which they are a constructor for. Otherwise, deadlock.
+ * They must also be threaded.
+ */
if (cachep->c_ctor)
- cachep->c_ctor(objp, cachep->c_org_size, ctor_flags);
+ cachep->c_ctor(objp, cachep, ctor_flags);
+#if SLAB_DEBUG_SUPPORT
+ else if (cachep->c_flags & SLAB_POISION) {
+ /* need to poision the objs */
+ kmem_poision_obj(cachep, objp);
+ }
-#if defined(SLAB_DEBUG_SUPPORT)
- if (cachep->c_flags & SLAB_RED_ZONE)
- *((unsigned long*)(objp+cachep->c_org_size)) = SLAB_RED_MAGIC1;
+ /* Re-check both red-zone words after the constructor/poisoning; a
+ * damaged word is reported and repaired.
+ */
+ if (cachep->c_flags & SLAB_RED_ZONE) {
+ if (*((unsigned long*)(objp+cachep->c_org_size)) !=
+ SLAB_RED_MAGIC1) {
+ *((unsigned long*)(objp+cachep->c_org_size)) =
+ SLAB_RED_MAGIC1;
+ printk(KERN_ERR "kmem_init_obj: Bad rear redzone "
+ "after constructor - %s\n", cachep->c_name);
+ }
+ objp -= BYTES_PER_WORD;
+ if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
+ *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
+ printk(KERN_ERR "kmem_init_obj: Bad front redzone "
+ "after constructor - %s\n", cachep->c_name);
+ }
+ }
#endif /* SLAB_DEBUG_SUPPORT */
objp += cachep->c_offset;
- if (!SLAB_BUFCTL(cachep->c_flags)) {
+ /* No s_index array: the bufctl lives on-slab, directly behind the obj.
+ * Otherwise the bufctl is the s_index entry selected by 'num'.
+ */
+ if (!slabp->s_index) {
*bufpp = objp;
- objp += kmem_bufctl_short_size;
- }
- if (!SLAB_PTR_IN_OBJ(cachep->c_flags))
- (*bufpp)->buf_slabp = slabp;
+ objp += sizeof(kmem_bufctl_t);
+ } else
+ *bufpp = &slabp->s_index[num];
bufpp = &(*bufpp)->buf_nextp;
- } while (--num);
+ } while (num--);
+
*bufpp = NULL;
- return 0;
}
-/* Grow (by 1) the number of slabs within a cache.
- * This is called by kmem_cache_alloc() when there are no
- * inactive objs left in a cache
+/* Grow (by 1) the number of slabs within a cache. This is called by
+ * kmem_cache_alloc() when there are no free (inactive) objs left in a cache.
+ * Returns 1 if a new slab was added to the cache, 0 otherwise.
*/
-static void
-kmem_cache_grow(kmem_cache_t *cachep, unsigned long flags)
+static int
+kmem_cache_grow(kmem_cache_t * cachep, int flags)
{
kmem_slab_t *slabp;
+ struct page *page;
void *objp;
- unsigned int offset, dma;
- unsigned long ctor_flags, local_flags, save_flags;
+ size_t offset;
+ unsigned int dma, local_flags;
+ unsigned long ctor_flags;
+ unsigned long save_flags;
+
+ /* Be lazy and only check for valid flags here,
+ * keeping it out of the critical path in kmem_cache_alloc().
+ */
+ if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
+ printk(KERN_WARNING "kmem_grow: Illegal flgs %X (correcting) - %s\n",
+ flags, cachep->c_name);
+ flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
+ }
if (flags & SLAB_NO_GROW)
- return; /* caller doesn't want us to grow */
+ return 0;
- save_flags(save_flags);
/* The test for missing atomic flag is performed here, rather than
* the more obvious place, simply to reduce the critical path length
- * in kmem_cache_alloc(). If a caller is slightly mis-behaving,
- * will eventually be caught here (where it matters)
+ * in kmem_cache_alloc(). If a caller is slightly mis-behaving they
+ * will eventually be caught here (where it matters).
*/
if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
- static int count = 0;
- if (count < 8) {
- printk(KERN_ERR "kmem_grow: Called nonatomically from "
- "int - %s\n", cachep->c_name);
- count++;
- }
+ printk(KERN_ERR "kmem_grow: Called nonatomically from int - %s\n",
+ cachep->c_name);
flags &= ~SLAB_LEVEL_MASK;
flags |= SLAB_ATOMIC;
}
- local_flags = (flags & SLAB_LEVEL_MASK);
ctor_flags = SLAB_CTOR_CONSTRUCTOR;
- if ((flags & SLAB_LEVEL_MASK) == SLAB_ATOMIC) {
- /* Not allowed to sleep.
- * Need to tell a constructor about this - it
- * might need to know....
+ local_flags = (flags & SLAB_LEVEL_MASK);
+ if (local_flags == SLAB_ATOMIC) {
+ /* Not allowed to sleep. Need to tell a constructor about
+ * this - it might need to know...
*/
ctor_flags |= SLAB_CTOR_ATOMIC;
}
- slabp = NULL;
- /* get mem for the objs */
- if (!(objp = kmem_getpages(cachep, flags, &dma)))
- goto opps1;
+ /* About to mess with non-constant members - lock. */
+ spin_lock_irqsave(&cachep->c_spinlock, save_flags);
- /* get colour for the slab, and cal the next value */
- cli();
- if (!(offset = --(cachep->c_colour_next)))
+ /* Get colour for the slab, and cal the next value. */
+ if (!(offset = cachep->c_colour_next--))
cachep->c_colour_next = cachep->c_colour;
- restore_flags(save_flags);
offset *= cachep->c_align;
+ cachep->c_dflags = SLAB_CFLGS_GROWN;
+
+ cachep->c_growing++;
+re_try:
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+
+ /* A series of memory allocations for a new slab.
+ * Neither the cache-chain semaphore, or cache-lock, are
+ * held, but the incrementing c_growing prevents this
+ * this cache from being reaped or shrunk.
+ * Note: The cache could be selected in for reaping in
+ * kmem_cache_reap(), but when the final test is made the
+ * growing value will be seen.
+ */
+
+ /* Get mem for the objs. */
+ if (!(objp = kmem_getpages(cachep, flags, &dma)))
+ goto failed;
- /* get slab mgmt */
- if (!(slabp = kmem_cache_slabmgmt(cachep, objp, local_flags, offset)))
- goto opps2;
+ /* Get slab mgmt. */
+ if (!(slabp = kmem_cache_slabmgmt(cachep, objp+offset, local_flags)))
+ goto opps1;
if (dma)
- slabp->s_flags = SLAB_SFLGS_DMA;
-
+ slabp->s_dma = 1;
+ if (SLAB_BUFCTL(cachep->c_flags)) {
+ slabp->s_index = kmem_cache_alloc(cachep->c_index_cachep, local_flags);
+ if (!slabp->s_index)
+ goto opps2;
+ }
+
+ /* Nasty!!!!!! I hope this is OK. */
+ /* 'dma' is reused from here on as a countdown of the slab's pages. */
+ dma = 1 << cachep->c_gfporder;
+ page = &mem_map[MAP_NR(objp)];
+ do {
+ SLAB_SET_PAGE_CACHE(page, cachep);
+ SLAB_SET_PAGE_SLAB(page, slabp);
+ PageSetSlab(page);
+ page++;
+ } while (--dma);
+
+ slabp->s_offset = offset; /* It will fit... */
+ objp += offset; /* Address of first object. */
slabp->s_mem = objp;
- objp += offset; /* address of first object */
/* For on-slab bufctls, c_offset is the distance between the start of
* an obj and its related bufctl. For off-slab bufctls, c_offset is
* the distance between objs in the slab.
- * Reason for bufctl at end of obj (when on slab), as opposed to the front;
- * if stored within the obj (has no state), and the obj is 'used' after being
- * freed then (normally) most activity occurs at the beginning of the obj.
- * By keeping the bufctl ptr away from the front, should reduce the chance of
- * corruption. Also, allows easier alignment of objs onto cache lines when
- * bufctl is not stored with the objs.
- * Downsize; if, while an obj is active, a write is made past its end, then the
- * bufctl will be corrupted :(
*/
- if (kmem_cache_init_objs(cachep, slabp, objp, local_flags, ctor_flags))
- goto no_objs;
+ kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
+
+ spin_lock_irq(&cachep->c_spinlock);
- cli();
- /* make slab active */
+ /* Make slab active. */
slabp->s_magic = SLAB_MAGIC_ALLOC;
kmem_slab_link_end(cachep, slabp);
if (cachep->c_freep == kmem_slab_end(cachep))
cachep->c_freep = slabp;
- restore_flags(save_flags);
- return;
-no_objs:
- kmem_freepages(cachep, slabp->s_mem);
+ SLAB_STATS_INC_GROWN(cachep);
+ cachep->c_failures = 0;
+ cachep->c_growing--;
+
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ return 1;
opps2:
- kmem_freepages(cachep, objp);
+ if (SLAB_OFF_SLAB(cachep->c_flags))
+ kmem_cache_free(cache_slabp, slabp);
opps1:
- if (slabp && SLAB_OFF_SLAB(cachep->c_flags))
- kmem_cache_free(&cache_slab, slabp);
- /* printk("kmem_alloc: Out of mem - %s\n", cachep->c_name); */
- return;
+ kmem_freepages(cachep, objp);
+failed:
+ /* c_growing was raised before the allocations were attempted, so the
+ * lock is taken on _every_ failure in order to drop it again. Leaving
+ * it raised would stop the cache from ever being shrunk.
+ */
+ spin_lock_irq(&cachep->c_spinlock);
+ if (local_flags != SLAB_ATOMIC && cachep->c_gfporder) {
+ /* For large order (>0) slabs, we try again.
+ * Needed because the gfp() functions are not good at giving
+ * out contigious pages unless pushed (but do not push too hard).
+ */
+ if (cachep->c_failures++ < 4 && cachep->c_freep == kmem_slab_end(cachep))
+ goto re_try;
+ cachep->c_failures = 1; /* Memory is low, don't try as hard next time. */
+ }
+ cachep->c_growing--;
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ return 0;
+}
+
+static void
+kmem_report_alloc_err(const char *str, kmem_cache_t * cachep)
+{
+ if (cachep)
+ SLAB_STATS_INC_ERR(cachep); /* this is atomic */
+ printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
+ str, cachep ? cachep->c_name : "unknown");
}
-#if defined(SLAB_DEBUG_SUPPORT)
-/* Perform extra freeing checks.
- * Currently, this check is only for caches that use bufctl structures
- * within the slab. Those which use bufctl's from the internal cache
- * have a reasonable check when the address is searched for.
+static void
+kmem_report_free_err(const char *str, void *objp, kmem_cache_t * cachep)
+{
+ if (cachep)
+ SLAB_STATS_INC_ERR(cachep);
+ printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
+ str, objp, cachep ? cachep->c_name : "unknown");
+}
+
+/* Search for a slab whose objs are suitable for DMA.
+ * The walk starts at the slab following c_freep. The first slab found with
+ * s_dma set is moved in front of the current c_freep slab, made the new
+ * c_freep and returned. If there is none, kmem_slab_end(cachep) is
+ * returned.
+ * Note: since testing the first free slab (in __kmem_cache_alloc()),
+ * ints must not have been enabled, or the cache-lock released!
+ */
+static inline kmem_slab_t *
+kmem_cache_search_dma(kmem_cache_t * cachep)
+{
+	kmem_slab_t	*candp;
+
+	candp = cachep->c_freep->s_nextp;
+	while (candp != kmem_slab_end(cachep)) {
+		if (candp->s_dma) {
+			kmem_slab_unlink(candp);
+			kmem_slab_link_free(cachep, candp);
+			cachep->c_freep = candp;
+			return candp;
+		}
+		candp = candp->s_nextp;
+	}
+	return candp;
+}
+
+#if SLAB_DEBUG_SUPPORT
+/* Perform extra freeing checks. Currently, this check is only for caches
+ * that use bufctl structures within the slab. Those which use bufctl's
+ * from the internal cache have a reasonable check when the address is
+ * searched for. Called with the cache-lock held.
+ * Returns 'objp' when 'bufp' is not on the free-list starting at
+ * 'search_bufp' (or when the cache uses off-slab bufctls), and NULL when
+ * 'bufp' is already on that list. Nothing is logged here.
*/
static void *
-kmem_extra_free_checks(const kmem_cache_t *cachep, kmem_bufctl_t *search_bufp,
- const kmem_bufctl_t *bufp, void * objp)
+kmem_extra_free_checks(kmem_cache_t * cachep, kmem_bufctl_t *search_bufp,
+ kmem_bufctl_t *bufp, void * objp)
{
if (SLAB_BUFCTL(cachep->c_flags))
- goto end;
+ return objp;
- /* check slab's freelist to see if this obj is there */
+ /* Check slab's freelist to see if this obj is there. */
for (; search_bufp; search_bufp = search_bufp->buf_nextp) {
if (search_bufp != bufp)
continue;
- printk(KERN_ERR "kmem_free: Double free detected during checking "
- "%p - %s\n", objp, cachep->c_name);
+ /* The bufctl is already on the free-list. */
return NULL;
}
-end:
return objp;
}
#endif /* SLAB_DEBUG_SUPPORT */
+/* Called with cache lock held. */
static inline void
kmem_cache_full_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
- if (!slabp->s_nextp->s_inuse)
- return; /* at correct position */
- slabp->s_jiffies = jiffies; /* set release time */
- if (cachep->c_freep == slabp)
- cachep->c_freep = slabp->s_nextp;
- kmem_slab_unlink(slabp);
- kmem_slab_link_end(cachep, slabp);
-
- return;
+ if (slabp->s_nextp->s_inuse) {
+ /* Not at correct position. */
+ if (cachep->c_freep == slabp)
+ cachep->c_freep = slabp->s_nextp;
+ kmem_slab_unlink(slabp);
+ kmem_slab_link_end(cachep, slabp);
+ }
}
+/* Called with cache lock held. */
static inline void
kmem_cache_one_free(kmem_cache_t *cachep, kmem_slab_t *slabp)
{
- if (slabp->s_nextp->s_inuse != cachep->c_num) {
- cachep->c_freep = slabp;
- return;
+ if (slabp->s_nextp->s_inuse == cachep->c_num) {
+ kmem_slab_unlink(slabp);
+ kmem_slab_link_free(cachep, slabp);
}
- kmem_slab_unlink(slabp);
- kmem_slab_link_free(cachep, slabp);
- return;
+ cachep->c_freep = slabp;
}
-/* Returns a ptr to an obj in the given cache.
- * The obj is in the initial state (if there is one)
- */
+/* Returns a ptr to an obj in the given cache. */
static inline void *
-__kmem_cache_alloc(kmem_cache_t *cachep, unsigned long flags)
+__kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
kmem_slab_t *slabp;
kmem_bufctl_t *bufp;
void *objp;
unsigned long save_flags;
- /* sanity check */
+ /* Sanity check. */
if (!cachep)
goto nul_ptr;
- save_flags(save_flags);
- cli();
- /* get slab alloc is to come from */
+ spin_lock_irqsave(&cachep->c_spinlock, save_flags);
+try_again:
+ /* Get slab alloc is to come from. */
slabp = cachep->c_freep;
- /* magic is a sanity check _and_ says if we need a new slab */
+ /* Magic is a sanity check _and_ says if we need a new slab. */
if (slabp->s_magic != SLAB_MAGIC_ALLOC)
goto alloc_new_slab;
-try_again:
- /* DMA allocations are 'rare' - keep out of critical path */
+ /* DMA requests are 'rare' - keep out of the critical path. */
if (flags & SLAB_DMA)
goto search_dma;
try_again_dma:
+ SLAB_STATS_INC_ALLOCED(cachep);
+ SLAB_STATS_INC_ACTIVE(cachep);
+ SLAB_STATS_SET_HIGH(cachep);
slabp->s_inuse++;
bufp = slabp->s_freep;
slabp->s_freep = bufp->buf_nextp;
- if (!SLAB_BUFCTL(cachep->c_flags)) {
- /* Nasty - we want the 'if' to be taken in the common case */
- if (slabp->s_freep) {
-short_finished:
+ if (slabp->s_freep) {
+ret_obj:
+ if (!slabp->s_index) {
+ bufp->buf_slabp = slabp;
objp = ((void*)bufp) - cachep->c_offset;
- restore_flags(save_flags);
-#if defined(SLAB_DEBUG_SUPPORT)
+finished:
+ /* The lock is not needed by the red-zone or poision ops, and the
+ * obj has been removed from the slab. Should be safe to drop
+ * the lock here.
+ */
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+#if SLAB_DEBUG_SUPPORT
if (cachep->c_flags & SLAB_RED_ZONE)
goto red_zone;
+ret_red:
+ if ((cachep->c_flags & SLAB_POISION) && kmem_check_poision_obj(cachep, objp))
+ kmem_report_alloc_err("Bad poision", cachep);
#endif /* SLAB_DEBUG_SUPPORT */
return objp;
- } else {
- cachep->c_freep = slabp->s_nextp;
- goto short_finished;
}
+ /* Update index ptr. */
+ objp = ((bufp-slabp->s_index)*cachep->c_offset) + slabp->s_mem;
+ bufp->buf_objp = objp;
+ goto finished;
}
+ cachep->c_freep = slabp->s_nextp;
+ goto ret_obj;
- if (!slabp->s_freep)
- cachep->c_freep = slabp->s_nextp;
-
- /* link into hash chain */
- objp = kmem_add_to_hash(cachep, bufp);
- restore_flags(save_flags);
-#if defined(SLAB_DEBUG_SUPPORT)
- if (!(cachep->c_flags & SLAB_RED_ZONE))
-#endif /* SLAB_DEBUG_SUPPORT */
- return objp;
-
-#if defined(SLAB_DEBUG_SUPPORT)
+#if SLAB_DEBUG_SUPPORT
red_zone:
- /* set alloc red-zone, and check old one */
+ /* Set alloc red-zone, and check old one. */
+ if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
+ kmem_report_alloc_err("Bad front redzone", cachep);
+ objp += BYTES_PER_WORD;
if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
- printk(KERN_ERR "kmem_alloc: Bad redzone %p - %s\n",
- objp, cachep->c_name);
- return objp;
+ kmem_report_alloc_err("Bad rear redzone", cachep);
+ goto ret_red;
#endif /* SLAB_DEBUG_SUPPORT */
search_dma:
- if (slabp->s_flags & SLAB_SFLGS_DMA)
- goto try_again_dma;
- /* need to search... */
- if ((slabp = kmem_cache_search_dma(cachep)))
+ if (slabp->s_dma || (slabp = kmem_cache_search_dma(cachep))!=kmem_slab_end(cachep))
goto try_again_dma;
alloc_new_slab:
- /* Either out of slabs, or magic number corruption */
- if (slabp != kmem_slab_end(cachep))
- goto bad_slab;
- /* need a new slab */
- restore_flags(save_flags);
- if (SLAB_RELEASED(cachep->c_flags)) {
- printk(KERN_ERR "kmem_alloc: destroyed cache\n");
- goto end;
- }
-
- /* Be lazy and only check for valid flags
- * here (keeping it out of the critical path above)
- */
- if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
- printk(KERN_ERR "kmem_alloc: Illegal flgs %lX (correcting) - %s\n",
- flags, cachep->c_name);
- flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
+ /* Either out of slabs, or magic number corruption. */
+ if (slabp == kmem_slab_end(cachep)) {
+ /* Need a new slab. Release the lock before calling kmem_cache_grow().
+ * This allows objs to be released back into the cache while growing.
+ */
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ if (kmem_cache_grow(cachep, flags)) {
+ /* Someone may have stolen our objs. Doesn't matter, we'll
+ * just come back here again.
+ */
+ goto try_again;
+ }
+ /* Couldn't grow, but some objs may have been freed. */
+ spin_lock_irq(&cachep->c_spinlock);
+ if (cachep->c_freep != kmem_slab_end(cachep))
+ goto try_again;
+ } else {
+ /* Very serious error - maybe panic() here? */
+ kmem_report_alloc_err("Bad slab magic (corrupt)", cachep);
}
-
- kmem_cache_grow(cachep, flags);
- cli();
- if ((slabp=cachep->c_freep) != kmem_slab_end(cachep))
- goto try_again;
- restore_flags(save_flags);
-end:
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+err_exit:
return NULL;
-bad_slab:
- /* v. serious error - maybe panic() here? */
- printk(KERN_ERR "kmem_alloc: Bad slab magic (corruption) - %s\n",
- cachep->c_name);
- goto end;
nul_ptr:
- printk(KERN_ERR "kmem_alloc: NULL ptr\n");
- goto end;
+ kmem_report_alloc_err("NULL ptr", NULL);
+ goto err_exit;
}
-/* Release an obj back to its cache.
- * If the obj has a constructed state, it should be
- * in this state _before_ it is released.
+/* Release an obj back to its cache. If the obj has a constructed state,
+ * it should be in this state _before_ it is released.
*/
static inline void
__kmem_cache_free(kmem_cache_t *cachep, void *objp)
@@ -1200,128 +1461,137 @@
kmem_bufctl_t *bufp;
unsigned long save_flags;
- /* basic sanity checks */
- if (!cachep)
- goto nul_cache;
- if (!objp)
- goto nul_obj;
+ /* Basic sanity checks. */
+ if (!cachep || !objp)
+ goto null_addr;
- save_flags(save_flags);
-#if defined(SLAB_DEBUG_SUPPORT)
+#if SLAB_DEBUG_SUPPORT
+ if (cachep->c_flags & SLAB_RED_ZONE)
+ objp -= BYTES_PER_WORD;
+#endif /* SLAB_DEBUG_SUPPORT */
+
+
+#if SLAB_DEBUG_SUPPORT
+ /* A verify func is called without the cache-lock held. */
if (cachep->c_flags & SLAB_DEBUG_INITIAL)
goto init_state_check;
finished_initial:
#endif /* SLAB_DEBUG_SUPPORT */
+ spin_lock_irqsave(&cachep->c_spinlock, save_flags);
+
if (SLAB_BUFCTL(cachep->c_flags))
goto bufctl;
-
bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
- /* get slab for the obj */
- if (SLAB_PTR_IN_OBJ(cachep->c_flags)) {
- /* if SLAB_HIGH_PACK is undef, the below is optimised away */
- slabp = (kmem_slab_t *)((((unsigned long)objp)&PAGE_MASK)+PAGE_SIZE);
- slabp--;
- } else
- slabp = (kmem_slab_t *) bufp->buf_slabp;
+ /* Get slab for the object. */
+#if 0
+ /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref for some objects.
+ * Is this worth while? XXX
+ */
+ if (cachep->c_flags & SLAB_HIGH_PACK)
+ slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
+ else
+#endif
+ slabp = bufp->buf_slabp;
- if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* sanity check */
- goto bad_obj;
- cli();
+check_magic:
+ if (slabp->s_magic != SLAB_MAGIC_ALLOC) /* Sanity check. */
+ goto bad_slab;
-#if defined(SLAB_DEBUG_SUPPORT)
- if (cachep->c_flags & (SLAB_DEBUG_FREE|SLAB_RED_ZONE))
+#if SLAB_DEBUG_SUPPORT
+ if (cachep->c_flags & SLAB_DEBUG_FREE)
goto extra_checks;
+passed_extra:
#endif /* SLAB_DEBUG_SUPPORT */
-passed_extra:
- if (!slabp->s_inuse) /* sanity check */
- goto too_many;
- bufp->buf_nextp = slabp->s_freep;
- slabp->s_freep = bufp;
- if (--(slabp->s_inuse)) {
- if (bufp->buf_nextp) {
- restore_flags(save_flags);
- return;
+ if (slabp->s_inuse) { /* Sanity check. */
+ SLAB_STATS_DEC_ACTIVE(cachep);
+ slabp->s_inuse--;
+ bufp->buf_nextp = slabp->s_freep;
+ slabp->s_freep = bufp;
+ if (slabp->s_inuse) {
+ if (bufp->buf_nextp) {
+ /* (hopefully) The most common case. */
+finished:
+#if SLAB_DEBUG_SUPPORT
+ /* Need to poision the obj while holding the lock. */
+ if (cachep->c_flags & SLAB_POISION)
+ kmem_poision_obj(cachep, objp);
+ if (cachep->c_flags & SLAB_RED_ZONE)
+ goto red_zone;
+return_red:
+#endif /* SLAB_DEBUG_SUPPORT */
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ return;
+ }
+ kmem_cache_one_free(cachep, slabp);
+ goto finished;
}
- kmem_cache_one_free(cachep, slabp);
- restore_flags(save_flags);
- return;
+ kmem_cache_full_free(cachep, slabp);
+ goto finished;
}
- kmem_cache_full_free(cachep, slabp);
- restore_flags(save_flags);
+
+ /* Don't add to freelist. */
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ kmem_report_free_err("free with no active objs", objp, cachep);
return;
bufctl:
- /* Off-slab bufctls. Need to search hash for bufctl, and hence the slab.
- * No 'extra' checks are performed for objs stored this way, finding
- * the obj a check enough
+ /* No 'extra' checks are performed for objs stored this way, finding
+ * the obj is check enough.
*/
- cli();
- if ((bufp = kmem_remove_from_hash(cachep, objp))) {
- slabp = (kmem_slab_t *) bufp->buf_slabp;
-#if defined(SLAB_DEBUG_SUPPORT)
- if (cachep->c_flags & SLAB_RED_ZONE)
- goto red_zone;
-#endif /* SLAB_DEBUG_SUPPORT */
- goto passed_extra;
- }
- restore_flags(save_flags);
- printk(KERN_ERR "kmem_free: Either bad obj addr or double free: %p - %s\n",
- objp, cachep->c_name);
+ slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
+ bufp = &slabp->s_index[(objp - slabp->s_mem)/cachep->c_offset];
+ if (bufp->buf_objp == objp)
+ goto check_magic;
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ kmem_report_free_err("Either bad obj addr or double free", objp, cachep);
return;
-#if defined(SLAB_DEBUG_SUPPORT)
-red_zone:
- if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
- /* Either write past end of the object, or a double free */
- printk(KERN_ERR "kmem_free: Bad redzone %p - %s\n",
- objp, cachep->c_name);
- }
- goto passed_extra;
+#if SLAB_DEBUG_SUPPORT
init_state_check:
- /* Need to call the slab's constructor so that
- * the caller can perform a verify of its state (debugging)
+ /* Need to call the slab's constructor so the
+ * caller can perform a verify of its state (debugging).
*/
- cachep->c_ctor(objp, cachep->c_org_size, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+ cachep->c_ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
goto finished_initial;
extra_checks:
- if ((cachep->c_flags & SLAB_DEBUG_FREE) &&
- (objp != kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp))) {
- restore_flags(save_flags);
+ if (!kmem_extra_free_checks(cachep, slabp->s_freep, bufp, objp)) {
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ kmem_report_free_err("Double free detected during checks", objp, cachep);
return;
}
- if (cachep->c_flags & SLAB_RED_ZONE)
- goto red_zone;
goto passed_extra;
-#endif /* SLAB_DEBUG_SUPPORT */
-bad_obj:
- /* The addr of the slab doesn't contain the correct
- * magic num
+red_zone:
+ /* We hold the cache-lock while checking the red-zone, just in case
+ * someone tries to take this obj from us...
*/
- if (slabp->s_magic == SLAB_MAGIC_UNALLOC) {
- /* magic num says this is an unalloc slab */
- printk(KERN_ERR "kmem_free: obj %p from destroyed slab - %s\n",
- objp, cachep->c_name);
- return;
+ if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
+ /* Either write before start of obj, or a double free. */
+ kmem_report_free_err("Bad front redzone", objp, cachep);
}
- printk(KERN_ERR "kmem_free: Bad obj %p - %s\n", objp, cachep->c_name);
- return;
-too_many:
- /* don't add to freelist */
- restore_flags(save_flags);
- printk(KERN_ERR "kmem_free: obj free for slab with no active objs - %s\n",
- cachep->c_name);
- return;
-nul_obj:
- printk(KERN_ERR "kmem_free: NULL obj - %s\n", cachep->c_name);
+ objp += BYTES_PER_WORD;
+ if (xchg((unsigned long *)(objp+cachep->c_org_size), SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
+ /* Either write past end of obj, or a double free. */
+ kmem_report_free_err("Bad rear redzone", objp, cachep);
+ }
+ goto return_red;
+#endif /* SLAB_DEBUG_SUPPORT */
+bad_slab:
+ /* Slab doesn't contain the correct magic num. */
+ if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
+ /* Magic num says this is a destroyed slab. */
+ kmem_report_free_err("free from inactive slab", objp, cachep);
+ } else
+ kmem_report_free_err("Bad obj addr", objp, cachep);
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
return;
-nul_cache:
- printk(KERN_ERR "kmem_free: NULL cache ptr\n");
+null_addr:
+ kmem_report_free_err("NULL ptr", objp, cachep);
return;
}
void *
-kmem_cache_alloc(kmem_cache_t *cachep, unsigned long flags)
+kmem_cache_alloc(kmem_cache_t *cachep, int flags)
{
return __kmem_cache_alloc(cachep, flags);
}
@@ -1333,163 +1603,248 @@
}
void *
-kmem_alloc(unsigned long size, unsigned long flags)
+kmalloc(size_t size, int flags)
{
- cache_sizes_t *cachep = cache_sizes;
+ cache_sizes_t *csizep = cache_sizes;
- for (; cachep->cs_size; cachep++) {
- if (size > cachep->cs_size)
+ for (; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
continue;
- /* should the inline version be used here? */
- return kmem_cache_alloc(cachep->cs_cachep, flags);
+ return __kmem_cache_alloc(csizep->cs_cachep, flags);
}
- printk(KERN_ERR "kmem_alloc: Size (%lu) too large\n", size);
+ printk(KERN_ERR "kmalloc: Size (%lu) too large\n", (unsigned long) size);
return NULL;
}
void
-kmem_free(void *objp, unsigned long size)
+kfree(void *objp)
{
- cache_sizes_t *cachep = cache_sizes;
+ struct page *page;
+ int nr;
- for (; cachep->cs_size; cachep++) {
- if (size > cachep->cs_size)
- continue;
- /* should the inline version be used here? */
- kmem_cache_free(cachep->cs_cachep, objp);
- return;
+ if (!objp)
+ goto null_ptr;
+ nr = MAP_NR(objp);
+ if (nr >= max_mapnr)
+ goto null_ptr;
+
+ /* Assume we own the page structure - hence no locking.
+ * If someone is misbehaving (eg. someone calling us with a bad
+ * address), then access to the page structure can race with the
+ * kmem_slab_destroy() code. Need to add a spin_lock to each page
+ * structure, which would be useful in threading the gfp() functions....
+ */
+ page = &mem_map[nr];
+ if (PageSlab(page)) {
+ kmem_cache_t *cachep;
+
+ /* Here, we (again) assume the obj address is good.
+ * If it isn't, and happens to map onto another
+ * general-cache page which has no active objs, then
+ * we race....
+ */
+ cachep = SLAB_GET_PAGE_CACHE(page);
+ if (cachep && (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
+ __kmem_cache_free(cachep, objp);
+ return;
+ }
+ }
+null_ptr:
+ printk(KERN_ERR "kfree: Bad obj %p\n", objp);
+ return;
+}
+
+void
+kfree_s(void *objp, size_t size)
+{
+ struct page *page;
+ int nr;
+
+ if (!objp)
+ goto null_ptr;
+ nr = MAP_NR(objp);
+ if (nr >= max_mapnr)
+ goto null_ptr;
+ /* See comment in kfree() */
+ page = &mem_map[nr];
+ if (PageSlab(page)) {
+ kmem_cache_t *cachep;
+ /* See comment in kfree() */
+ cachep = SLAB_GET_PAGE_CACHE(page);
+ if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
+ if (size <= cachep->c_org_size) { /* XXX better check */
+ __kmem_cache_free(cachep, objp);
+ return;
+ }
+ }
}
- printk(KERN_ERR "kmem_free: Size (%lu) too large - strange\n", size);
+null_ptr:
+ printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
+ return;
}
+kmem_cache_t *
+kmem_find_general_cachep(size_t size)
+{
+ cache_sizes_t *csizep = cache_sizes;
+
+ /* This function could be moved to the header-file, and
+ * made inline so consumers can quickly determine what
+ * cache-ptr they require.
+ */
+ for (; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ break;
+ }
+ return csizep->cs_cachep;
+}
/* Called from try_to_free_page().
- * Ideal solution would have a weight for each cache, based on;
- * o num of fully free slabs
- * o if the objs have a constructor/deconstructor
- * o length of time slabs have been fully free (ie. ageing)
* This function _cannot_ be called within a int, but it
* can be interrupted.
*/
int
kmem_cache_reap(int pri, int dma, int wait)
{
- unsigned long dtor_flags = 0;
- unsigned long best_jiffie;
- unsigned long now;
- int count = 8;
- kmem_slab_t *best_slabp = NULL;
- kmem_cache_t *best_cachep = NULL;
kmem_slab_t *slabp;
kmem_cache_t *searchp;
- unsigned long save_flags;
+ kmem_cache_t *best_cachep;
+ unsigned long scan;
+ unsigned long reap_level;
- /* 'pri' maps to the number of caches to examine, not the number of slabs.
- * This avoids only checking the jiffies for slabs in one cache at the
- * expensive spending more cycles
+ if (in_interrupt()) {
+ printk("kmem_cache_reap() called within int!\n");
+ return 0;
+ }
+ scan = 9-pri;
+ reap_level = pri >> 1;
+
+ /* We really need a test semaphore op so we can avoid sleeping when
+ * !wait is true.
*/
- pri = (9 - pri);
- if (!wait) /* not allowed to wait */
- dtor_flags = SLAB_DTOR_ATOMIC;
+ down(&cache_chain_sem);
+ best_cachep = NULL;
searchp = clock_searchp;
- save_flags(save_flags);
- now = jiffies;
- best_jiffie = now - (2*HZ); /* 2secs - avoid heavy thrashing */
- while (pri--) {
- kmem_slab_t *local_slabp;
- unsigned long local_jiffie;
- if (searchp == &cache_cache)
+ do {
+ unsigned long full_free;
+ /* It's safe to test this without holding the cache-lock. */
+ if (searchp->c_flags & SLAB_NO_REAP)
goto next;
-
- /* sanity check for corruption */
+ spin_lock_irq(&searchp->c_spinlock);
+ if (searchp->c_growing)
+ goto next_unlock;
+ if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
+ searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
+ goto next_unlock;
+ }
+ /* Sanity check for corruption of static values. */
if (searchp->c_inuse || searchp->c_magic != SLAB_C_MAGIC) {
- printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n",
- searchp->c_name);
+ spin_unlock_irq(&searchp->c_spinlock);
+ printk(KERN_ERR "kmem_reap: Corrupted cache struct for %s\n", searchp->c_name);
goto next;
}
+ full_free = 0;
- local_slabp = NULL;
- local_jiffie = now - (2*HZ);
- cli();
- /* As the fully free slabs, within a cache, have no particular
- * order, we need to test them all. Infact, we only check 'count'
- * slabs.
+ /* Count num of fully free slabs. Hopefully there are not many,
+ * we are holding the cache lock....
*/
slabp = searchp->c_lastp;
- for (;count && slabp != kmem_slab_end(searchp) && !slabp->s_inuse; slabp = slabp->s_prevp, count--) {
- if (slabp->s_jiffies >= local_jiffie)
- continue;
-
- /* weight caches with a con/decon */
- if ((searchp->c_ctor || searchp->c_dtor) && slabp->s_jiffies >= (local_jiffie - (2*HZ)))
- continue;
-
- /* weight caches with high page orders. Avoids stressing the
- * VM sub-system by reducing the frequency requests for a large
- * num of contigious pages
- */
- if (searchp->c_gfporder > 1 && slabp->s_jiffies >= (local_jiffie - (4*HZ)))
- continue;
+ while (!slabp->s_inuse && slabp != kmem_slab_end(searchp)) {
+ slabp = slabp->s_prevp;
+ full_free++;
+ }
+ spin_unlock_irq(&searchp->c_spinlock);
- local_jiffie = slabp->s_jiffies;
- local_slabp = slabp;
- if (!searchp->c_gfporder && (now-local_jiffie) >= (300*HZ)) {
- /* an old, one page slab. Make a quick get away... */
- pri = 0;
+ if (full_free) {
+ if (full_free >= 10) {
+ best_cachep = searchp;
break;
}
- }
- if (local_slabp) {
- if (!count || local_jiffie < best_jiffie) {
- best_slabp = local_slabp;
- best_jiffie = local_jiffie;
+
+ /* Try to avoid slabs with constructors and/or
+ * more than one page per slab (as it can be difficult
+ * to get high orders from gfp()).
+ */
+ if (pri == 6) { /* magic '6' from try_to_free_page() */
+ if (searchp->c_ctor)
+ full_free--;
+ if (full_free && searchp->c_gfporder)
+ full_free--;
+ }
+ if (full_free >= reap_level) {
+ reap_level = full_free;
best_cachep = searchp;
- if (!count)
- break;
}
}
- restore_flags(save_flags);
+ goto next;
+next_unlock:
+ spin_unlock_irq(&searchp->c_spinlock);
next:
searchp = searchp->c_nextp;
- if (searchp == clock_searchp)
- break;
- count = 8; /* # of slabs at which we force a reap */
- }
+ } while (--scan && searchp != clock_searchp);
- /* only move along with we didn't find an over allocated cache */
- if (count)
- clock_searchp = clock_searchp->c_nextp;
+ clock_searchp = searchp;
+ up(&cache_chain_sem);
- if (!best_slabp)
+ if (!best_cachep) {
+ /* couldn't find anything to reap */
return 0;
+ }
- cli();
- if (best_slabp->s_inuse) {
- /* an object in our selected slab has been
- * allocated. This souldn't happen v. often, so we
- * simply fail - which isn't ideal but will do.
- * NOTE: No test for the case where an obj has been
- * allocated from the slab, and then freed. While
- * this would change our idea of the best slab to
- * reap, it's not worth the re-calculation effort.
+ spin_lock_irq(&best_cachep->c_spinlock);
+ if (!best_cachep->c_growing && !(slabp = best_cachep->c_lastp)->s_inuse && slabp != kmem_slab_end(best_cachep)) {
+ if (slabp == best_cachep->c_freep)
+ best_cachep->c_freep = kmem_slab_end(best_cachep);
+ kmem_slab_unlink(slabp);
+ SLAB_STATS_INC_REAPED(best_cachep);
+
+ /* Safe to drop the lock. The slab is no longer linked to the
+ * cache.
*/
- restore_flags(save_flags);
- return 0;
+ spin_unlock_irq(&best_cachep->c_spinlock);
+ kmem_slab_destroy(best_cachep, slabp);
+ return 1;
}
+ spin_unlock_irq(&best_cachep->c_spinlock);
+ return 0;
+}
- if (best_cachep->c_freep == best_slabp)
- best_cachep->c_freep = best_slabp->s_nextp;
- kmem_slab_unlink(best_slabp);
-
- restore_flags(save_flags);
- kmem_slab_destroy(best_cachep, best_slabp, dtor_flags);
+#if SLAB_SELFTEST
+/* A few v. simple tests */
+static void
+kmem_self_test(void)
+{
+ kmem_cache_t *test_cachep;
- return 1;
+ printk(KERN_INFO "kmem_test() - start\n");
+ test_cachep = kmem_cache_create("test-cachep", 16, 0, SLAB_RED_ZONE|SLAB_POISION, NULL, NULL);
+ if (test_cachep) {
+ char *objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
+ if (objp) {
+ /* Write in front and past end, red-zone test. */
+ *(objp-1) = 1;
+ *(objp+16) = 1;
+ kmem_cache_free(test_cachep, objp);
+
+ /* Mess up poisioning. */
+ *objp = 10;
+ objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
+ kmem_cache_free(test_cachep, objp);
+
+ /* Mess up poisioning (again). */
+ *objp = 10;
+ kmem_cache_shrink(test_cachep);
+ }
+ }
+ printk(KERN_INFO "kmem_test() - finished\n");
}
+#endif /* SLAB_SELFTEST */
+#if defined(CONFIG_PROC_FS)
/* /proc/slabinfo
- * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab
+ * cache-name num-active-objs total-objs num-active-slabs total-slabs num-pages-per-slab
*/
int
get_slabinfo(char *buf)
@@ -1497,31 +1852,62 @@
kmem_cache_t *cachep;
kmem_slab_t *slabp;
unsigned long active_objs;
- unsigned long num_slabs, active_slabs;
unsigned long save_flags;
+ unsigned long num_slabs;
+ unsigned long num_objs;
int len=0;
+#if SLAB_STATS
+ unsigned long active_slabs;
+#endif /* SLAB_STATS */
- /* output format version, so at least we can change it without _too_
- * many complaints
+ __save_flags(save_flags);
+
+ /* Output format version, so at least we can change it without _too_
+ * many complaints.
*/
+#if SLAB_STATS
+ len = sprintf(buf, "slabinfo - version: 1.0 (statistics)\n");
+#else
len = sprintf(buf, "slabinfo - version: 1.0\n");
- save_flags(save_flags);
+#endif /* SLAB_STATS */
+ down(&cache_chain_sem);
cachep = &cache_cache;
do {
- active_slabs = num_slabs = active_objs = 0;
- cli();
- for (slabp = cachep->c_firstp;
- slabp != kmem_slab_end(cachep);
- slabp = slabp->s_nextp) {
- num_slabs++;
+#if SLAB_STATS
+ active_slabs = 0;
+#endif /* SLAB_STATS */
+ num_slabs = active_objs = 0;
+ spin_lock_irq(&cachep->c_spinlock);
+ for (slabp = cachep->c_firstp; slabp != kmem_slab_end(cachep); slabp = slabp->s_nextp) {
active_objs += slabp->s_inuse;
+ num_slabs++;
+#if SLAB_STATS
if (slabp->s_inuse)
active_slabs++;
+#endif /* SLAB_STATS */
}
- restore_flags(save_flags);
- len += sprintf(buf+len, "%-20s%lu %lu %lu %lu %d\n", cachep->c_name,
- active_objs, cachep->c_num*num_slabs,
- active_slabs, num_slabs, 1<<cachep->c_gfporder);
+ num_objs = cachep->c_num*num_slabs;
+#if SLAB_STATS
+ {
+ unsigned long errors;
+ unsigned long high = cachep->c_high_mark;
+ unsigned long grown = cachep->c_grown;
+ unsigned long reaped = cachep->c_reaped;
+ unsigned long allocs = cachep->c_num_allocations;
+ errors = (unsigned long) atomic_read(&cachep->c_errors);
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ len += sprintf(buf+len, "%-16s %6lu %6lu %4lu %4lu %4lu %6lu %7lu %5lu %4lu %4lu\n",
+ cachep->c_name, active_objs, num_objs, active_slabs, num_slabs,
+ (1<<cachep->c_gfporder)*num_slabs,
+ high, allocs, grown, reaped, errors);
+ }
+#else
+ spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
+ len += sprintf(buf+len, "%-17s %6lu %6lu\n", cachep->c_name, active_objs, num_objs);
+#endif /* SLAB_STATS */
} while ((cachep = cachep->c_nextp) != &cache_cache);
+ up(&cache_chain_sem);
+
return len;
}
+#endif /* CONFIG_PROC_FS */
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov