patch-2.3.26 linux/ipc/shm.c
- Lines: 578
- Date: Fri Nov 5 10:01:11 1999
- Orig file: v2.3.25/linux/ipc/shm.c
- Orig date: Mon Nov 1 13:56:27 1999
diff -u --recursive --new-file v2.3.25/linux/ipc/shm.c linux/ipc/shm.c
@@ -9,6 +9,8 @@
* BIGMEM support, Andrea Arcangeli <andrea@suse.de>
* SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
* HIGHMEM support, Ingo Molnar <mingo@redhat.com>
+ * avoid vmalloc and make shmmax, shmall, shmmni sysctl'able,
+ * Christoph Rohland <hans-christoph.rohland@sap.com>
*/
#include <linux/config.h>
@@ -25,7 +27,17 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
+#include "util.h"
+
+struct shmid_kernel /* extend struct shmid_ds with private fields */
+{
+ struct shmid_ds u;
+ unsigned long shm_npages; /* size of segment (pages) */
+ pte_t **shm_dir; /* ptr to array of ptrs to frames -> SHMMAX */
+ struct vm_area_struct *attaches; /* descriptors for attaches */
+ int id; /* backreference to id for shm_close */
+};
+
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, size_t size);
static int shm_map (struct vm_area_struct *shmd);
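The flat, vmalloc'ed pte array of v2.3.25 becomes a two-level directory: shm_dir points to an array of page-sized blocks of pte_t, each holding PTRS_PER_PTE entries, and a linear page index is split into a directory slot plus an offset, exactly as the SHM_ENTRY macro added below does. A minimal user-space sketch of that arithmetic, assuming the i386 value PTRS_PER_PTE == 1024:

    #include <stdio.h>

    #define PTRS_PER_PTE 1024  /* i386 value; an assumption for illustration */

    int main(void)
    {
        unsigned long idx = 3000;                /* linear page index  */
        unsigned long dir = idx / PTRS_PER_PTE;  /* which pte block    */
        unsigned long off = idx % PTRS_PER_PTE;  /* entry within block */

        /* SHM_ENTRY(shp, idx) expands to shp->shm_dir[dir][off] */
        printf("page %lu -> shm_dir[%lu][%lu]\n", idx, dir, off);
        return 0;
    }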
@@ -38,13 +50,15 @@
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif
+unsigned int shm_prm[3] = {SHMMAX, SHMALL, SHMMNI};
+
static int shm_tot = 0; /* total number of shared memory pages */
static int shm_rss = 0; /* number of shared memory pages that are in memory */
static int shm_swp = 0; /* number of shared memory pages that are in swap */
-static int max_shmid = 0; /* every used id is <= max_shmid */
+static int max_shmid = -1; /* every used id is <= max_shmid */
static DECLARE_WAIT_QUEUE_HEAD(shm_wait); /* calling findkey() may need to wait */
-static struct shmid_kernel *shm_segs[SHMMNI];
-
+static struct shmid_kernel **shm_segs = NULL;
+static unsigned int num_segs = 0;
static unsigned short shm_seq = 0; /* incremented, for recognizing stale ids */
spinlock_t shm_lock = SPIN_LOCK_UNLOCKED;
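The shm_prm array replaces the compile-time SHMMAX/SHMALL/SHMMNI constants so the limits can be tuned at run time; the sysctl table hookup itself lives outside this file (the patch header only says the three values become sysctl'able). Once wired up they appear under /proc/sys/kernel; a small user-space sketch that reads the limit back, assuming the conventional /proc/sys/kernel/shmmax path:

    #include <stdio.h>

    int main(void)
    {
        unsigned long shmmax;
        FILE *f = fopen("/proc/sys/kernel/shmmax", "r");

        if (!f)
            return 1;
        if (fscanf(f, "%lu", &shmmax) != 1) {
            fclose(f);
            return 1;
        }
        printf("shmmax = %lu\n", shmmax);
        fclose(f);
        return 0;
    }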
@@ -56,22 +70,109 @@
void __init shm_init (void)
{
- int id;
-
- for (id = 0; id < SHMMNI; id++)
- shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
- shm_tot = shm_rss = shm_seq = max_shmid = used_segs = 0;
- init_waitqueue_head(&shm_wait);
#ifdef CONFIG_PROC_FS
create_proc_read_entry("sysvipc/shm", 0, 0, sysvipc_shm_read_proc, NULL);
#endif
return;
}
+#define SHM_ENTRY(shp, index) (shp)->shm_dir[(index)/PTRS_PER_PTE][(index)%PTRS_PER_PTE]
+
+static pte_t **shm_alloc(unsigned long pages)
+{
+ unsigned short dir = pages / PTRS_PER_PTE;
+ unsigned short last = pages % PTRS_PER_PTE;
+ pte_t **ret, **ptr;
+
+ ret = kmalloc ((dir+1) * sizeof(pte_t *), GFP_KERNEL);
+ if (!ret)
+ goto out;
+
+ for (ptr = ret; ptr < ret+dir ; ptr++)
+ {
+ *ptr = (pte_t *)__get_free_page (GFP_KERNEL);
+ if (!*ptr)
+ goto free;
+ memset (*ptr, 0, PAGE_SIZE);
+ }
+
+ /* The last one is probably not of PAGE_SIZE: we use kmalloc */
+ if (last) {
+ *ptr = kmalloc (last*sizeof(pte_t), GFP_KERNEL);
+ if (!*ptr)
+ goto free;
+ memset (*ptr, 0, last*sizeof(pte_t));
+ }
+out:
+ return ret;
+
+free:
+ /* The last failed: we decrement first */
+ while (--ptr >= ret)
+ free_page ((unsigned long)*ptr);
+
+ kfree (ret);
+ return NULL;
+}
+
+
+static void shm_free(pte_t** dir, unsigned long pages)
+{
+ pte_t **ptr = dir+pages/PTRS_PER_PTE;
+
+ /* first the last page */
+ if (pages%PTRS_PER_PTE)
+ kfree (*ptr);
+ /* now the whole pages */
+ while (--ptr >= dir)
+ free_page ((unsigned long)*ptr);
+
+ /* Now the indirect block */
+ kfree (dir);
+}
+
+static int shm_expand (unsigned int size)
+{
+ int id;
+ struct shmid_kernel ** new_array;
+
+ spin_unlock(&shm_lock);
+ new_array = kmalloc (size * sizeof(struct shmid_kernel *), GFP_KERNEL);
+ spin_lock(&shm_lock);
+
+ if (!new_array)
+ return -ENOMEM;
+
+ if (size <= num_segs){ /* We check this after kmalloc so
+ nobody changes num_segs afterwards */
+ /*
+ * We never shrink the segment. If we shrink we have to
+ * check for stale handles in newseg
+ */
+ kfree (new_array);
+ return 0;
+ }
+
+ if (num_segs) {
+ memcpy (new_array, shm_segs,
+ num_segs*sizeof(struct shmid_kernel *));
+ kfree (shm_segs);
+ }
+ for (id = num_segs; id < size; id++)
+ new_array[id] = (void *) IPC_UNUSED;
+
+ shm_segs = new_array;
+ num_segs = size;
+ return 0;
+}
+
static int findkey (key_t key)
{
int id;
struct shmid_kernel *shp;
+
+ if (!num_segs)
+ return -1;
for (id = 0; id <= max_shmid; id++) {
if ((shp = shm_segs[id]) == IPC_NOID) {
@@ -89,9 +190,8 @@
__set_current_state(TASK_RUNNING);
remove_wait_queue(&shm_wait, &wait);
}
- if (shp == IPC_UNUSED)
- continue;
- if (key == shp->u.shm_perm.key)
+ if (shp != IPC_UNUSED &&
+ key == shp->u.shm_perm.key)
return id;
}
return -1;
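Note the locking dance in shm_expand above: a GFP_KERNEL kmalloc may sleep, which is not allowed while shm_lock is held, so the lock is dropped around the allocation and num_segs is re-checked afterwards in case another caller grew the array in the meantime. A user-space sketch of the same drop-allocate-recheck pattern, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void **segs;                 /* grows, never shrinks */
    static unsigned int num_segs;

    /* Called with `lock` held; returns with it held. */
    static int expand(unsigned int size)
    {
        void **new_array;

        pthread_mutex_unlock(&lock);    /* allocation may block */
        new_array = calloc(size, sizeof(*new_array));
        pthread_mutex_lock(&lock);

        if (!new_array)
            return -1;
        if (size <= num_segs) {         /* someone beat us to it */
            free(new_array);
            return 0;
        }
        memcpy(new_array, segs, num_segs * sizeof(*new_array));
        free(segs);
        segs = new_array;
        num_segs = size;
        return 0;
    }

    int main(void)
    {
        pthread_mutex_lock(&lock);
        int err = expand(16);
        pthread_mutex_unlock(&lock);
        return err ? 1 : 0;
    }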
@@ -99,18 +199,31 @@
/*
* allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID.
+ * This has to be called with the shm_lock held
*/
static int newseg (key_t key, int shmflg, size_t size)
{
struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int id;
+ int id, err;
+ unsigned int shmall, shmmni;
+
+ shmall = shm_prm[1];
+ shmmni = shm_prm[2];
+ if (shmmni > IPCMNI) {
+ printk ("shmmni reset to max of %u\n", IPCMNI);
+ shmmni = shm_prm[2] = IPCMNI;
+ }
+ if (shmmni < used_segs)
+ return -ENOSPC;
+ if ((err = shm_expand (shmmni)))
+ return err;
if (size < SHMMIN)
return -EINVAL;
- if (shm_tot + numpages >= SHMALL)
+ if (shm_tot + numpages >= shmall)
return -ENOSPC;
- for (id = 0; id < SHMMNI; id++)
+ for (id = 0; id < num_segs; id++)
if (shm_segs[id] == IPC_UNUSED) {
shm_segs[id] = (struct shmid_kernel *) IPC_NOID;
goto found;
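newseg above clamps shmmni to IPCMNI (the hard ceiling on the id array), grows the array on demand via shm_expand, and accounts the request in pages against the run-time shmall limit; the size is rounded up to whole pages first with the usual (size + PAGE_SIZE - 1) >> PAGE_SHIFT. A runnable sketch of that round-up, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12                /* 4 KiB pages; an assumption */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
        size_t size = 10000;             /* requested segment size */
        unsigned long numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

        /* 10000 bytes need 3 pages of 4096 bytes */
        printf("%zu bytes -> %lu pages\n", size, numpages);
        return 0;
    }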
@@ -126,10 +239,8 @@
wake_up (&shm_wait);
return -ENOMEM;
}
- lock_kernel();
- shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
- unlock_kernel();
- if (!shp->shm_pages) {
+ shp->shm_dir = shm_alloc (numpages);
+ if (!shp->shm_dir) {
kfree(shp);
spin_lock(&shm_lock);
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
@@ -137,8 +248,6 @@
return -ENOMEM;
}
- memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
-
shp->u.shm_perm.key = key;
shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -150,6 +259,7 @@
shp->u.shm_atime = shp->u.shm_dtime = 0;
shp->u.shm_ctime = CURRENT_TIME;
shp->shm_npages = numpages;
+ shp->id = id;
spin_lock(&shm_lock);
@@ -161,21 +271,22 @@
shm_segs[id] = shp;
used_segs++;
wake_up (&shm_wait);
- return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id;
+ return (unsigned int) shp->u.shm_perm.seq * IPCMNI + id;
}
-size_t shmmax = SHMMAX;
-
asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
{
struct shmid_kernel *shp;
int err, id = 0;
+ size_t shmmax;
+
+ shmmax = shm_prm[0];
+ if (size > shmmax)
+ return -EINVAL;
down(&current->mm->mmap_sem);
spin_lock(&shm_lock);
- if (size > shmmax) {
- err = -EINVAL;
- } else if (key == IPC_PRIVATE) {
+ if (key == IPC_PRIVATE) {
err = newseg(key, shmflg, size);
} else if ((id = findkey (key)) == -1) {
if (!(shmflg & IPC_CREAT))
@@ -193,7 +304,7 @@
else if (ipcperms (&shp->u.shm_perm, shmflg))
err = -EACCES;
else
- err = (int) shp->u.shm_perm.seq * SHMMNI + id;
+ err = (int) shp->u.shm_perm.seq * IPCMNI + id;
}
spin_unlock(&shm_lock);
up(&current->mm->mmap_sem);
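With SHMMNI no longer a constant, the user-visible shmid is now encoded radix IPCMNI: seq * IPCMNI + id, where id is the slot in shm_segs and seq is bumped each time a slot is reused so that stale handles are caught by the EIDRM checks below. A sketch of the encode/decode pair; the IPCMNI value here is an assumption (it is defined in the ipc headers):

    #include <stdio.h>

    #define IPCMNI 32768   /* per the ipc headers; value assumed here */

    int main(void)
    {
        unsigned int seq = 5, slot = 42;

        /* encode, as in newseg()/sys_shmget() */
        unsigned int shmid = seq * IPCMNI + slot;

        /* decode, as in sys_shmctl()/sys_shmat() */
        unsigned int id  = shmid % IPCMNI;   /* array slot      */
        unsigned int chk = shmid / IPCMNI;   /* expected seq no */

        printf("shmid=%u -> slot=%u seq=%u\n", shmid, id, chk);
        return 0;
    }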
@@ -214,18 +325,18 @@
if (shp == IPC_NOID || shp == IPC_UNUSED)
BUG();
shp->u.shm_perm.seq++; /* for shmat */
- shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
+ shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/IPCMNI); /* increment, but avoid overflow */
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
used_segs--;
if (id == max_shmid)
- while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
- if (!shp->shm_pages)
+ while (max_shmid-- > 0 && (shm_segs[max_shmid] == IPC_UNUSED));
+ if (!shp->shm_dir)
BUG();
spin_unlock(&shm_lock);
numpages = shp->shm_npages;
for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
pte_t pte;
- pte = shp->shm_pages[i];
+ pte = SHM_ENTRY (shp,i);
if (pte_none(pte))
continue;
if (pte_present(pte)) {
@@ -238,9 +349,7 @@
swp++;
}
}
- lock_kernel();
- vfree(shp->shm_pages);
- unlock_kernel();
+ shm_free (shp->shm_dir, numpages);
kfree(shp);
spin_lock(&shm_lock);
shm_rss -= rss;
@@ -269,19 +378,20 @@
case IPC_INFO:
{
struct shminfo shminfo;
+ spin_unlock(&shm_lock);
err = -EFAULT;
if (!buf)
goto out;
- shminfo.shmmni = SHMMNI;
- shminfo.shmmax = shmmax;
+
+ shminfo.shmmni = shminfo.shmseg = shm_prm[2];
+ shminfo.shmmax = shm_prm[0];
+ shminfo.shmall = shm_prm[1];
+
shminfo.shmmin = SHMMIN;
- shminfo.shmall = SHMALL;
- shminfo.shmseg = SHMSEG;
- spin_unlock(&shm_lock);
if(copy_to_user (buf, &shminfo, sizeof(struct shminfo)))
goto out_unlocked;
spin_lock(&shm_lock);
- err = max_shmid;
+ err = max_shmid < 0 ? 0 : max_shmid;
goto out;
}
case SHM_INFO:
@@ -298,7 +408,7 @@
if(copy_to_user (buf, &shm_info, sizeof(shm_info)))
goto out_unlocked;
spin_lock(&shm_lock);
- err = max_shmid;
+ err = max_shmid < 0 ? 0 : max_shmid;
goto out;
}
case SHM_STAT:
@@ -310,7 +420,7 @@
goto out;
if (ipcperms (&shp->u.shm_perm, S_IRUGO))
goto out;
- id = (unsigned int) shp->u.shm_perm.seq * SHMMNI + shmid;
+ id = (unsigned int) shp->u.shm_perm.seq * IPCMNI + shmid;
err = -EFAULT;
spin_unlock(&shm_lock);
if(copy_to_user (buf, &shp->u, sizeof(*buf)))
@@ -320,12 +430,13 @@
goto out;
}
- shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
err = -EINVAL;
- if (shp == IPC_UNUSED || shp == IPC_NOID)
+ if ((id = (unsigned int) shmid % IPCMNI) > max_shmid)
+ goto out;
+ if ((shp = shm_segs[id]) == IPC_UNUSED || shp == IPC_NOID)
goto out;
err = -EIDRM;
- if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+ if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
goto out;
ipcp = &shp->u.shm_perm;
@@ -480,7 +591,7 @@
if (shmid < 0)
goto out;
- shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
+ shp = shm_segs[id = (unsigned int) shmid % IPCMNI];
if (shp == IPC_UNUSED || shp == IPC_NOID)
goto out;
@@ -523,7 +634,7 @@
if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
goto out;
err = -EIDRM;
- if (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)
+ if (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)
goto out;
spin_unlock(&shm_lock);
@@ -532,13 +643,13 @@
spin_lock(&shm_lock);
if (!shmd)
goto out;
- if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / SHMMNI)) {
+ if ((shp != shm_segs[id]) || (shp->u.shm_perm.seq != (unsigned int) shmid / IPCMNI)) {
kmem_cache_free(vm_area_cachep, shmd);
err = -EIDRM;
goto out;
}
- shmd->vm_private_data = shm_segs + id;
+ shmd->vm_private_data = shm_segs[id];
shmd->vm_start = addr;
shmd->vm_end = addr + shp->shm_npages * PAGE_SIZE;
shmd->vm_mm = current->mm;
@@ -584,7 +695,7 @@
struct shmid_kernel *shp;
spin_lock(&shm_lock);
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
insert_attach(shp,shmd); /* insert shmd into shp->attaches */
shp->u.shm_nattch++;
shp->u.shm_atime = CURRENT_TIME;
@@ -604,14 +715,12 @@
spin_lock(&shm_lock);
/* remove from the list of attaches of the shm segment */
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
remove_attach(shp,shmd); /* remove from shp->attaches */
shp->u.shm_lpid = current->pid;
shp->u.shm_dtime = CURRENT_TIME;
- if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST) {
- unsigned int id = (struct shmid_kernel **)shmd->vm_private_data - shm_segs;
- killseg (id);
- }
+ if (--shp->u.shm_nattch <= 0 && shp->u.shm_perm.mode & SHM_DEST)
+ killseg (shp->id);
spin_unlock(&shm_lock);
}
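shm_close no longer derives the id by pointer arithmetic against shm_segs; that base pointer can be freed and replaced whenever shm_expand grows the array, so vm_private_data now holds the shmid_kernel pointer itself and the new id field carries the backreference for killseg. A sketch of the backreference idiom, with realloc standing in for shm_expand:

    #include <stdio.h>
    #include <stdlib.h>

    struct seg { int id; };

    int main(void)
    {
        struct seg **array = malloc(4 * sizeof(*array));
        struct seg *s = malloc(sizeof(*s));

        if (!array || !s)
            return 1;
        s->id = 3;          /* the object remembers its own slot */
        array[3] = s;

        /* growing the table may move it; any id computed as
         * "element pointer minus old base" would now be garbage */
        array = realloc(array, 1024 * sizeof(*array));
        if (!array)
            return 1;

        printf("slot via backreference: %d\n", s->id);
        free(s);
        free(array);
        return 0;
    }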
@@ -648,7 +757,7 @@
}
/*
- * page not present ... go through shm_pages
+ * page not present ... go through shm_dir
*/
static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
@@ -657,13 +766,13 @@
unsigned int idx;
struct page * page;
- shp = *(struct shmid_kernel **) shmd->vm_private_data;
+ shp = (struct shmid_kernel *) shmd->vm_private_data;
idx = (address - shmd->vm_start) >> PAGE_SHIFT;
idx += shmd->vm_pgoff;
spin_lock(&shm_lock);
again:
- pte = shp->shm_pages[idx];
+ pte = SHM_ENTRY(shp,idx);
if (!pte_present(pte)) {
if (pte_none(pte)) {
spin_unlock(&shm_lock);
@@ -672,7 +781,7 @@
goto oom;
clear_highpage(page);
spin_lock(&shm_lock);
- if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
+ if (pte_val(pte) != pte_val(SHM_ENTRY(shp, idx)))
goto changed;
} else {
swp_entry_t entry = pte_to_swp_entry(pte);
@@ -694,18 +803,18 @@
unlock_kernel();
spin_lock(&shm_lock);
shm_swp--;
- pte = shp->shm_pages[idx];
+ pte = SHM_ENTRY(shp, idx);
if (pte_present(pte))
goto present;
}
shm_rss++;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte;
+ SHM_ENTRY(shp, idx) = pte;
} else
--current->maj_flt; /* was incremented in do_no_page */
done:
- /* pte_val(pte) == shp->shm_pages[idx] */
+ /* pte_val(pte) == SHM_ENTRY (shp, idx) */
get_page(pte_page(pte));
spin_unlock(&shm_lock);
current->min_flt++;
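The fault path above is optimistic: shm_nopage samples the entry under shm_lock, drops the lock for the blocking work (clearing a fresh page or swapping one in), then retakes the lock and compares pte_val against a fresh SHM_ENTRY read, retrying via the again/changed labels when it lost a race. A user-space sketch of that sample-unlock-recheck loop, with a plain long standing in for the pte:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static long entry;                 /* stands in for SHM_ENTRY(shp, idx) */

    static void *fault_in(void)
    {
        long seen;
        void *page;

    again:
        pthread_mutex_lock(&lock);
        seen = entry;
        pthread_mutex_unlock(&lock);

        page = malloc(4096);           /* blocking work, done unlocked */

        pthread_mutex_lock(&lock);
        if (entry != seen) {           /* raced with another fault */
            pthread_mutex_unlock(&lock);
            free(page);
            goto again;
        }
        entry = (long)page;            /* install, as SHM_ENTRY(...) = pte */
        pthread_mutex_unlock(&lock);
        return page;
    }

    int main(void)
    {
        return fault_in() ? 0 : 1;
    }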
@@ -719,7 +828,7 @@
free_page_and_swap_cache(page);
goto done;
oom:
- return (struct page *)(-1);
+ return NOPAGE_OOM;
}
/*
@@ -770,7 +879,7 @@
if (idx >= shp->shm_npages)
goto next_id;
- page = shp->shm_pages[idx];
+ page = SHM_ENTRY(shp, idx);
if (!pte_present(page))
goto check_table;
page_map = pte_page(page);
@@ -788,11 +897,11 @@
unlock_kernel();
return 0;
}
- if (page_count(page_map))
+ if (page_count(page_map) != 1)
goto check_table;
if (!(page_map = prepare_highmem_swapout(page_map)))
- goto check_table;
- shp->shm_pages[idx] = swp_entry_to_pte(swap_entry);
+ goto failed;
+ SHM_ENTRY (shp, idx) = swp_entry_to_pte(swap_entry);
swap_successes++;
shm_swp++;
shm_rss--;
@@ -812,12 +921,12 @@
* Free the swap entry and set the new pte for the shm page.
*/
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
- swp_entry_t entry, struct page *page)
+ swp_entry_t entry, struct page *page)
{
pte_t pte;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte;
+ SHM_ENTRY(shp, idx) = pte;
get_page(page);
shm_rss++;
@@ -837,16 +946,16 @@
int i, n;
spin_lock(&shm_lock);
- for (i = 0; i < SHMMNI; i++) {
+ for (i = 0; i <= max_shmid; i++) {
struct shmid_kernel *seg = shm_segs[i];
if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
continue;
for (n = 0; n < seg->shm_npages; n++) {
- if (pte_none(seg->shm_pages[n]))
+ if (pte_none(SHM_ENTRY(seg,n)))
continue;
- if (pte_present(seg->shm_pages[n]))
+ if (pte_present(SHM_ENTRY(seg,n)))
continue;
- if (pte_to_swp_entry(seg->shm_pages[n]).val == entry.val) {
+ if (pte_to_swp_entry(SHM_ENTRY(seg,n)).val == entry.val) {
shm_unuse_page(seg, n, entry, page);
return;
}
@@ -865,7 +974,7 @@
len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n");
spin_lock(&shm_lock);
- for(i = 0; i < SHMMNI; i++)
+ for(i = 0; i <= max_shmid; i++)
if(shm_segs[i] != IPC_UNUSED) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
@@ -877,7 +986,7 @@
format = BIG_STRING;
len += sprintf(buffer + len, format,
shm_segs[i]->u.shm_perm.key,
- shm_segs[i]->u.shm_perm.seq * SHMMNI + i,
+ shm_segs[i]->u.shm_perm.seq * IPCMNI + i,
shm_segs[i]->u.shm_perm.mode,
shm_segs[i]->u.shm_segsz,
shm_segs[i]->u.shm_cpid,