diff -urN --exclude-from=/usr/src/dontdiff linux-2.4.17_original/linux/include/linux/sched.h linux-2.4.17_SEMUNDO/linux/include/linux/sched.h --- linux-2.4.17_original/linux/include/linux/sched.h Fri Dec 21 09:42:03 2001 +++ linux-2.4.17_SEMUNDO/linux/include/linux/sched.h Mon Jan 21 19:12:35 2002 @@ -381,7 +381,7 @@ struct tty_struct *tty; /* NULL if no tty */ unsigned int locks; /* How many file locks are being held */ /* ipc stuff */ - struct sem_undo *semundo; + struct sem_undohd *semundohd; struct sem_queue *semsleeping; /* CPU-specific state of this task */ struct thread_struct thread; diff -urN --exclude-from=/usr/src/dontdiff linux-2.4.17_original/linux/include/linux/sem.h linux-2.4.17_SEMUNDO/linux/include/linux/sem.h --- linux-2.4.17_original/linux/include/linux/sem.h Thu Nov 22 11:46:18 2001 +++ linux-2.4.17_SEMUNDO/linux/include/linux/sem.h Mon Jan 21 19:12:35 2002 @@ -121,6 +121,18 @@ short * semadj; /* array of adjustments, one per semaphore */ }; +/* Each PROCESS (i.e. collection of tasks that are running POSIX style threads) + * must share the same semundo list, in order to support POSIX SEMUNDO + * semantics for threads. The sem_undohd controls shared access to this + * list among all the tasks (threads) in that process. + */ +struct sem_undohd { + atomic_t refcnt; + spinlock_t lock; + volatile unsigned long add_count; + struct sem_undo *proc_list; +}; + asmlinkage long sys_semget (key_t key, int nsems, int semflg); asmlinkage long sys_semop (int semid, struct sembuf *sops, unsigned nsops); asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg); diff -urN --exclude-from=/usr/src/dontdiff linux-2.4.17_original/linux/ipc/sem.c linux-2.4.17_SEMUNDO/linux/ipc/sem.c --- linux-2.4.17_original/linux/ipc/sem.c Sun Sep 30 12:26:42 2001 +++ linux-2.4.17_SEMUNDO/linux/ipc/sem.c Mon Jan 21 19:08:05 2002 @@ -788,12 +788,75 @@ } } -static struct sem_undo* freeundos(struct sem_array *sma, struct sem_undo* un) +static inline void lock_semundo(void) +{ + struct sem_undohd *undohd; + + undohd = current->semundohd; + if ((undohd != NULL) && (atomic_read(&undohd->refcnt) != 1)) + spin_lock(&undohd->lock); +} + +/* This code has an interesting interaction with copy_semundo(): + * two tasks could have been sharing the semundohd at the time "first" one + * of those tasks acquires the lock acquired in lock_semundo. If the other + * tasks exits before * "the first one" releases the lock (by calling + * unlock_semundo), then the spin_unlock would NOT be called. This would + * leave the semundohd in a locked state. This would NOT be a problem unless + * the remaining task once again creates a new task that once again shares the + * semundohd. Cleanup up this last case is dealt with in copy_semundo by + * having it reinitialize the spin_lock when it once again creates a second + * task sharing the semundo. + */ +static inline void unlock_semundo(void) +{ + struct sem_undohd *undohd; + + undohd = current->semundohd; + if ((undohd != NULL) && (atomic_read(&undohd->refcnt) != 1)) + spin_unlock(&undohd->lock); +} + + +/* If the task doesn't already have a semundohd, then allocate one + * here. We guarantee there is only one thread using this undo list, + * and current is THE ONE + * + * If this allocation and assignment succeeds, but later + * portions of this code fail, there is no need to free the sem_undohd. + * Just let it stay associated with the task, and it'll be freed later + * at exit time. + * + * This can block, so callers must hold no locks. + */ +static inline int get_undohd(struct sem_undohd **undohdp) +{ + struct sem_undohd *undohd; + int size; + + undohd = current->semundohd; + if (!undohd) { + size = sizeof(struct sem_undohd); + undohd = (struct sem_undohd *) kmalloc(size, GFP_KERNEL); + if (undohd == NULL) + return -ENOMEM; + memset(undohd, 0, size); + /* don't initialize unodhd->lock here. It's done + * in copy_semundo() instead. + */ + atomic_set(&undohd->refcnt, 1); + current->semundohd = undohd; + } + *undohdp = undohd; + return 0; +} + +static struct sem_undo* freeundos(struct sem_undo* un) { struct sem_undo* u; struct sem_undo** up; - for(up = ¤t->semundo;(u=*up);up=&u->proc_next) { + for(up = ¤t->semundohd->proc_list;(u=*up);up=&u->proc_next) { if(un==u) { un=u->proc_next; *up=un; @@ -805,33 +868,87 @@ return un->proc_next; } -/* returns without sem_lock on error! */ +static inline struct sem_undo *find_undo(int semid) +{ + struct sem_undo *un; + + un = NULL; + if (current->semundohd != NULL) { + un = current->semundohd->proc_list; + } + while(un != NULL) { + if(un->semid==semid) + break; + if(un->semid==-1) + un=freeundos(un); + else + un=un->proc_next; + } + return un; +} + +/* returns without sem_lock and semundo list locks on error! */ static int alloc_undo(struct sem_array *sma, struct sem_undo** unp, int semid, int alter) { int size, nsems, error; - struct sem_undo *un; + struct sem_undo *un, *new_un; + struct sem_undohd *unhd; + unsigned long saved_add_count; + nsems = sma->sem_nsems; - size = sizeof(struct sem_undo) + sizeof(short)*nsems; + saved_add_count = 0; + if (current->semundohd != NULL) + saved_add_count = current->semundohd->add_count; sem_unlock(semid); + unlock_semundo(); + error = get_undohd(&unhd); + if (error) + return error; + + size = sizeof(struct sem_undo) + sizeof(short)*nsems; un = (struct sem_undo *) kmalloc(size, GFP_KERNEL); if (!un) return -ENOMEM; memset(un, 0, size); + lock_semundo(); error = sem_revalidate(semid, sma, nsems, alter ? S_IWUGO : S_IRUGO); if(error) { + unlock_semundo(); kfree(un); return error; } - un->semadj = (short *) &un[1]; - un->semid = semid; - un->proc_next = current->semundo; - current->semundo = un; - un->id_next = sma->undo; - sma->undo = un; + + /* alloc_undo has just + * released all locks and reacquired them. + * But, another thread may have + * added the semundo we were looking for + * during that time. + * So, we check for it again. + * only initialize and add the new one + * if we don't discover one. + */ + new_un = NULL; + if (current->semundohd->add_count != saved_add_count) + new_un = find_undo(semid); + + if (new_un != NULL) { + if (sma->undo != new_un) + BUG(); + kfree(un); + un = new_un; + } else { + current->semundohd->add_count++; + un->semadj = (short *) &un[1]; + un->semid = semid; + un->proc_next = unhd->proc_list; + unhd->proc_list = un; + un->id_next = sma->undo; + sma->undo = un; + } *unp = un; return 0; } @@ -846,6 +963,7 @@ int undos = 0, decrease = 0, alter = 0; struct sem_queue queue; + if (nsops < 1 || semid < 0) return -EINVAL; if (nsops > sc_semopm) @@ -859,17 +977,18 @@ error=-EFAULT; goto out_free; } + lock_semundo(); sma = sem_lock(semid); error=-EINVAL; if(sma==NULL) - goto out_free; + goto out_semundo_free; error = -EIDRM; if (sem_checkid(sma,semid)) - goto out_unlock_free; + goto out_unlock_semundo_free; error = -EFBIG; for (sop = sops; sop < sops + nsops; sop++) { if (sop->sem_num >= sma->sem_nsems) - goto out_unlock_free; + goto out_unlock_semundo_free; if (sop->sem_flg & SEM_UNDO) undos++; if (sop->sem_op < 0) @@ -881,24 +1000,18 @@ error = -EACCES; if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) - goto out_unlock_free; + goto out_unlock_semundo_free; if (undos) { /* Make sure we have an undo structure * for this process and this semaphore set. */ - un=current->semundo; - while(un != NULL) { - if(un->semid==semid) - break; - if(un->semid==-1) - un=freeundos(sma,un); - else - un=un->proc_next; - } + + un = find_undo(semid); if (!un) { error = alloc_undo(sma,&un,semid,alter); - if(error) + if (error) goto out_free; + } } else un = NULL; @@ -930,16 +1043,18 @@ queue.sleeper = current; current->state = TASK_INTERRUPTIBLE; sem_unlock(semid); + unlock_semundo(); schedule(); + lock_semundo(); tmp = sem_lock(semid); if(tmp==NULL) { if(queue.prev != NULL) BUG(); current->semsleeping = NULL; error = -EIDRM; - goto out_free; + goto out_semundo_free; } /* * If queue.status == 1 we where woken up and @@ -960,7 +1075,7 @@ break; /* Everything done by update_queue */ current->semsleeping = NULL; - goto out_unlock_free; + goto out_unlock_semundo_free; } } current->semsleeping = NULL; @@ -968,14 +1083,61 @@ update: if (alter) update_queue (sma); -out_unlock_free: +out_unlock_semundo_free: sem_unlock(semid); +out_semundo_free: + unlock_semundo(); out_free: if(sops != fast_sops) kfree(sops); return error; } +/* For now, assume that if ALL clone flags are set, then + * we must be creating a POSIX thread, and we want undo lists + * to be shared among all the threads in that thread group. + * + * See the notes above unlock_semundo() regarding the spin_lock_init() + * in this code. Initialize the undohd->lock here instead of get_undohd() + * because of the reasoning in the note referenced here. + */ +#define CLONE_SEMUNDO (CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND) + +int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) +{ + struct sem_undohd *undohd; + int error; + + if (((clone_flags & CLONE_SEMUNDO) == CLONE_SEMUNDO) || + (clone_flags & CLONE_THREAD)) { + error = get_undohd(&undohd); + if (error) + return error; + if (atomic_read(&undohd->refcnt) == 1) + spin_lock_init(&undohd->lock); + atomic_inc(&undohd->refcnt); + tsk->semundohd = undohd; + } else + tsk->semundohd = NULL; + + return 0; +} + +static inline void __exit_semundo(struct task_struct *tsk) +{ + struct sem_undohd *unhd; + + unhd = tsk->semundohd; + if (!atomic_dec_and_test(&unhd->refcnt)) + kfree(unhd); +} + +void exit_semundo(struct task_struct *tsk) +{ + if (tsk->semundohd != NULL) + __exit_semundo(tsk); +} + /* * add semadj values to semaphores, free undo structures. * undo structures are not freed when semaphore arrays are destroyed @@ -993,6 +1155,7 @@ struct sem_queue *q; struct sem_undo *u, *un = NULL, **up, **unp; struct sem_array *sma; + struct sem_undohd *undohd; int nsems, i; /* If the current process was sleeping for a semaphore, @@ -1012,7 +1175,14 @@ sem_unlock(semid); } - for (up = ¤t->semundo; (u = *up); *up = u->proc_next, kfree(u)) { + undohd = current->semundohd; + if ((undohd == NULL) || (atomic_read(&undohd->refcnt) != 1)) + return; + + /* There's no need to hold the semundo list lock, as current + * is the last task exiting for this undo list. + */ + for (up = &undohd->proc_list; (u = *up); *up = u->proc_next, kfree(u)) { int semid = u->semid; if(semid == -1) continue; @@ -1050,7 +1220,7 @@ next_entry: sem_unlock(semid); } - current->semundo = NULL; + __exit_semundo(current); } #ifdef CONFIG_PROC_FS diff -urN --exclude-from=/usr/src/dontdiff linux-2.4.17_original/linux/ipc/util.c linux-2.4.17_SEMUNDO/linux/ipc/util.c --- linux-2.4.17_original/linux/ipc/util.c Sun Aug 12 17:37:53 2001 +++ linux-2.4.17_SEMUNDO/linux/ipc/util.c Mon Jan 21 19:08:05 2002 @@ -340,6 +340,17 @@ * Dummy functions when SYSV IPC isn't configured */ +int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) +{ + return 0; +} + +void exit_semundo(struct task_struct *tsk) +{ + return; +} + + void sem_exit (void) { return; diff -urN --exclude-from=/usr/src/dontdiff linux-2.4.17_original/linux/kernel/fork.c linux-2.4.17_SEMUNDO/linux/kernel/fork.c --- linux-2.4.17_original/linux/kernel/fork.c Wed Nov 21 10:18:42 2001 +++ linux-2.4.17_SEMUNDO/linux/kernel/fork.c Mon Jan 21 19:08:05 2002 @@ -26,6 +26,9 @@ #include #include +extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk); +extern void exit_semundo(struct task_struct *tsk); + /* The idle threads do not count.. */ int nr_threads; int nr_running; @@ -653,8 +656,10 @@ retval = -ENOMEM; /* copy all the process information */ - if (copy_files(clone_flags, p)) + if (copy_semundo(clone_flags, p)) goto bad_fork_cleanup; + if (copy_files(clone_flags, p)) + goto bad_fork_cleanup_semundo; if (copy_fs(clone_flags, p)) goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) @@ -664,7 +669,6 @@ retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_mm; - p->semundo = NULL; /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ @@ -738,6 +742,8 @@ exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ +bad_fork_cleanup_semundo: + exit_semundo(p); bad_fork_cleanup: put_exec_domain(p->exec_domain); if (p->binfmt && p->binfmt->module)