1 diff -ruN a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
2 --- a/innobase/btr/btr0cur.c 2009-10-22 15:15:05.000000000 +0900
3 +++ b/innobase/btr/btr0cur.c 2009-10-22 15:18:44.000000000 +0900
5 #ifdef UNIV_SEARCH_PERF_STAT
8 - if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
9 + if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
10 && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
12 #ifdef PAGE_CUR_LE_OR_EXTENDS
13 diff -ruN a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
14 --- a/innobase/btr/btr0sea.c 2009-10-22 15:15:05.000000000 +0900
15 +++ b/innobase/btr/btr0sea.c 2009-10-22 15:18:44.000000000 +0900
17 rw_lock_s_lock(&btr_search_latch);
20 - ut_ad(btr_search_latch.writer != RW_LOCK_EX);
21 - ut_ad(btr_search_latch.reader_count > 0);
22 + ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
23 + ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
25 rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
27 diff -ruN a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
28 --- a/innobase/buf/buf0buf.c 2009-10-22 15:15:05.000000000 +0900
29 +++ b/innobase/buf/buf0buf.c 2009-10-22 15:18:44.000000000 +0900
32 if (mode == BUF_GET_NOWAIT) {
33 if (rw_latch == RW_S_LATCH) {
34 - success = rw_lock_s_lock_func_nowait(&(block->lock),
35 + success = rw_lock_s_lock_nowait(&(block->lock),
37 fix_type = MTR_MEMO_PAGE_S_FIX;
40 ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
42 if (rw_latch == RW_S_LATCH) {
43 - success = rw_lock_s_lock_func_nowait(&(block->lock),
44 + success = rw_lock_s_lock_nowait(&(block->lock),
46 fix_type = MTR_MEMO_PAGE_S_FIX;
49 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
51 if (rw_latch == RW_S_LATCH) {
52 - success = rw_lock_s_lock_func_nowait(&(block->lock),
53 + success = rw_lock_s_lock_nowait(&(block->lock),
55 fix_type = MTR_MEMO_PAGE_S_FIX;
57 diff -ruN a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
58 --- a/innobase/include/buf0buf.ic 2009-10-22 15:15:05.000000000 +0900
59 +++ b/innobase/include/buf0buf.ic 2009-10-22 16:12:25.000000000 +0900
61 #ifdef UNIV_SYNC_DEBUG
64 - ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
65 + ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
68 ut_ad(mutex_own(&block->mutex));
69 diff -ruN a/innobase/include/os0sync.h b/innobase/include/os0sync.h
70 --- a/innobase/include/os0sync.h 2009-09-10 04:02:59.000000000 +0900
71 +++ b/innobase/include/os0sync.h 2009-10-22 15:18:44.000000000 +0900
73 +/*****************************************************************************
75 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
76 +Copyright (c) 2008, Google Inc.
78 +Portions of this file contain modifications contributed and copyrighted by
79 +Google, Inc. Those modifications are gratefully acknowledged and are described
80 +briefly in the InnoDB documentation. The contributions by Google are
81 +incorporated with their permission, and subject to the conditions contained in
82 +the file COPYING.Google.
84 +This program is free software; you can redistribute it and/or modify it under
85 +the terms of the GNU General Public License as published by the Free Software
86 +Foundation; version 2 of the License.
88 +This program is distributed in the hope that it will be useful, but WITHOUT
89 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
90 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
92 +You should have received a copy of the GNU General Public License along with
93 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
94 +Place, Suite 330, Boston, MA 02111-1307 USA
96 +*****************************************************************************/
98 /******************************************************
99 The interface to the operating system
100 synchronization primitives.
102 -(c) 1995 Innobase Oy
104 Created 9/6/1995 Heikki Tuuri
105 *******************************************************/
112 os_fast_mutex_t* fast_mutex); /* in: mutex to free */
114 +#ifdef HAVE_ATOMIC_BUILTINS
115 +/**************************************************************
116 +Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins.
117 +Returns true if swapped, ptr is pointer to target, old_val is value to
118 +compare to, new_val is the value to swap in. */
119 +#define os_compare_and_swap(ptr, old_val, new_val) \
120 + __sync_bool_compare_and_swap(ptr, old_val, new_val)
122 +/**************************************************************
123 +Atomic increment for InnoDB. Currently requires GCC atomic builtins.
124 +Returns the resulting value, ptr is pointer to target, amount is the
125 +amount of increment. */
126 +#define os_atomic_increment(ptr, amount) \
127 + __sync_add_and_fetch(ptr, amount)
129 +#endif /* HAVE_ATOMIC_BUILTINS */
132 #include "os0sync.ic"
134 diff -ruN a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
135 --- a/innobase/include/sync0rw.h 2009-09-10 04:02:59.000000000 +0900
136 +++ b/innobase/include/sync0rw.h 2009-10-22 15:18:44.000000000 +0900
138 +/*****************************************************************************
140 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
141 +Copyright (c) 2008, Google Inc.
143 +Portions of this file contain modifications contributed and copyrighted by
144 +Google, Inc. Those modifications are gratefully acknowledged and are described
145 +briefly in the InnoDB documentation. The contributions by Google are
146 +incorporated with their permission, and subject to the conditions contained in
147 +the file COPYING.Google.
149 +This program is free software; you can redistribute it and/or modify it under
150 +the terms of the GNU General Public License as published by the Free Software
151 +Foundation; version 2 of the License.
153 +This program is distributed in the hope that it will be useful, but WITHOUT
154 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
155 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
157 +You should have received a copy of the GNU General Public License along with
158 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
159 +Place, Suite 330, Boston, MA 02111-1307 USA
161 +*****************************************************************************/
163 /******************************************************
164 The read-write lock (for threads, not for database transactions)
166 -(c) 1995 Innobase Oy
168 Created 9/11/1995 Heikki Tuuri
169 *******************************************************/
173 #define RW_NO_LATCH 3
175 +/* We decrement lock_word by this amount for each x_lock. It is also the
176 +start value for the lock_word, meaning that it limits the maximum number
177 +of concurrent read locks before the rw_lock breaks. The current value of
178 +0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
179 +#define X_LOCK_DECR 0x00100000
181 typedef struct rw_lock_struct rw_lock_t;
182 #ifdef UNIV_SYNC_DEBUG
183 typedef struct rw_lock_debug_struct rw_lock_debug_t;
185 there may be waiters for the event */
186 #endif /* UNIV_SYNC_DEBUG */
188 -extern ulint rw_s_system_call_count;
189 -extern ulint rw_s_spin_wait_count;
190 -extern ulint rw_s_exit_count;
191 -extern ulint rw_s_os_wait_count;
192 -extern ulint rw_x_system_call_count;
193 -extern ulint rw_x_spin_wait_count;
194 -extern ulint rw_x_os_wait_count;
195 -extern ulint rw_x_exit_count;
196 +extern ib_longlong rw_s_spin_wait_count;
197 +extern ib_longlong rw_s_spin_round_count;
198 +extern ib_longlong rw_s_exit_count;
199 +extern ib_longlong rw_s_os_wait_count;
200 +extern ib_longlong rw_x_spin_wait_count;
201 +extern ib_longlong rw_x_spin_round_count;
202 +extern ib_longlong rw_x_os_wait_count;
203 +extern ib_longlong rw_x_exit_count;
205 /**********************************************************************
206 Creates, or rather, initializes an rw-lock object in a specified memory
208 NOTE! The following macros should be used in rw s-locking, not the
209 corresponding function. */
211 -#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\
212 - (M), __FILE__, __LINE__)
213 +#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
215 +/**********************************************************************
216 +Low-level function which tries to lock an rw-lock in s-mode. Performs no
222 + /* out: TRUE if success */
223 + rw_lock_t* lock, /* in: pointer to rw-lock */
224 + ulint pass __attribute__((unused)),
225 + /* in: pass value; != 0, if the lock will be
226 + passed to another thread to unlock */
227 + const char* file_name, /* in: file name where lock requested */
228 + ulint line); /* in: line where requested */
229 /**********************************************************************
230 NOTE! Use the corresponding macro, not directly this function, except if
231 you supply the file name and line number. Lock an rw-lock in shared mode
233 const char* file_name,/* in: file name where lock requested */
234 ulint line); /* in: line where requested */
235 /**********************************************************************
236 -NOTE! Use the corresponding macro, not directly this function, except if
237 -you supply the file name and line number. Lock an rw-lock in shared mode
238 -for the current thread if the lock can be acquired immediately. */
241 -rw_lock_s_lock_func_nowait(
242 -/*=======================*/
243 - /* out: TRUE if success */
244 - rw_lock_t* lock, /* in: pointer to rw-lock */
245 - const char* file_name,/* in: file name where lock requested */
246 - ulint line); /* in: line where requested */
247 -/**********************************************************************
248 NOTE! Use the corresponding macro, not directly this function! Lock an
249 rw-lock in exclusive mode for the current thread if the lock can be
250 obtained immediately. */
252 rw_lock_get_reader_count(
253 /*=====================*/
255 +/**********************************************************************
256 +Decrements lock_word by the specified amount if lock_word is greater than 0.
257 +This is used by both s_lock and x_lock operations. */
260 +rw_lock_lock_word_decr(
261 +/*===================*/
262 + /* out: TRUE if decr occurs */
263 + rw_lock_t* lock, /* in: rw-lock */
264 + ulint amount); /* in: amount to decrement */
265 +/**********************************************************************
266 +Increments lock_word by the specified amount and returns the new value. */
269 +rw_lock_lock_word_incr(
270 +/*===================*/
271 + /* out: lock->lock_word after increment */
273 + ulint amount); /* in: amount of increment */
274 +/**********************************************************************
275 +This function sets the lock->writer_thread and lock->recursive fields.
276 +For platforms where we are using atomic builtins instead of lock->mutex
277 +it sets the lock->writer_thread field using atomics to ensure memory
278 +ordering. Note that it is assumed that the caller of this function
279 +effectively owns the lock i.e.: nobody else is allowed to modify
280 +lock->writer_thread at this point in time.
281 +The protocol is that lock->writer_thread MUST be updated BEFORE the
282 +lock->recursive flag is set. */
285 +rw_lock_set_writer_id_and_recursion_flag(
286 +/*=====================================*/
287 + rw_lock_t* lock, /* in/out: lock to work on */
288 + ibool recursive); /* in: TRUE if recursion
290 #ifdef UNIV_SYNC_DEBUG
291 /**********************************************************************
292 Checks if the thread has locked the rw-lock in the specified mode, with
293 @@ -417,47 +483,33 @@
294 field. Then no new readers are allowed in. */
296 struct rw_lock_struct {
297 + volatile lint lock_word;
298 + /* Holds the state of the lock. */
299 + volatile ulint waiters;/* 1: there are waiters */
300 + volatile ibool recursive;/* Default value FALSE which means the lock
301 + is non-recursive. The value is typically set
302 + to TRUE making normal rw_locks recursive. In
303 + case of asynchronous IO, when a non-zero
304 + value of 'pass' is passed then we keep the
305 + lock non-recursive.
306 + This flag also tells us about the state of
307 + writer_thread field. If this flag is set
308 + then writer_thread MUST contain the thread
309 + id of the current x-holder or wait-x thread.
310 + This flag must be reset in x_unlock
311 + functions before incrementing the lock_word */
312 + volatile os_thread_id_t writer_thread;
313 + /* Thread id of writer thread. Is
314 + guaranteed to have a sane and non-stale
315 + value only if the recursive flag is set. */
316 os_event_t event; /* Used by sync0arr.c for thread queueing */
319 - os_event_t wait_ex_event; /* This windows specific event is
320 - used by the thread which has set the
321 - lock state to RW_LOCK_WAIT_EX. The
322 - rw_lock design guarantees that this
323 - thread will be the next one to proceed
324 - once the current the event gets
325 - signalled. See LEMMA 2 in sync0sync.c */
328 - ulint reader_count; /* Number of readers who have locked this
329 - lock in the shared mode */
330 - ulint writer; /* This field is set to RW_LOCK_EX if there
331 - is a writer owning the lock (in exclusive
332 - mode), RW_LOCK_WAIT_EX if a writer is
333 - queueing for the lock, and
334 - RW_LOCK_NOT_LOCKED, otherwise. */
335 - os_thread_id_t writer_thread;
336 - /* Thread id of a possible writer thread */
337 - ulint writer_count; /* Number of times the same thread has
338 - recursively locked the lock in the exclusive
340 + os_event_t wait_ex_event;
341 + /* Event for next-writer to wait on. A thread
342 + must decrement lock_word before waiting. */
343 +#ifndef HAVE_ATOMIC_BUILTINS
344 mutex_t mutex; /* The mutex protecting rw_lock_struct */
345 - ulint pass; /* Default value 0. This is set to some
346 - value != 0 given by the caller of an x-lock
347 - operation, if the x-lock is to be passed to
348 - another thread to unlock (which happens in
349 - asynchronous i/o). */
350 - ulint waiters; /* This ulint is set to 1 if there are
351 - waiters (readers or writers) in the global
352 - wait array, waiting for this rw_lock.
353 - Otherwise, == 0. */
354 - ibool writer_is_wait_ex;
355 - /* This is TRUE if the writer field is
356 - RW_LOCK_WAIT_EX; this field is located far
357 - from the memory update hotspot fields which
358 - are at the start of this struct, thus we can
359 - peek this field without causing much memory
361 +#endif /* HAVE_ATOMIC_BUILTINS */
363 UT_LIST_NODE_T(rw_lock_t) list;
364 /* All allocated rw locks are put into a
366 @@ -465,15 +517,23 @@
367 UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
368 /* In the debug version: pointer to the debug
369 info list of the lock */
370 + ulint level; /* Level in the global latching order. */
371 #endif /* UNIV_SYNC_DEBUG */
372 - ulint level; /* Level in the global latching
373 - order; default SYNC_LEVEL_NONE */
374 + ulint count_os_wait; /* Count of os_waits. May not be accurate */
375 const char* cfile_name;/* File name where lock created */
376 - ulint cline; /* Line where created */
377 + /* last s-lock file/line is not guaranteed to be correct */
378 const char* last_s_file_name;/* File name where last s-locked */
379 const char* last_x_file_name;/* File name where last x-locked */
380 - ulint last_s_line; /* Line number where last time s-locked */
381 - ulint last_x_line; /* Line number where last time x-locked */
382 + ibool writer_is_wait_ex;
383 + /* This is TRUE if the writer field is
384 + RW_LOCK_WAIT_EX; this field is located far
385 + from the memory update hotspot fields which
386 + are at the start of this struct, thus we can
387 + peek this field without causing much memory
389 + unsigned cline:14; /* Line where created */
390 + unsigned last_s_line:14; /* Line number where last time s-locked */
391 + unsigned last_x_line:14; /* Line number where last time x-locked */
395 diff -ruN a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic
396 --- a/innobase/include/sync0rw.ic 2009-09-10 04:02:59.000000000 +0900
397 +++ b/innobase/include/sync0rw.ic 2009-10-22 15:18:44.000000000 +0900
399 +/*****************************************************************************
401 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
402 +Copyright (c) 2008, Google Inc.
404 +Portions of this file contain modifications contributed and copyrighted by
405 +Google, Inc. Those modifications are gratefully acknowledged and are described
406 +briefly in the InnoDB documentation. The contributions by Google are
407 +incorporated with their permission, and subject to the conditions contained in
408 +the file COPYING.Google.
410 +This program is free software; you can redistribute it and/or modify it under
411 +the terms of the GNU General Public License as published by the Free Software
412 +Foundation; version 2 of the License.
414 +This program is distributed in the hope that it will be useful, but WITHOUT
415 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
416 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
418 +You should have received a copy of the GNU General Public License along with
419 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
420 +Place, Suite 330, Boston, MA 02111-1307 USA
422 +*****************************************************************************/
424 /******************************************************
425 The read-write lock (for threads)
427 -(c) 1995 Innobase Oy
429 Created 9/11/1995 Heikki Tuuri
430 *******************************************************/
437 + /* out: 1 if waiters, 0 otherwise */
438 + rw_lock_t* lock) /* in: rw-lock */
440 return(lock->waiters);
443 +/************************************************************************
444 +Sets lock->waiters to 1. It is not an error if lock->waiters is already
445 +1. On platforms where ATOMIC builtins are used this function enforces a
449 -rw_lock_set_waiters(
450 -/*================*/
453 +rw_lock_set_waiter_flag(
454 +/*====================*/
455 + rw_lock_t* lock) /* in: rw-lock */
457 - lock->waiters = flag;
458 +#ifdef HAVE_ATOMIC_BUILTINS
459 + os_compare_and_swap(&lock->waiters, 0, 1);
460 +#else /* HAVE_ATOMIC_BUILTINS */
462 +#endif /* HAVE_ATOMIC_BUILTINS */
465 +/************************************************************************
466 +Resets lock->waiters to 0. It is not an error if lock->waiters is already
467 +0. On platforms where ATOMIC builtins are used this function enforces a
475 +rw_lock_reset_waiter_flag(
476 +/*======================*/
477 + rw_lock_t* lock) /* in: rw-lock */
479 - return(lock->writer);
480 +#ifdef HAVE_ATOMIC_BUILTINS
481 + os_compare_and_swap(&lock->waiters, 1, 0);
482 +#else /* HAVE_ATOMIC_BUILTINS */
484 +#endif /* HAVE_ATOMIC_BUILTINS */
487 +/**********************************************************************
488 +Returns the write-status of the lock - this function made more sense
489 +with the old rw_lock implementation. */
500 - lock->writer = flag;
501 + lint lock_word = lock->lock_word;
502 + if(lock_word > 0) {
503 + /* return NOT_LOCKED in s-lock state, like the writer
504 + member of the old lock implementation. */
505 + return(RW_LOCK_NOT_LOCKED);
506 + } else if (((-lock_word) % X_LOCK_DECR) == 0) {
507 + return(RW_LOCK_EX);
509 + ut_ad(lock_word > -X_LOCK_DECR);
510 + return(RW_LOCK_WAIT_EX);
514 +/**********************************************************************
515 +Returns number of readers. */
518 rw_lock_get_reader_count(
519 /*=====================*/
522 - return(lock->reader_count);
526 -rw_lock_set_reader_count(
527 -/*=====================*/
531 - lock->reader_count = count;
532 + lint lock_word = lock->lock_word;
533 + if(lock_word > 0) {
534 + /* s-locked, no x-waiters */
535 + return(X_LOCK_DECR - lock_word);
536 + } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
537 + /* s-locked, with x-waiters */
538 + return((ulint)(-lock_word));
543 +#ifndef HAVE_ATOMIC_BUILTINS
549 return(&(lock->mutex));
553 /**********************************************************************
554 Returns the value of writer_count for the lock. Does not reserve the lock
555 @@ -115,7 +174,126 @@
556 /* out: value of writer_count */
557 rw_lock_t* lock) /* in: rw-lock */
559 - return(lock->writer_count);
560 + lint lock_copy = lock->lock_word;
561 + /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
562 + if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
565 + return(((-lock_copy) / X_LOCK_DECR) + 1);
568 +/**********************************************************************
569 +Two different implementations for decrementing the lock_word of a rw_lock:
570 +one for systems supporting atomic operations, one for others. This does
571 +not support recursive x-locks: they should be handled by the caller and
572 +need not be atomic since they are performed by the current lock holder.
573 +Returns true if the decrement was made, false if not. */
576 +rw_lock_lock_word_decr(
577 +/*===================*/
578 + /* out: TRUE if decr occurs */
579 + rw_lock_t* lock, /* in: rw-lock */
580 + ulint amount) /* in: amount of decrement */
583 +#ifdef HAVE_ATOMIC_BUILTINS
585 + lint local_lock_word = lock->lock_word;
586 + while (local_lock_word > 0) {
587 + if(os_compare_and_swap(&(lock->lock_word),
589 + local_lock_word - amount)) {
592 + local_lock_word = lock->lock_word;
596 +#else /* HAVE_ATOMIC_BUILTINS */
598 + ibool success = FALSE;
599 + mutex_enter(&(lock->mutex));
600 + if(lock->lock_word > 0) {
601 + lock->lock_word -= amount;
604 + mutex_exit(&(lock->mutex));
607 +#endif /* HAVE_ATOMIC_BUILTINS */
610 +/**********************************************************************
611 +Two different implementations for incrementing the lock_word of a rw_lock:
612 +one for systems supporting atomic operations, one for others.
613 +Returns the value of lock_word after increment. */
616 +rw_lock_lock_word_incr(
617 +/*===================*/
618 + /* out: lock->lock_word after increment */
619 + rw_lock_t* lock, /* in: rw-lock */
620 + ulint amount) /* in: amount of increment */
623 +#ifdef HAVE_ATOMIC_BUILTINS
625 + return(os_atomic_increment(&(lock->lock_word), amount));
627 +#else /* HAVE_ATOMIC_BUILTINS */
629 + lint local_lock_word;
631 + mutex_enter(&(lock->mutex));
633 + lock->lock_word += amount;
634 + local_lock_word = lock->lock_word;
636 + mutex_exit(&(lock->mutex));
638 + return(local_lock_word);
640 +#endif /* HAVE_ATOMIC_BUILTINS */
643 +/**********************************************************************
644 +This function sets the lock->writer_thread and lock->recursive fields.
645 +For platforms where we are using atomic builtins instead of lock->mutex
646 +it sets the lock->writer_thread field using atomics to ensure memory
647 +ordering. Note that it is assumed that the caller of this function
648 +effectively owns the lock i.e.: nobody else is allowed to modify
649 +lock->writer_thread at this point in time.
650 +The protocol is that lock->writer_thread MUST be updated BEFORE the
651 +lock->recursive flag is set. */
654 +rw_lock_set_writer_id_and_recursion_flag(
655 +/*=====================================*/
656 + rw_lock_t* lock, /* in/out: lock to work on */
657 + ibool recursive) /* in: TRUE if recursion
660 + os_thread_id_t curr_thread = os_thread_get_curr_id();
662 +#ifdef HAVE_ATOMIC_BUILTINS
663 + os_thread_id_t local_thread;
666 + local_thread = lock->writer_thread;
667 + success = os_compare_and_swap(&lock->writer_thread,
668 + local_thread, curr_thread);
670 + lock->recursive = recursive;
672 +#else /* HAVE_ATOMIC_BUILTINS */
674 + mutex_enter(&lock->mutex);
675 + lock->writer_thread = curr_thread;
676 + lock->recursive = recursive;
677 + mutex_exit(&lock->mutex);
679 +#endif /* HAVE_ATOMIC_BUILTINS */
682 /**********************************************************************
683 @@ -133,26 +311,21 @@
684 const char* file_name, /* in: file name where lock requested */
685 ulint line) /* in: line where requested */
687 -#ifdef UNIV_SYNC_DEBUG
688 - ut_ad(mutex_own(rw_lock_get_mutex(lock)));
689 -#endif /* UNIV_SYNC_DEBUG */
690 - /* Check if the writer field is free */
692 - if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
693 - /* Set the shared lock by incrementing the reader count */
694 - lock->reader_count++;
695 + /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
696 + if (!rw_lock_lock_word_decr(lock, 1)) {
697 + /* Locking did not succeed */
701 #ifdef UNIV_SYNC_DEBUG
702 - rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
704 + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
706 - lock->last_s_file_name = file_name;
707 - lock->last_s_line = line;
709 - return(TRUE); /* locking succeeded */
711 + /* These debugging values are not set safely: they may be incorrect
712 + or even refer to a line that is invalid for the file name. */
713 + lock->last_s_file_name = file_name;
714 + lock->last_s_line = line;
716 - return(FALSE); /* locking did not succeed */
717 + return(TRUE); /* locking succeeded */
720 /**********************************************************************
721 @@ -167,11 +340,10 @@
722 const char* file_name, /* in: file name where requested */
723 ulint line) /* in: line where lock requested */
725 - ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
726 - ut_ad(rw_lock_get_reader_count(lock) == 0);
727 + ut_ad(lock->lock_word == X_LOCK_DECR);
729 - /* Set the shared lock by incrementing the reader count */
730 - lock->reader_count++;
731 + /* Indicate there is a new reader by decrementing lock_word */
734 lock->last_s_file_name = file_name;
735 lock->last_s_line = line;
736 @@ -194,13 +366,11 @@
737 ulint line) /* in: line where lock requested */
739 ut_ad(rw_lock_validate(lock));
740 - ut_ad(rw_lock_get_reader_count(lock) == 0);
741 - ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
742 + ut_ad(lock->lock_word == X_LOCK_DECR);
744 - rw_lock_set_writer(lock, RW_LOCK_EX);
745 + lock->lock_word -= X_LOCK_DECR;
746 lock->writer_thread = os_thread_get_curr_id();
747 - lock->writer_count++;
749 + lock->recursive = TRUE;
751 lock->last_x_file_name = file_name;
752 lock->last_x_line = line;
753 @@ -241,15 +411,12 @@
754 ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
755 #endif /* UNIV_SYNC_DEBUG */
757 - mutex_enter(rw_lock_get_mutex(lock));
759 - if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
760 - mutex_exit(rw_lock_get_mutex(lock));
761 + /* TODO: study performance of UNIV_LIKELY branch prediction hints. */
762 + if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
764 return; /* Success */
766 /* Did not succeed, try spin wait */
767 - mutex_exit(rw_lock_get_mutex(lock));
769 rw_lock_s_lock_spin(lock, pass, file_name, line);
771 @@ -259,86 +426,60 @@
773 /**********************************************************************
774 NOTE! Use the corresponding macro, not directly this function! Lock an
775 -rw-lock in shared mode for the current thread if the lock can be acquired
777 +rw-lock in exclusive mode for the current thread if the lock can be
778 +obtained immediately. */
781 -rw_lock_s_lock_func_nowait(
782 +rw_lock_x_lock_func_nowait(
783 /*=======================*/
784 /* out: TRUE if success */
785 rw_lock_t* lock, /* in: pointer to rw-lock */
786 const char* file_name,/* in: file name where lock requested */
787 ulint line) /* in: line where requested */
789 - ibool success = FALSE;
791 - mutex_enter(rw_lock_get_mutex(lock));
793 - if (lock->writer == RW_LOCK_NOT_LOCKED) {
794 - /* Set the shared lock by incrementing the reader count */
795 - lock->reader_count++;
796 + os_thread_id_t curr_thread = os_thread_get_curr_id();
798 -#ifdef UNIV_SYNC_DEBUG
799 - rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
804 - lock->last_s_file_name = file_name;
805 - lock->last_s_line = line;
806 +#ifdef HAVE_ATOMIC_BUILTINS
807 + success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
811 + mutex_enter(&(lock->mutex));
812 + if (lock->lock_word == X_LOCK_DECR) {
813 + lock->lock_word = 0;
816 + mutex_exit(&(lock->mutex));
818 - mutex_exit(rw_lock_get_mutex(lock));
824 + rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
826 -/**********************************************************************
827 -NOTE! Use the corresponding macro, not directly this function! Lock an
828 -rw-lock in exclusive mode for the current thread if the lock can be
829 -obtained immediately. */
832 -rw_lock_x_lock_func_nowait(
833 -/*=======================*/
834 - /* out: TRUE if success */
835 - rw_lock_t* lock, /* in: pointer to rw-lock */
836 - const char* file_name,/* in: file name where lock requested */
837 - ulint line) /* in: line where requested */
839 - ibool success = FALSE;
840 - os_thread_id_t curr_thread = os_thread_get_curr_id();
841 - mutex_enter(rw_lock_get_mutex(lock));
842 + } else if (lock->recursive
843 + && os_thread_eq(lock->writer_thread, curr_thread)) {
844 + /* Relock: this lock_word modification is safe since no other
845 + threads can modify (lock, unlock, or reserve) lock_word while
846 + there is an exclusive writer and this is the writer thread. */
847 + lock->lock_word -= X_LOCK_DECR;
849 - if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
850 - } else if (UNIV_LIKELY(rw_lock_get_writer(lock)
851 - == RW_LOCK_NOT_LOCKED)) {
852 - rw_lock_set_writer(lock, RW_LOCK_EX);
853 - lock->writer_thread = curr_thread;
856 - lock->writer_count++;
857 + ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
863 #ifdef UNIV_SYNC_DEBUG
864 - rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
865 + rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
868 - lock->last_x_file_name = file_name;
869 - lock->last_x_line = line;
872 - } else if (rw_lock_get_writer(lock) == RW_LOCK_EX
874 - && os_thread_eq(lock->writer_thread, curr_thread)) {
878 - mutex_exit(rw_lock_get_mutex(lock));
879 + lock->last_x_file_name = file_name;
880 + lock->last_x_line = line;
882 ut_ad(rw_lock_validate(lock));
888 /**********************************************************************
889 @@ -354,39 +495,21 @@
893 - mutex_t* mutex = &(lock->mutex);
896 - /* Acquire the mutex protecting the rw-lock fields */
897 - mutex_enter(mutex);
899 - /* Reset the shared lock by decrementing the reader count */
901 - ut_a(lock->reader_count > 0);
902 - lock->reader_count--;
903 + ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
905 #ifdef UNIV_SYNC_DEBUG
906 rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
909 - /* If there may be waiters and this was the last s-lock,
910 - signal the object */
911 + /* Increment lock_word to indicate 1 less reader */
912 + if (rw_lock_lock_word_incr(lock, 1) == 0) {
914 - if (UNIV_UNLIKELY(lock->waiters)
915 - && lock->reader_count == 0) {
918 - rw_lock_set_waiters(lock, 0);
923 - if (UNIV_UNLIKELY(sg)) {
925 + /* wait_ex waiter exists. It may not be asleep, but we signal
926 + anyway. We do not wake other waiters, because they can't
927 + exist without wait_ex waiter and wait_ex waiter goes first.*/
928 os_event_set(lock->wait_ex_event);
930 - os_event_set(lock->event);
931 sync_array_object_signalled(sync_primary_wait_array);
935 ut_ad(rw_lock_validate(lock));
936 @@ -405,16 +528,15 @@
937 /*====================*/
938 rw_lock_t* lock) /* in: rw-lock */
940 - /* Reset the shared lock by decrementing the reader count */
942 - ut_ad(lock->reader_count > 0);
944 - lock->reader_count--;
945 + ut_ad(lock->lock_word < X_LOCK_DECR);
947 #ifdef UNIV_SYNC_DEBUG
948 rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
951 + /* Decrease reader count by incrementing lock_word */
954 ut_ad(!lock->waiters);
955 ut_ad(rw_lock_validate(lock));
956 #ifdef UNIV_SYNC_PERF_STAT
957 @@ -435,42 +557,32 @@
963 - /* Acquire the mutex protecting the rw-lock fields */
964 - mutex_enter(&(lock->mutex));
966 - /* Reset the exclusive lock if this thread no longer has an x-mode
969 - ut_ad(lock->writer_count > 0);
970 + ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
972 - lock->writer_count--;
974 - if (lock->writer_count == 0) {
975 - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
976 + /* lock->recursive flag also indicates if lock->writer_thread is
977 + valid or stale. If we are the last of the recursive callers
978 + then we must unset lock->recursive flag to indicate that the
979 + lock->writer_thread is now stale.
980 + Note that since we still hold the x-lock we can safely read the
982 + if (lock->lock_word == 0) {
983 + /* Last caller in a possible recursive chain. */
984 + lock->recursive = FALSE;
987 #ifdef UNIV_SYNC_DEBUG
988 rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
991 - /* If there may be waiters, signal the lock */
992 - if (UNIV_UNLIKELY(lock->waiters)
993 - && lock->writer_count == 0) {
996 - rw_lock_set_waiters(lock, 0);
999 - mutex_exit(&(lock->mutex));
1001 - if (UNIV_UNLIKELY(sg)) {
1003 - os_event_set(lock->wait_ex_event);
1005 - os_event_set(lock->event);
1006 - sync_array_object_signalled(sync_primary_wait_array);
1007 + if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
1008 + /* Lock is now free. May have to signal read/write waiters.
1009 + We do not need to signal wait_ex waiters, since they cannot
1010 + exist when there is a writer. */
1011 + if (lock->waiters) {
1012 + rw_lock_reset_waiter_flag(lock);
1013 + os_event_set(lock->event);
1014 + sync_array_object_signalled(sync_primary_wait_array);
1018 ut_ad(rw_lock_validate(lock));
1019 @@ -492,18 +604,18 @@
1020 /* Reset the exclusive lock if this thread no longer has an x-mode
1023 - ut_ad(lock->writer_count > 0);
1025 - lock->writer_count--;
1027 - if (lock->writer_count == 0) {
1028 - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
1030 + ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
1032 #ifdef UNIV_SYNC_DEBUG
1033 rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
1036 + if (lock->lock_word == 0) {
1037 + lock->recursive = FALSE;
1040 + lock->lock_word += X_LOCK_DECR;
1042 ut_ad(!lock->waiters);
1043 ut_ad(rw_lock_validate(lock));
1045 diff -ruN a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
1046 --- a/innobase/include/sync0sync.h 2009-10-22 15:15:05.000000000 +0900
1047 +++ b/innobase/include/sync0sync.h 2009-10-22 15:18:44.000000000 +0900
1049 +/*****************************************************************************
1051 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
1052 +Copyright (c) 2008, Google Inc.
1054 +Portions of this file contain modifications contributed and copyrighted by
1055 +Google, Inc. Those modifications are gratefully acknowledged and are described
1056 +briefly in the InnoDB documentation. The contributions by Google are
1057 +incorporated with their permission, and subject to the conditions contained in
1058 +the file COPYING.Google.
1060 +This program is free software; you can redistribute it and/or modify it under
1061 +the terms of the GNU General Public License as published by the Free Software
1062 +Foundation; version 2 of the License.
1064 +This program is distributed in the hope that it will be useful, but WITHOUT
1065 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1066 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
1068 +You should have received a copy of the GNU General Public License along with
1069 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
1070 +Place, Suite 330, Boston, MA 02111-1307 USA
1072 +*****************************************************************************/
1074 /******************************************************
1075 Mutex, the basic synchronization primitive
1077 -(c) 1995 Innobase Oy
1079 Created 9/5/1995 Heikki Tuuri
1080 *******************************************************/
1082 @@ -465,8 +488,11 @@
1083 struct mutex_struct {
1084 os_event_t event; /* Used by sync0arr.c for the wait queue */
1085 ulint lock_word; /* This ulint is the target of the atomic
1086 - test-and-set instruction in Win32 */
1087 -#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
1088 + test-and-set instruction in Win32 and
1089 + x86 32/64 with GCC 4.1.0 or later version */
1090 +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
1091 +#elif defined(HAVE_ATOMIC_BUILTINS)
1094 os_fast_mutex; /* In other systems we use this OS mutex
1095 in place of lock_word */
1097 /* The number of system calls made in this module. Intended for performance
1100 -extern ulint mutex_system_call_count;
1101 -extern ulint mutex_exit_count;
1102 +extern ib_longlong mutex_exit_count;
1104 /* Latching order checks start when this is set TRUE */
1105 extern ibool sync_order_checks_on;
1106 diff -ruN a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
1107 --- a/innobase/include/sync0sync.ic 2009-09-10 04:02:59.000000000 +0900
1108 +++ b/innobase/include/sync0sync.ic 2009-10-22 15:18:44.000000000 +0900
1110 +/*****************************************************************************
1112 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
1113 +Copyright (c) 2008, Google Inc.
1115 +Portions of this file contain modifications contributed and copyrighted by
1116 +Google, Inc. Those modifications are gratefully acknowledged and are described
1117 +briefly in the InnoDB documentation. The contributions by Google are
1118 +incorporated with their permission, and subject to the conditions contained in
1119 +the file COPYING.Google.
1121 +This program is free software; you can redistribute it and/or modify it under
1122 +the terms of the GNU General Public License as published by the Free Software
1123 +Foundation; version 2 of the License.
1125 +This program is distributed in the hope that it will be useful, but WITHOUT
1126 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1127 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
1129 +You should have received a copy of the GNU General Public License along with
1130 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
1131 +Place, Suite 330, Boston, MA 02111-1307 USA
1133 +*****************************************************************************/
1135 /******************************************************
1136 Mutex, the basic synchronization primitive
1138 -(c) 1995 Innobase Oy
1140 Created 9/5/1995 Heikki Tuuri
1141 *******************************************************/
1143 -#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
1144 -/* %z0: Use the size of operand %0 which in our case is *m to determine
1145 -instruction size, it should end up as xchgl. "1" in the input constraint,
1146 -says that "in" has to go in the same place as "out".*/
1147 -#define TAS(m, in, out) \
1148 - asm volatile ("xchg%z0 %2, %0" \
1149 - : "=g" (*(m)), "=r" (out) \
1150 - : "1" (in)) /* Note: "1" here refers to "=r" (out) */
1153 /**********************************************************************
1154 Sets the waiters field in a mutex. */
1157 /* mutex_fence(); */
1160 -#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
1163 - TAS(&mutex->lock_word, 1, res);
1166 +#elif defined(HAVE_ATOMIC_BUILTINS)
1167 + return __sync_lock_test_and_set(&(mutex->lock_word), 1);
1171 @@ -136,10 +145,11 @@
1174 __asm XCHG EDX, DWORD PTR [ECX]
1175 -#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
1178 - TAS(&mutex->lock_word, 0, res);
1179 +#elif defined(HAVE_ATOMIC_BUILTINS)
1180 + /* In theory __sync_lock_release should be used to release the lock.
1181 + Unfortunately, it does not work properly alone. The workaround is
1182 + that more conservative __sync_lock_test_and_set is used instead. */
1183 + __sync_lock_test_and_set(&(mutex->lock_word), 0);
1185 mutex->lock_word = 0;
1187 diff -ruN a/innobase/row/row0sel.c b/innobase/row/row0sel.c
1188 --- a/innobase/row/row0sel.c 2009-10-22 15:15:05.000000000 +0900
1189 +++ b/innobase/row/row0sel.c 2009-10-22 15:18:44.000000000 +0900
1190 @@ -1178,7 +1178,7 @@
1191 rw_lock_s_lock(&btr_search_latch);
1193 search_latch_locked = TRUE;
1194 - } else if (btr_search_latch.writer_is_wait_ex) {
1195 + } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
1197 /* There is an x-latch request waiting: release the
1198 s-latch for a moment; as an s-latch here is often
1199 @@ -3123,7 +3123,7 @@
1200 /* PHASE 0: Release a possible s-latch we are holding on the
1201 adaptive hash index latch if there is someone waiting behind */
1203 - if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
1204 + if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
1205 && trx->has_search_latch) {
1207 /* There is an x-latch request on the adaptive hash index:
1208 diff -ruN a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
1209 --- a/innobase/sync/sync0arr.c 2009-09-10 04:03:01.000000000 +0900
1210 +++ b/innobase/sync/sync0arr.c 2009-10-22 15:18:44.000000000 +0900
1212 +/*****************************************************************************
1214 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
1215 +Copyright (c) 2008, Google Inc.
1217 +Portions of this file contain modifications contributed and copyrighted by
1218 +Google, Inc. Those modifications are gratefully acknowledged and are described
1219 +briefly in the InnoDB documentation. The contributions by Google are
1220 +incorporated with their permission, and subject to the conditions contained in
1221 +the file COPYING.Google.
1223 +This program is free software; you can redistribute it and/or modify it under
1224 +the terms of the GNU General Public License as published by the Free Software
1225 +Foundation; version 2 of the License.
1227 +This program is distributed in the hope that it will be useful, but WITHOUT
1228 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1229 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
1231 +You should have received a copy of the GNU General Public License along with
1232 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
1233 +Place, Suite 330, Boston, MA 02111-1307 USA
1235 +*****************************************************************************/
1237 /******************************************************
1238 The wait array used in synchronization primitives
1240 -(c) 1995 Innobase Oy
1242 Created 9/5/1995 Heikki Tuuri
1243 *******************************************************/
1245 @@ -297,25 +320,21 @@
1248 /***********************************************************************
1249 -Puts the cell event in reset state. */
1250 +Returns the event that the thread owning the cell waits for. */
1253 -sync_cell_event_reset(
1254 -/*==================*/
1255 - /* out: value of signal_count
1256 - at the time of reset. */
1257 - ulint type, /* in: lock type mutex/rw_lock */
1258 - void* object) /* in: the rw_lock/mutex object */
1260 +sync_cell_get_event(
1261 +/*================*/
1262 + sync_cell_t* cell) /* in: non-empty sync array cell */
1264 + ulint type = cell->request_type;
1266 if (type == SYNC_MUTEX) {
1267 - return(os_event_reset(((mutex_t *) object)->event));
1269 + return(((mutex_t *) cell->wait_object)->event);
1270 } else if (type == RW_LOCK_WAIT_EX) {
1271 - return(os_event_reset(
1272 - ((rw_lock_t *) object)->wait_ex_event));
1275 - return(os_event_reset(((rw_lock_t *) object)->event));
1276 + return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
1277 + } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
1278 + return(((rw_lock_t *) cell->wait_object)->event);
1283 ulint* index) /* out: index of the reserved cell */
1291 /* Make sure the event is reset and also store
1292 the value of signal_count at which the event
1294 - cell->signal_count = sync_cell_event_reset(type,
1296 + event = sync_cell_get_event(cell);
1297 + cell->signal_count = os_event_reset(event);
1299 cell->reservation_time = time(NULL);
1301 @@ -413,19 +433,7 @@
1302 ut_a(!cell->waiting);
1303 ut_ad(os_thread_get_curr_id() == cell->thread);
1305 - if (cell->request_type == SYNC_MUTEX) {
1306 - event = ((mutex_t*) cell->wait_object)->event;
1308 - /* On windows if the thread about to wait is the one which
1309 - has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
1310 - it waits on a special event i.e.: wait_ex_event. */
1311 - } else if (cell->request_type == RW_LOCK_WAIT_EX) {
1312 - event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
1315 - event = ((rw_lock_t*) cell->wait_object)->event;
1318 + event = sync_cell_get_event(cell);
1319 cell->waiting = TRUE;
1321 #ifdef UNIV_SYNC_DEBUG
1328 type = cell->request_type;
1331 (ulong) mutex->waiters);
1333 } else if (type == RW_LOCK_EX
1335 || type == RW_LOCK_WAIT_EX
1337 || type == RW_LOCK_SHARED) {
1339 fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
1340 @@ -505,21 +512,24 @@
1341 " RW-latch at %p created in file %s line %lu\n",
1342 rwlock, rwlock->cfile_name,
1343 (ulong) rwlock->cline);
1344 - if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
1345 + writer = rw_lock_get_writer(rwlock);
1346 + if (writer != RW_LOCK_NOT_LOCKED) {
1348 "a writer (thread id %lu) has reserved it in mode %s",
1349 (ulong) os_thread_pf(rwlock->writer_thread),
1350 - rwlock->writer == RW_LOCK_EX
1351 + writer == RW_LOCK_EX
1353 : " wait exclusive\n");
1357 - "number of readers %lu, waiters flag %lu\n"
1358 + "number of readers %lu, waiters flag %lu, "
1359 + "lock_word: %lx\n"
1360 "Last time read locked in file %s line %lu\n"
1361 "Last time write locked in file %s line %lu\n",
1362 - (ulong) rwlock->reader_count,
1363 + (ulong) rw_lock_get_reader_count(rwlock),
1364 (ulong) rwlock->waiters,
1365 + rwlock->lock_word,
1366 rwlock->last_s_file_name,
1367 (ulong) rwlock->last_s_line,
1368 rwlock->last_x_file_name,
1369 @@ -773,28 +783,30 @@
1373 - } else if (cell->request_type == RW_LOCK_EX
1374 - || cell->request_type == RW_LOCK_WAIT_EX) {
1375 + } else if (cell->request_type == RW_LOCK_EX) {
1377 lock = cell->wait_object;
1379 - if (rw_lock_get_reader_count(lock) == 0
1380 - && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
1381 + if (lock->lock_word > 0) {
1382 + /* Either unlocked or only read locked. */
1387 - if (rw_lock_get_reader_count(lock) == 0
1388 - && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
1389 - && os_thread_eq(lock->writer_thread, cell->thread)) {
1390 + } else if (cell->request_type == RW_LOCK_WAIT_EX) {
1392 + lock = cell->wait_object;
1394 + /* lock_word == 0 means all readers have left */
1395 + if (lock->lock_word == 0) {
1400 } else if (cell->request_type == RW_LOCK_SHARED) {
1401 lock = cell->wait_object;
1403 - if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
1404 + /* lock_word > 0 means no writer or reserved writer */
1405 + if (lock->lock_word > 0) {
1409 @@ -839,11 +851,15 @@
1410 /*========================*/
1411 sync_array_t* arr) /* in: wait array */
1413 +#ifdef HAVE_ATOMIC_BUILTINS
1414 + (void) os_atomic_increment(&arr->sg_count, 1);
1416 sync_array_enter(arr);
1420 sync_array_exit(arr);
1424 /**************************************************************************
1431 sync_array_enter(arr);
1433 @@ -868,36 +885,20 @@
1434 while (count < arr->n_reserved) {
1436 cell = sync_array_get_nth_cell(arr, i);
1439 - if (cell->wait_object != NULL) {
1441 + if (cell->wait_object == NULL) {
1446 if (sync_arr_cell_can_wake_up(cell)) {
1448 - if (cell->request_type == SYNC_MUTEX) {
1450 + event = sync_cell_get_event(cell);
1452 - mutex = cell->wait_object;
1453 - os_event_set(mutex->event);
1455 - } else if (cell->request_type
1456 - == RW_LOCK_WAIT_EX) {
1459 - lock = cell->wait_object;
1460 - os_event_set(lock->wait_ex_event);
1465 - lock = cell->wait_object;
1466 - os_event_set(lock->event);
1469 + os_event_set(event);
1475 sync_array_exit(arr);
1476 @@ -1014,4 +1015,3 @@
1478 sync_array_exit(arr);
1481 diff -ruN a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
1482 --- a/innobase/sync/sync0rw.c 2009-09-10 04:03:01.000000000 +0900
1483 +++ b/innobase/sync/sync0rw.c 2009-10-22 15:18:44.000000000 +0900
1485 +/*****************************************************************************
1487 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
1488 +Copyright (c) 2008, Google Inc.
1490 +Portions of this file contain modifications contributed and copyrighted by
1491 +Google, Inc. Those modifications are gratefully acknowledged and are described
1492 +briefly in the InnoDB documentation. The contributions by Google are
1493 +incorporated with their permission, and subject to the conditions contained in
1494 +the file COPYING.Google.
1496 +This program is free software; you can redistribute it and/or modify it under
1497 +the terms of the GNU General Public License as published by the Free Software
1498 +Foundation; version 2 of the License.
1500 +This program is distributed in the hope that it will be useful, but WITHOUT
1501 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1502 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
1504 +You should have received a copy of the GNU General Public License along with
1505 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
1506 +Place, Suite 330, Boston, MA 02111-1307 USA
1508 +*****************************************************************************/
1510 /******************************************************
1511 The read-write lock (for thread synchronization)
1513 -(c) 1995 Innobase Oy
1515 Created 9/11/1995 Heikki Tuuri
1516 *******************************************************/
1518 @@ -15,17 +38,110 @@
1519 #include "mem0mem.h"
1520 #include "srv0srv.h"
1522 -ulint rw_s_system_call_count = 0;
1523 -ulint rw_s_spin_wait_count = 0;
1524 -ulint rw_s_os_wait_count = 0;
1526 + IMPLEMENTATION OF THE RW_LOCK
1527 + =============================
1528 +The status of a rw_lock is held in lock_word. The initial value of lock_word is
1529 +X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
1530 +for each x-lock. This describes the lock state for each value of lock_word:
1532 +lock_word == X_LOCK_DECR: Unlocked.
1533 +0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
1534 + (X_LOCK_DECR - lock_word) is the
1535 + number of readers that hold the lock.
1536 +lock_word == 0: Write locked
1537 +-X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
1538 + (-lock_word) is the number of readers
1539 + that hold the lock.
1540 +lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
1541 + decremented by X_LOCK_DECR once for each lock,
1542 + so the number of locks is:
1543 + ((-lock_word) / X_LOCK_DECR) + 1
1544 +When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
1545 +other values of lock_word are invalid.
1547 +The lock_word is always read and updated atomically and consistently, so that
1548 +it always represents the state of the lock, and the state of the lock changes
1549 +with a single atomic operation. This lock_word holds all of the information
1550 +that a thread needs in order to determine if it is eligible to gain the lock
1551 +or if it must spin or sleep. The one exception to this is that writer_thread
1552 +must be verified before recursive write locks: to solve this scenario, we make
1553 +writer_thread readable by all threads, but only writeable by the x-lock holder.
1555 +The other members of the lock obey the following rules to remain consistent:
1557 +recursive: This and the writer_thread field together control the
1558 + behaviour of recursive x-locking.
1559 + lock->recursive must be FALSE in following states:
1560 + 1) The writer_thread contains garbage i.e.: the
1561 + lock has just been initialized.
1562 + 2) The lock is not x-held and there is no
1563 + x-waiter waiting on WAIT_EX event.
1564 + 3) The lock is x-held or there is an x-waiter
1565 + waiting on WAIT_EX event but the 'pass' value
1567 + lock->recursive is TRUE iff:
1568 + 1) The lock is x-held or there is an x-waiter
1569 + waiting on WAIT_EX event and the 'pass' value
1571 + This flag must be set after the writer_thread field
1572 + has been updated with a memory ordering barrier.
1573 + It is unset before the lock_word has been incremented.
1574 +writer_thread: Is used only in recursive x-locking. Can only be safely
1575 + read iff lock->recursive flag is TRUE.
1576 + This field is uninitialized at lock creation time and
1577 + is updated atomically when x-lock is acquired or when
1578 + move_ownership is called. A thread is only allowed to
1579 + set the value of this field to its thread_id i.e.: a
1580 + thread cannot set writer_thread to some other thread's
1582 +waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
1583 + signals, it should only be set to 1 when there are threads
1584 + waiting on event. Must be 1 when a writer starts waiting to
1585 + ensure the current x-locking thread sends a wake-up signal
1586 + during unlock. May only be reset to 0 immediately before a
1587 + wake-up signal is sent to event. On most platforms, a
1588 + memory barrier is required after waiters is set, and before
1589 + verifying lock_word is still held, to ensure some unlocker
1590 + really does see the flag's new value.
1591 +event: Threads wait on event for read or write lock when another
1592 + thread has an x-lock or an x-lock reservation (wait_ex). A
1593 + thread may only wait on event after performing the following
1595 + (1) Record the counter value of event (with os_event_reset).
1596 + (2) Set waiters to 1.
1597 + (3) Verify lock_word <= 0.
1598 + (1) must come before (2) to ensure signal is not missed.
1599 + (2) must come before (3) to ensure a signal is sent.
1600 + These restrictions force the above ordering.
1601 + Immediately before sending the wake-up signal, we should:
1602 + (1) Verify lock_word == X_LOCK_DECR (unlocked)
1603 + (2) Reset waiters to 0.
1604 +wait_ex_event: A thread may only wait on the wait_ex_event after it has
1605 + performed the following actions in order:
1606 + (1) Decrement lock_word by X_LOCK_DECR.
1607 + (2) Record counter value of wait_ex_event (os_event_reset,
1608 + called from sync_array_reserve_cell).
1609 + (3) Verify that lock_word < 0.
1610 + (1) must come first to ensure no other threads become reader
1611 + or next writer, and notifies unlocker that signal must be sent.
1612 + (2) must come before (3) to ensure the signal is not missed.
1613 + These restrictions force the above ordering.
1614 + Immediately before sending the wake-up signal, we should:
1615 + Verify lock_word == 0 (waiting thread holds x_lock)
1618 +ib_longlong rw_s_spin_wait_count = 0;
1619 +ib_longlong rw_s_spin_round_count = 0;
1620 +ib_longlong rw_s_os_wait_count = 0;
1622 +ib_longlong rw_s_exit_count = 0;
1624 +ib_longlong rw_x_spin_wait_count = 0;
1625 +ib_longlong rw_x_spin_round_count = 0;
1626 +ib_longlong rw_x_os_wait_count = 0;
1628 -ulint rw_s_exit_count = 0;
1630 -ulint rw_x_system_call_count = 0;
1631 -ulint rw_x_spin_wait_count = 0;
1632 -ulint rw_x_os_wait_count = 0;
1634 -ulint rw_x_exit_count = 0;
1635 +ib_longlong rw_x_exit_count = 0;
1637 /* The global list of rw-locks */
1638 rw_lock_list_t rw_lock_list;
1639 @@ -99,22 +215,30 @@
1640 object is created, then the following call initializes
1643 +#ifndef HAVE_ATOMIC_BUILTINS
1644 mutex_create(rw_lock_get_mutex(lock));
1645 mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
1647 lock->mutex.cfile_name = cfile_name;
1648 lock->mutex.cline = cline;
1649 -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
1650 +# if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
1651 lock->mutex.cmutex_name = cmutex_name;
1652 lock->mutex.mutex_type = 1;
1653 -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
1654 +# endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
1656 - rw_lock_set_waiters(lock, 0);
1657 - rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
1658 - lock->writer_count = 0;
1659 - rw_lock_set_reader_count(lock, 0);
1661 - lock->writer_is_wait_ex = FALSE;
1662 +#else /* HAVE_ATOMIC_BUILTINS */
1664 + UT_NOT_USED(cmutex_name);
1666 +#endif /* HAVE_ATOMIC_BUILTINS */
1668 + lock->lock_word = X_LOCK_DECR;
1669 + lock->waiters = 0;
1671 + /* We set this value to signify that lock->writer_thread
1672 + contains garbage at initialization and cannot be used for
1673 + recursive x-locking. */
1674 + lock->recursive = FALSE;
1676 #ifdef UNIV_SYNC_DEBUG
1677 UT_LIST_INIT(lock->debug_list);
1678 @@ -126,15 +250,13 @@
1679 lock->cfile_name = cfile_name;
1680 lock->cline = cline;
1682 + lock->count_os_wait = 0;
1683 lock->last_s_file_name = "not yet reserved";
1684 lock->last_x_file_name = "not yet reserved";
1685 lock->last_s_line = 0;
1686 lock->last_x_line = 0;
1687 lock->event = os_event_create(NULL);
1690 lock->wait_ex_event = os_event_create(NULL);
1693 mutex_enter(&rw_lock_list_mutex);
1695 @@ -158,23 +280,17 @@
1697 rw_lock_t* lock) /* in: rw-lock */
1700 ut_a(rw_lock_validate(lock));
1701 -#endif /* UNIV_DEBUG */
1702 - ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
1703 - ut_a(rw_lock_get_waiters(lock) == 0);
1704 - ut_a(rw_lock_get_reader_count(lock) == 0);
1705 + ut_a(lock->lock_word == X_LOCK_DECR);
1707 - lock->magic_n = 0;
1709 +#ifndef HAVE_ATOMIC_BUILTINS
1710 mutex_free(rw_lock_get_mutex(lock));
1711 +#endif /* HAVE_ATOMIC_BUILTINS */
1713 mutex_enter(&rw_lock_list_mutex);
1714 os_event_free(lock->event);
1717 os_event_free(lock->wait_ex_event);
1720 if (UT_LIST_GET_PREV(list, lock)) {
1721 ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
1723 UT_LIST_REMOVE(list, rw_lock_list, lock);
1725 mutex_exit(&rw_lock_list_mutex);
1727 + lock->magic_n = 0;
1730 /**********************************************************************
1731 @@ -199,19 +317,12 @@
1735 - mutex_enter(rw_lock_get_mutex(lock));
1736 + ulint waiters = rw_lock_get_waiters(lock);
1737 + lint lock_word = lock->lock_word;
1739 ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
1740 - ut_a((rw_lock_get_reader_count(lock) == 0)
1741 - || (rw_lock_get_writer(lock) != RW_LOCK_EX));
1742 - ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
1743 - || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
1744 - || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
1745 - ut_a((rw_lock_get_waiters(lock) == 0)
1746 - || (rw_lock_get_waiters(lock) == 1));
1747 - ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
1749 - mutex_exit(rw_lock_get_mutex(lock));
1750 + ut_a(waiters == 0 || waiters == 1);
1751 + ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
1755 @@ -232,18 +343,15 @@
1756 ulint line) /* in: line where requested */
1758 ulint index; /* index of the reserved wait cell */
1759 - ulint i; /* spin round count */
1760 + ulint i = 0; /* spin round count */
1762 ut_ad(rw_lock_validate(lock));
1764 + rw_s_spin_wait_count++; /* Count calls to this function */
1766 - rw_s_spin_wait_count++;
1768 /* Spin waiting for the writer field to become free */
1771 - while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
1772 - && i < SYNC_SPIN_ROUNDS) {
1773 + while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
1774 if (srv_spin_wait_delay) {
1775 ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
1777 @@ -262,28 +370,32 @@
1778 lock->cfile_name, (ulong) lock->cline, (ulong) i);
1781 - mutex_enter(rw_lock_get_mutex(lock));
1783 /* We try once again to obtain the lock */
1785 if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
1786 - mutex_exit(rw_lock_get_mutex(lock));
1787 + rw_s_spin_round_count += i;
1789 return; /* Success */
1791 - /* If we get here, locking did not succeed, we may
1792 - suspend the thread to wait in the wait array */
1794 - rw_s_system_call_count++;
1795 + if (i < SYNC_SPIN_ROUNDS) {
1799 + rw_s_spin_round_count += i;
1801 sync_array_reserve_cell(sync_primary_wait_array,
1802 lock, RW_LOCK_SHARED,
1806 - rw_lock_set_waiters(lock, 1);
1808 - mutex_exit(rw_lock_get_mutex(lock));
1809 + /* Set waiters before checking lock_word to ensure wake-up
1810 + signal is sent. This may lead to some unnecessary signals. */
1811 + rw_lock_set_waiter_flag(lock);
1813 + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
1814 + sync_array_free_cell(sync_primary_wait_array, index);
1815 + return; /* Success */
1818 if (srv_print_latch_waits) {
1820 @@ -292,11 +404,13 @@
1821 lock, lock->cfile_name, (ulong) lock->cline);
1824 - rw_s_system_call_count++;
1825 + /* these stats may not be accurate */
1826 + lock->count_os_wait++;
1827 rw_s_os_wait_count++;
1829 sync_array_wait_event(sync_primary_wait_array, index);
1835 @@ -318,114 +432,130 @@
1837 ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
1839 - mutex_enter(&(lock->mutex));
1841 - lock->writer_thread = os_thread_get_curr_id();
1845 - mutex_exit(&(lock->mutex));
1846 + rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
1849 /**********************************************************************
1850 -Low-level function for acquiring an exclusive lock. */
1851 +Function for the next writer to call. Waits for readers to exit.
1852 +The caller must have already decremented lock_word by X_LOCK_DECR.*/
1855 -rw_lock_x_lock_low(
1856 -/*===============*/
1857 - /* out: RW_LOCK_NOT_LOCKED if did
1858 - not succeed, RW_LOCK_EX if success,
1859 - RW_LOCK_WAIT_EX, if got wait reservation */
1861 +rw_lock_x_lock_wait(
1862 +/*================*/
1863 rw_lock_t* lock, /* in: pointer to rw-lock */
1864 +#ifdef UNIV_SYNC_DEBUG
1865 ulint pass, /* in: pass value; != 0, if the lock will
1866 be passed to another thread to unlock */
1868 const char* file_name,/* in: file name where lock requested */
1869 ulint line) /* in: line where requested */
1871 -#ifdef UNIV_SYNC_DEBUG
1872 - ut_ad(mutex_own(rw_lock_get_mutex(lock)));
1873 -#endif /* UNIV_SYNC_DEBUG */
1874 - if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
1878 - if (rw_lock_get_reader_count(lock) == 0) {
1879 + ut_ad(lock->lock_word <= 0);
1881 + while (lock->lock_word < 0) {
1882 + if (srv_spin_wait_delay) {
1883 + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
1885 + if(i < SYNC_SPIN_ROUNDS) {
1890 - rw_lock_set_writer(lock, RW_LOCK_EX);
1891 - lock->writer_thread = os_thread_get_curr_id();
1892 - lock->writer_count++;
1893 - lock->pass = pass;
1894 + /* If there is still a reader, then go to sleep.*/
1895 + rw_x_spin_round_count += i;
1897 + sync_array_reserve_cell(sync_primary_wait_array,
1902 + /* Check lock_word to ensure wake-up isn't missed.*/
1903 + if(lock->lock_word < 0) {
1905 + /* these stats may not be accurate */
1906 + lock->count_os_wait++;
1907 + rw_x_os_wait_count++;
1909 + /* Add debug info as it is needed to detect possible
1910 + deadlock. We must add info for WAIT_EX thread for
1911 + deadlock detection to work properly. */
1912 #ifdef UNIV_SYNC_DEBUG
1913 - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
1914 + rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
1917 - lock->last_x_file_name = file_name;
1918 - lock->last_x_line = line;
1920 - /* Locking succeeded, we may return */
1921 - return(RW_LOCK_EX);
1923 - /* There are readers, we have to wait */
1924 - rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
1925 - lock->writer_thread = os_thread_get_curr_id();
1926 - lock->pass = pass;
1927 - lock->writer_is_wait_ex = TRUE;
1929 + sync_array_wait_event(sync_primary_wait_array,
1931 #ifdef UNIV_SYNC_DEBUG
1932 - rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
1934 + rw_lock_remove_debug_info(lock, pass,
1938 - return(RW_LOCK_WAIT_EX);
1939 + /* It is possible to wake when lock_word < 0.
1940 + We must pass the while-loop check to proceed.*/
1942 + sync_array_free_cell(sync_primary_wait_array,
1946 + rw_x_spin_round_count += i;
1949 - } else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
1950 - && os_thread_eq(lock->writer_thread,
1951 - os_thread_get_curr_id())) {
1952 +/**********************************************************************
1953 +Low-level function for acquiring an exclusive lock. */
1956 +rw_lock_x_lock_low(
1957 +/*===============*/
1958 + /* out: RW_LOCK_NOT_LOCKED if did
1959 + not succeed, RW_LOCK_EX if success. */
1960 + rw_lock_t* lock, /* in: pointer to rw-lock */
1961 + ulint pass, /* in: pass value; != 0, if the lock will
1962 + be passed to another thread to unlock */
1963 + const char* file_name,/* in: file name where lock requested */
1964 + ulint line) /* in: line where requested */
1966 + os_thread_id_t curr_thread = os_thread_get_curr_id();
1968 - if (rw_lock_get_reader_count(lock) == 0) {
1969 + if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
1971 - rw_lock_set_writer(lock, RW_LOCK_EX);
1972 - lock->writer_count++;
1973 - lock->pass = pass;
1974 - lock->writer_is_wait_ex = FALSE;
1975 + /* lock->recursive also tells us if the writer_thread
1976 + field is stale or active. As we are going to write
1977 + our own thread id in that field it must be that the
1978 + current writer_thread value is not active. */
1979 + ut_a(!lock->recursive);
1981 + /* Decrement occurred: we are writer or next-writer. */
1982 + rw_lock_set_writer_id_and_recursion_flag(lock,
1983 + pass ? FALSE : TRUE);
1985 + rw_lock_x_lock_wait(lock,
1986 #ifdef UNIV_SYNC_DEBUG
1987 - rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
1988 - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
1994 - lock->last_x_file_name = file_name;
1995 - lock->last_x_line = line;
1997 - /* Locking succeeded, we may return */
1998 - return(RW_LOCK_EX);
2000 + /* Decrement failed: relock or failed lock */
2001 + if (!pass && lock->recursive
2002 + && os_thread_eq(lock->writer_thread, curr_thread)) {
2004 + lock->lock_word -= X_LOCK_DECR;
2006 + /* Another thread locked before us */
2010 - return(RW_LOCK_WAIT_EX);
2012 - } else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
2013 - && os_thread_eq(lock->writer_thread,
2014 - os_thread_get_curr_id())
2015 - && (lock->pass == 0)
2018 - lock->writer_count++;
2021 #ifdef UNIV_SYNC_DEBUG
2022 - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
2024 + rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
2027 + lock->last_x_file_name = file_name;
2028 + lock->last_x_line = (unsigned int) line;
2030 - lock->last_x_file_name = file_name;
2031 - lock->last_x_line = line;
2033 - /* Locking succeeded, we may return */
2034 - return(RW_LOCK_EX);
2037 - /* Locking did not succeed */
2038 - return(RW_LOCK_NOT_LOCKED);
2042 /**********************************************************************
2043 @@ -448,47 +578,30 @@
2044 ulint line) /* in: line where requested */
2046 ulint index; /* index of the reserved wait cell */
2047 - ulint state; /* lock state acquired */
2048 ulint i; /* spin round count */
2049 + ibool spinning = FALSE;
2051 ut_ad(rw_lock_validate(lock));
2054 - /* Acquire the mutex protecting the rw-lock fields */
2055 - mutex_enter_fast(&(lock->mutex));
2057 - state = rw_lock_x_lock_low(lock, pass, file_name, line);
2060 - mutex_exit(&(lock->mutex));
2063 - if (state == RW_LOCK_EX) {
2064 + if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
2065 + rw_x_spin_round_count += i;
2067 return; /* Locking succeeded */
2069 - } else if (state == RW_LOCK_NOT_LOCKED) {
2071 - /* Spin waiting for the writer field to become free */
2074 - while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
2075 - && i < SYNC_SPIN_ROUNDS) {
2076 - if (srv_spin_wait_delay) {
2077 - ut_delay(ut_rnd_interval(0,
2078 - srv_spin_wait_delay));
2084 - if (i == SYNC_SPIN_ROUNDS) {
2085 - os_thread_yield();
2088 + rw_x_spin_wait_count++;
2090 - } else if (state == RW_LOCK_WAIT_EX) {
2092 - /* Spin waiting for the reader count field to become zero */
2095 - while (rw_lock_get_reader_count(lock) != 0
2096 - && i < SYNC_SPIN_ROUNDS) {
2097 + /* Spin waiting for the lock_word to become free */
2098 + while (i < SYNC_SPIN_ROUNDS
2099 + && lock->lock_word <= 0) {
2100 if (srv_spin_wait_delay) {
2101 ut_delay(ut_rnd_interval(0,
2102 srv_spin_wait_delay));
2103 @@ -498,12 +611,13 @@
2105 if (i == SYNC_SPIN_ROUNDS) {
2111 - i = 0; /* Eliminate a compiler warning */
2115 + rw_x_spin_round_count += i;
2117 if (srv_print_latch_waits) {
2119 "Thread %lu spin wait rw-x-lock at %p cfile %s cline %lu rnds %lu\n",
2120 @@ -511,39 +625,20 @@
2121 lock->cfile_name, (ulong) lock->cline, (ulong) i);
2124 - rw_x_spin_wait_count++;
2126 - /* We try once again to obtain the lock. Acquire the mutex protecting
2127 - the rw-lock fields */
2129 - mutex_enter(rw_lock_get_mutex(lock));
2131 - state = rw_lock_x_lock_low(lock, pass, file_name, line);
2133 - if (state == RW_LOCK_EX) {
2134 - mutex_exit(rw_lock_get_mutex(lock));
2136 - return; /* Locking succeeded */
2139 - rw_x_system_call_count++;
2141 sync_array_reserve_cell(sync_primary_wait_array,
2144 - /* On windows RW_LOCK_WAIT_EX signifies
2145 - that this thread should wait on the
2146 - special wait_ex_event. */
2147 - (state == RW_LOCK_WAIT_EX)
2148 - ? RW_LOCK_WAIT_EX :
2154 - rw_lock_set_waiters(lock, 1);
2156 - mutex_exit(rw_lock_get_mutex(lock));
2157 + /* Waiters must be set before checking lock_word, to ensure that the
2158 + wake-up signal is sent. This may cause a few unnecessary wake-ups. */
2159 + rw_lock_set_waiter_flag(lock);
2161 + if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
2162 + sync_array_free_cell(sync_primary_wait_array, index);
2163 + return; /* Locking succeeded */
2166 if (srv_print_latch_waits) {
2168 @@ -552,11 +647,13 @@
2169 lock->cfile_name, (ulong) lock->cline);
2172 - rw_x_system_call_count++;
2173 + /* these stats may not be accurate */
2174 + lock->count_os_wait++;
2175 rw_x_os_wait_count++;
2177 sync_array_wait_event(sync_primary_wait_array, index);
2184 rw_lock_t* lock, /* in: rw-lock */
2185 ulint level) /* in: level */
2187 +#ifdef UNIV_SYNC_DEBUG
2188 lock->level = level;
2189 +#endif /* UNIV_SYNC_DEBUG */
2192 #ifdef UNIV_SYNC_DEBUG
2195 ut_ad(rw_lock_validate(lock));
2197 - mutex_enter(&(lock->mutex));
2198 + rw_lock_debug_mutex_enter();
2200 info = UT_LIST_GET_FIRST(lock->debug_list);
2203 && (info->pass == 0)
2204 && (info->lock_type == lock_type)) {
2206 - mutex_exit(&(lock->mutex));
2207 + rw_lock_debug_mutex_exit();
2213 info = UT_LIST_GET_NEXT(list, info);
2215 - mutex_exit(&(lock->mutex));
2216 + rw_lock_debug_mutex_exit();
2220 @@ -758,22 +857,18 @@
2222 ut_ad(rw_lock_validate(lock));
2224 - mutex_enter(&(lock->mutex));
2226 if (lock_type == RW_LOCK_SHARED) {
2227 - if (lock->reader_count > 0) {
2228 + if (rw_lock_get_reader_count(lock) > 0) {
2231 } else if (lock_type == RW_LOCK_EX) {
2232 - if (lock->writer == RW_LOCK_EX) {
2233 + if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
2240 - mutex_exit(&(lock->mutex));
2245 @@ -801,11 +896,10 @@
2249 +#ifndef HAVE_ATOMIC_BUILTINS
2250 mutex_enter(&(lock->mutex));
2252 - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
2253 - || (rw_lock_get_reader_count(lock) != 0)
2254 - || (rw_lock_get_waiters(lock) != 0)) {
2256 + if (lock->lock_word != X_LOCK_DECR) {
2258 fprintf(stderr, "RW-LOCK: %p ", lock);
2260 @@ -821,8 +915,10 @@
2261 info = UT_LIST_GET_NEXT(list, info);
2265 +#ifndef HAVE_ATOMIC_BUILTINS
2266 mutex_exit(&(lock->mutex));
2269 lock = UT_LIST_GET_NEXT(list, lock);
2272 @@ -845,9 +941,10 @@
2274 "RW-LATCH: %p ", lock);
2276 - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
2277 - || (rw_lock_get_reader_count(lock) != 0)
2278 - || (rw_lock_get_waiters(lock) != 0)) {
2279 +#ifndef HAVE_ATOMIC_BUILTINS
2280 + mutex_enter(&(lock->mutex));
2282 + if (lock->lock_word != X_LOCK_DECR) {
2284 if (rw_lock_get_waiters(lock)) {
2285 fputs(" Waiters for the lock exist\n", stderr);
2287 info = UT_LIST_GET_NEXT(list, info);
2290 +#ifndef HAVE_ATOMIC_BUILTINS
2291 + mutex_exit(&(lock->mutex));
2295 /*************************************************************************
2296 @@ -909,14 +1009,11 @@
2297 lock = UT_LIST_GET_FIRST(rw_lock_list);
2299 while (lock != NULL) {
2300 - mutex_enter(rw_lock_get_mutex(lock));
2302 - if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
2303 - || (rw_lock_get_reader_count(lock) != 0)) {
2304 + if (lock->lock_word != X_LOCK_DECR) {
2308 - mutex_exit(rw_lock_get_mutex(lock));
2309 lock = UT_LIST_GET_NEXT(list, lock);
2312 diff -ruN a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
2313 --- a/innobase/sync/sync0sync.c 2009-10-22 15:15:05.000000000 +0900
2314 +++ b/innobase/sync/sync0sync.c 2009-10-22 15:18:44.000000000 +0900
2316 +/*****************************************************************************
2318 +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
2319 +Copyright (c) 2008, Google Inc.
2321 +Portions of this file contain modifications contributed and copyrighted by
2322 +Google, Inc. Those modifications are gratefully acknowledged and are described
2323 +briefly in the InnoDB documentation. The contributions by Google are
2324 +incorporated with their permission, and subject to the conditions contained in
2325 +the file COPYING.Google.
2327 +This program is free software; you can redistribute it and/or modify it under
2328 +the terms of the GNU General Public License as published by the Free Software
2329 +Foundation; version 2 of the License.
2331 +This program is distributed in the hope that it will be useful, but WITHOUT
2332 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2333 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
2335 +You should have received a copy of the GNU General Public License along with
2336 +this program; if not, write to the Free Software Foundation, Inc., 59 Temple
2337 +Place, Suite 330, Boston, MA 02111-1307 USA
2339 +*****************************************************************************/
2341 /******************************************************
2342 Mutex, the basic synchronization primitive
2344 -(c) 1995 Innobase Oy
2346 Created 9/5/1995 Heikki Tuuri
2347 *******************************************************/
2349 @@ -140,17 +163,12 @@
2351 ulint sync_dummy = 0;
2353 -/* The number of system calls made in this module. Intended for performance
2356 -ulint mutex_system_call_count = 0;
2358 /* Number of spin waits on mutexes: for performance monitoring */
2360 -ulint mutex_spin_round_count = 0;
2361 -ulint mutex_spin_wait_count = 0;
2362 -ulint mutex_os_wait_count = 0;
2363 -ulint mutex_exit_count = 0;
2364 +ib_longlong mutex_spin_round_count = 0;
2365 +ib_longlong mutex_spin_wait_count = 0;
2366 +ib_longlong mutex_os_wait_count = 0;
2367 +ib_longlong mutex_exit_count = 0;
2369 /* The global array of wait cells for implementation of the database's own
2370 mutexes and read-write locks */
2373 #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
2374 mutex_reset_lock_word(mutex);
2375 +#elif defined(HAVE_ATOMIC_BUILTINS)
2376 + mutex_reset_lock_word(mutex);
2378 os_fast_mutex_init(&(mutex->os_fast_mutex));
2379 mutex->lock_word = 0;
2382 os_event_free(mutex->event);
2384 -#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
2385 +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
2386 +#elif defined(HAVE_ATOMIC_BUILTINS)
2388 os_fast_mutex_free(&(mutex->os_fast_mutex));
2390 /* If we free the mutex protecting the mutex list (freeing is
2391 @@ -421,6 +443,12 @@
2392 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
2395 + /* This update is not thread-safe, but we don't mind if the count
2396 + isn't exact. It was moved out of the ifdef that follows because we
2397 + are willing to pay the cost of counting this, as the data is valuable.
2398 + Count the number of calls to mutex_spin_wait. */
2399 + mutex_spin_wait_count++;
2407 #if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
2408 - mutex_spin_wait_count++;
2409 mutex->count_spin_loop++;
2410 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
2413 sync_array_reserve_cell(sync_primary_wait_array, mutex,
2414 SYNC_MUTEX, file_name, line, &index);
2416 - mutex_system_call_count++;
2418 /* The memory order of the array reservation and the change in the
2419 waiters field is important: when we suspend a thread, we first
2420 reserve the cell and then set waiters field to 1. When threads are
2422 mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
2425 - mutex_system_call_count++;
2426 mutex_os_wait_count++;
2428 #ifndef UNIV_HOTBACKUP
2429 @@ -1368,20 +1392,31 @@
2430 FILE* file) /* in: file where to print */
2432 #ifdef UNIV_SYNC_DEBUG
2433 - fprintf(stderr, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
2434 + fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
2435 mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
2439 -"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
2440 -"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
2441 - (ulong) mutex_spin_wait_count,
2442 - (ulong) mutex_spin_round_count,
2443 - (ulong) mutex_os_wait_count,
2444 - (ulong) rw_s_spin_wait_count,
2445 - (ulong) rw_s_os_wait_count,
2446 - (ulong) rw_x_spin_wait_count,
2447 - (ulong) rw_x_os_wait_count);
2448 + "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
2449 + "RW-shared spins %llu, OS waits %llu;"
2450 + " RW-excl spins %llu, OS waits %llu\n",
2451 + mutex_spin_wait_count,
2452 + mutex_spin_round_count,
2453 + mutex_os_wait_count,
2454 + rw_s_spin_wait_count,
2455 + rw_s_os_wait_count,
2456 + rw_x_spin_wait_count,
2457 + rw_x_os_wait_count);
2460 + "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
2462 + (double) mutex_spin_round_count /
2463 + (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
2464 + (double) rw_s_spin_round_count /
2465 + (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
2466 + (double) rw_x_spin_round_count /
2467 + (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
2470 /***********************************************************************
2471 diff -ruN a/patch_info/innodb_rw_lock.info b/patch_info/innodb_rw_lock.info
2472 --- /dev/null 1970-01-01 09:00:00.000000000 +0900
2473 +++ b/patch_info/innodb_rw_lock.info 2009-10-22 15:18:30.000000000 +0900
2475 +File=innodb_rw_lock.patch
2476 +Name=Fix of InnoDB rw_locks ported from InnoDB Plugin
2478 +Author=Innobase Oy.