1 diff -r 2e0c46e78b50 innobase/buf/buf0buf.c
2 --- a/innobase/buf/buf0buf.c Mon Dec 22 00:33:53 2008 -0800
3 +++ b/innobase/buf/buf0buf.c Mon Dec 22 00:33:59 2008 -0800
5 mutex_create(&(buf_pool->mutex));
6 mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
8 + mutex_create(&(buf_pool->flush_list_mutex));
9 + mutex_create(&(buf_pool->LRU_mutex));
10 + mutex_create(&(buf_pool->free_mutex));
11 + rw_lock_create(&(buf_pool->hash_latch));
12 + mutex_set_level(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK);
13 + mutex_set_level(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK);
14 + mutex_set_level(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK);
15 + rw_lock_set_level(&(buf_pool->hash_latch), SYNC_NO_ORDER_CHECK);
17 + mutex_enter(&(buf_pool->LRU_mutex));
18 + mutex_enter(&(buf_pool->flush_list_mutex));
19 + mutex_enter(&(buf_pool->free_mutex));
20 + rw_lock_x_lock(&(buf_pool->hash_latch));
21 mutex_enter(&(buf_pool->mutex));
25 block->in_free_list = TRUE;
28 + mutex_exit(&(buf_pool->LRU_mutex));
29 + mutex_exit(&(buf_pool->flush_list_mutex));
30 + mutex_exit(&(buf_pool->free_mutex));
31 + rw_lock_x_unlock(&(buf_pool->hash_latch));
32 mutex_exit(&(buf_pool->mutex));
34 if (srv_use_adaptive_hash_indexes) {
36 if (buf_pool->freed_page_clock >= block->freed_page_clock
37 + 1 + (buf_pool->curr_size / 4)) {
39 - mutex_enter(&buf_pool->mutex);
40 + mutex_enter(&(buf_pool->LRU_mutex));
41 /* There has been freeing activity in the LRU list:
42 best to move to the head of the LRU list */
44 buf_LRU_make_block_young(block);
45 - mutex_exit(&buf_pool->mutex);
46 + mutex_exit(&(buf_pool->LRU_mutex));
54 - mutex_enter(&(buf_pool->mutex));
55 + mutex_enter(&(buf_pool->LRU_mutex));
57 block = buf_block_align(frame);
61 buf_LRU_make_block_young(block);
63 - mutex_exit(&(buf_pool->mutex));
64 + mutex_exit(&(buf_pool->LRU_mutex));
67 /************************************************************************
70 buf_block_t* block) /* in, own: block to be freed */
72 - mutex_enter(&(buf_pool->mutex));
73 + mutex_enter(&(buf_pool->free_mutex));
75 mutex_enter(&block->mutex);
79 mutex_exit(&block->mutex);
81 - mutex_exit(&(buf_pool->mutex));
82 + mutex_exit(&(buf_pool->free_mutex));
85 /*************************************************************************
90 - mutex_enter_fast(&(buf_pool->mutex));
91 + rw_lock_s_lock(&(buf_pool->hash_latch));
93 block = buf_page_hash_get(space, offset);
95 - mutex_exit(&(buf_pool->mutex));
96 + rw_lock_s_unlock(&(buf_pool->hash_latch));
104 - mutex_enter_fast(&(buf_pool->mutex));
105 + rw_lock_s_lock(&(buf_pool->hash_latch));
107 block = buf_page_hash_get(space, offset);
110 block->check_index_page_at_flush = FALSE;
113 - mutex_exit(&(buf_pool->mutex));
114 + rw_lock_s_unlock(&(buf_pool->hash_latch));
117 /************************************************************************
122 - mutex_enter_fast(&(buf_pool->mutex));
123 + rw_lock_s_lock(&(buf_pool->hash_latch));
125 block = buf_page_hash_get(space, offset);
127 @@ -1008,7 +1025,7 @@
128 is_hashed = block->is_hashed;
131 - mutex_exit(&(buf_pool->mutex));
132 + rw_lock_s_unlock(&(buf_pool->hash_latch));
136 @@ -1050,7 +1067,7 @@
140 - mutex_enter_fast(&(buf_pool->mutex));
141 + rw_lock_s_lock(&(buf_pool->hash_latch));
143 block = buf_page_hash_get(space, offset);
145 @@ -1058,7 +1075,7 @@
146 block->file_page_was_freed = TRUE;
149 - mutex_exit(&(buf_pool->mutex));
150 + rw_lock_s_unlock(&(buf_pool->hash_latch));
154 @@ -1079,7 +1096,7 @@
158 - mutex_enter_fast(&(buf_pool->mutex));
159 + rw_lock_s_lock(&(buf_pool->hash_latch));
161 block = buf_page_hash_get(space, offset);
163 @@ -1087,7 +1104,7 @@
164 block->file_page_was_freed = FALSE;
167 - mutex_exit(&(buf_pool->mutex));
168 + rw_lock_s_unlock(&(buf_pool->hash_latch));
172 @@ -1166,26 +1183,33 @@
173 buf_pool->n_page_gets++;
176 - mutex_enter_fast(&(buf_pool->mutex));
177 + // mutex_enter_fast(&(buf_pool->mutex));
180 block = buf_block_align(guess);
182 + mutex_enter(&block->mutex);
183 if ((offset != block->offset) || (space != block->space)
184 || (block->state != BUF_BLOCK_FILE_PAGE)) {
186 + mutex_exit(&block->mutex);
192 + rw_lock_s_lock(&(buf_pool->hash_latch));
193 block = buf_page_hash_get(space, offset);
195 + mutex_enter(&block->mutex);
197 + rw_lock_s_unlock(&(buf_pool->hash_latch));
201 /* Page not in buf_pool: needs to be read from file */
203 - mutex_exit(&(buf_pool->mutex));
204 + // mutex_exit(&(buf_pool->mutex));
206 if (mode == BUF_GET_IF_IN_POOL) {
208 @@ -1204,7 +1228,7 @@
212 - mutex_enter(&block->mutex);
213 + // mutex_enter(&block->mutex);
215 ut_a(block->state == BUF_BLOCK_FILE_PAGE);
217 @@ -1216,7 +1240,7 @@
219 if (mode == BUF_GET_IF_IN_POOL) {
220 /* The page is only being read to buffer */
221 - mutex_exit(&buf_pool->mutex);
222 + // mutex_exit(&buf_pool->mutex);
223 mutex_exit(&block->mutex);
226 @@ -1233,7 +1257,9 @@
227 LRU list and we must put it to awe_LRU_free_mapped list once
230 + mutex_enter_fast(&(buf_pool->mutex));
231 buf_awe_map_page_to_frame(block, TRUE);
232 + mutex_exit(&buf_pool->mutex);
235 #ifdef UNIV_SYNC_DEBUG
236 @@ -1241,7 +1267,7 @@
238 buf_block_buf_fix_inc(block);
240 - mutex_exit(&buf_pool->mutex);
241 + // mutex_exit(&buf_pool->mutex);
243 /* Check if this is the first access to the page */
245 @@ -1791,7 +1817,8 @@
249 - mutex_enter(&(buf_pool->mutex));
250 + mutex_enter(&(buf_pool->LRU_mutex));
251 + rw_lock_x_lock(&(buf_pool->hash_latch));
252 mutex_enter(&block->mutex);
254 if (fil_tablespace_deleted_or_being_deleted_in_mem(space,
255 @@ -1806,7 +1833,8 @@
256 being deleted, or the page is already in buf_pool, return */
258 mutex_exit(&block->mutex);
259 - mutex_exit(&(buf_pool->mutex));
260 + mutex_exit(&(buf_pool->LRU_mutex));
261 + rw_lock_x_unlock(&(buf_pool->hash_latch));
263 buf_block_free(block);
265 @@ -1821,10 +1849,14 @@
268 buf_page_init(space, offset, block);
269 + rw_lock_x_unlock(&(buf_pool->hash_latch));
271 /* The block must be put to the LRU list, to the old blocks */
273 buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
274 + mutex_exit(&(buf_pool->LRU_mutex));
276 + mutex_enter(&(buf_pool->mutex)); /* to keep aio-related state consistent */
278 block->io_fix = BUF_IO_READ;
280 @@ -1873,7 +1905,8 @@
282 free_block = buf_LRU_get_free_block();
284 - mutex_enter(&(buf_pool->mutex));
285 + mutex_enter(&(buf_pool->LRU_mutex));
286 + rw_lock_x_lock(&(buf_pool->hash_latch));
288 block = buf_page_hash_get(space, offset);
290 @@ -1884,7 +1917,8 @@
291 block->file_page_was_freed = FALSE;
293 /* Page can be found in buf_pool */
294 - mutex_exit(&(buf_pool->mutex));
295 + mutex_exit(&(buf_pool->LRU_mutex));
296 + rw_lock_x_unlock(&(buf_pool->hash_latch));
298 buf_block_free(free_block);
300 @@ -1907,6 +1941,7 @@
301 mutex_enter(&block->mutex);
303 buf_page_init(space, offset, block);
304 + rw_lock_x_unlock(&(buf_pool->hash_latch));
306 /* The block must be put to the LRU list */
307 buf_LRU_add_block(block, FALSE);
308 @@ -1918,7 +1953,7 @@
310 buf_pool->n_pages_created++;
312 - mutex_exit(&(buf_pool->mutex));
313 + mutex_exit(&(buf_pool->LRU_mutex));
315 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
317 @@ -1932,7 +1967,7 @@
318 ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
320 /* Flush pages from the end of the LRU list if necessary */
321 - buf_flush_free_margin();
322 + buf_flush_free_margin(FALSE);
324 frame = block->frame;
326 @@ -1968,6 +2003,7 @@
332 buf_io_counter_t* io_counter;
334 @@ -2050,9 +2086,6 @@
338 - mutex_enter(&(buf_pool->mutex));
339 - mutex_enter(&block->mutex);
341 #ifdef UNIV_IBUF_DEBUG
342 ut_a(ibuf_count_get(block->space, block->offset) == 0);
344 @@ -2061,9 +2094,12 @@
345 removes the newest lock debug record, without checking the thread
350 if (io_type == BUF_IO_READ) {
351 + mutex_enter(&block->mutex);
352 + mutex_enter(&(buf_pool->mutex));
356 /* NOTE that the call to ibuf may have moved the ownership of
357 the x-latch to this OS thread: do not let this confuse you in
359 @@ -2094,6 +2130,8 @@
363 + mutex_exit(&(buf_pool->mutex));
364 + mutex_exit(&block->mutex);
366 if (buf_debug_prints) {
367 fputs("Has read ", stderr);
368 @@ -2102,10 +2140,25 @@
370 ut_ad(io_type == BUF_IO_WRITE);
372 + flush_type = block->flush_type;
373 + if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
374 + mutex_enter(&(buf_pool->LRU_mutex));
376 + mutex_enter(&(buf_pool->flush_list_mutex));
377 + mutex_enter(&block->mutex);
378 + mutex_enter(&(buf_pool->mutex));
382 /* Write means a flush operation: call the completion
383 routine in the flush system */
385 buf_flush_write_complete(block);
387 + mutex_exit(&(buf_pool->flush_list_mutex));
388 + if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
389 + mutex_exit(&(buf_pool->LRU_mutex));
392 rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
393 /* io_counter here */
394 @@ -2131,6 +2184,9 @@
396 buf_pool->n_pages_written++;
398 + mutex_exit(&(buf_pool->mutex));
399 + mutex_exit(&block->mutex);
402 if (buf_debug_prints) {
403 fputs("Has written ", stderr);
404 @@ -2138,9 +2194,6 @@
405 #endif /* UNIV_DEBUG */
408 - mutex_exit(&block->mutex);
409 - mutex_exit(&(buf_pool->mutex));
412 if (buf_debug_prints) {
413 fprintf(stderr, "page space %lu page no %lu\n",
414 @@ -2168,11 +2221,11 @@
415 freed = buf_LRU_search_and_free_block(100);
418 - mutex_enter(&(buf_pool->mutex));
419 + mutex_enter(&(buf_pool->LRU_mutex));
421 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
423 - mutex_exit(&(buf_pool->mutex));
424 + mutex_exit(&(buf_pool->LRU_mutex));
427 /*************************************************************************
428 @@ -2191,10 +2244,22 @@
432 + ulint n_single_flush_tmp = 0;
433 + ulint n_lru_flush_tmp = 0;
434 + ulint n_list_flush_tmp = 0;
438 + mutex_enter(&(buf_pool->LRU_mutex));
439 + mutex_enter(&(buf_pool->flush_list_mutex));
440 + mutex_enter(&(buf_pool->free_mutex));
441 + rw_lock_x_lock(&(buf_pool->hash_latch));
443 mutex_enter(&(buf_pool->mutex));
444 + n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
445 + n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST];
446 + n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU];
447 + mutex_exit(&(buf_pool->mutex));
449 for (i = 0; i < buf_pool->curr_size; i++) {
451 @@ -2262,11 +2327,14 @@
453 ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
455 - ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
456 - ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
457 - ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
458 + ut_a(n_single_flush_tmp == n_single_flush);
459 + ut_a(n_list_flush_tmp == n_list_flush);
460 + ut_a(n_lru_flush_tmp == n_lru_flush);
462 - mutex_exit(&(buf_pool->mutex));
463 + mutex_exit(&(buf_pool->LRU_mutex));
464 + mutex_exit(&(buf_pool->flush_list_mutex));
465 + mutex_exit(&(buf_pool->free_mutex));
466 + rw_lock_x_unlock(&(buf_pool->hash_latch));
468 ut_a(buf_LRU_validate());
469 ut_a(buf_flush_validate());
470 @@ -2298,7 +2366,9 @@
471 index_ids = mem_alloc(sizeof(dulint) * size);
472 counts = mem_alloc(sizeof(ulint) * size);
474 - mutex_enter(&(buf_pool->mutex));
475 + mutex_enter(&(buf_pool->LRU_mutex));
476 + mutex_enter(&(buf_pool->flush_list_mutex));
477 + mutex_enter(&(buf_pool->free_mutex));
480 "buf_pool size %lu\n"
481 @@ -2351,7 +2421,9 @@
485 - mutex_exit(&(buf_pool->mutex));
486 + mutex_exit(&(buf_pool->LRU_mutex));
487 + mutex_exit(&(buf_pool->flush_list_mutex));
488 + mutex_exit(&(buf_pool->free_mutex));
490 for (i = 0; i < n_found; i++) {
491 index = dict_index_get_if_in_cache(index_ids[i]);
492 @@ -2386,8 +2458,6 @@
494 ulint fixed_pages_number = 0;
496 - mutex_enter(&(buf_pool->mutex));
498 for (i = 0; i < buf_pool->curr_size; i++) {
500 block = buf_pool_get_nth_block(buf_pool, i);
501 @@ -2403,7 +2473,6 @@
505 - mutex_exit(&(buf_pool->mutex));
506 return fixed_pages_number;
508 #endif /* UNIV_DEBUG */
509 @@ -2431,7 +2500,9 @@
513 - mutex_enter(&(buf_pool->mutex));
514 + mutex_enter(&(buf_pool->LRU_mutex));
515 + mutex_enter(&(buf_pool->flush_list_mutex));
516 + mutex_enter(&(buf_pool->free_mutex));
518 ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
519 / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
520 @@ -2439,7 +2510,9 @@
522 /* 1 + is there to avoid division by zero */
524 - mutex_exit(&(buf_pool->mutex));
525 + mutex_exit(&(buf_pool->LRU_mutex));
526 + mutex_exit(&(buf_pool->flush_list_mutex));
527 + mutex_exit(&(buf_pool->free_mutex));
531 @@ -2459,6 +2532,9 @@
533 size = buf_pool->curr_size;
535 + mutex_enter(&(buf_pool->LRU_mutex));
536 + mutex_enter(&(buf_pool->flush_list_mutex));
537 + mutex_enter(&(buf_pool->free_mutex));
538 mutex_enter(&(buf_pool->mutex));
541 @@ -2532,6 +2608,9 @@
542 buf_pool->n_pages_written_old = buf_pool->n_pages_written;
543 buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
545 + mutex_exit(&(buf_pool->LRU_mutex));
546 + mutex_exit(&(buf_pool->flush_list_mutex));
547 + mutex_exit(&(buf_pool->free_mutex));
548 mutex_exit(&(buf_pool->mutex));
551 @@ -2562,8 +2641,6 @@
555 - mutex_enter(&(buf_pool->mutex));
557 for (i = 0; i < buf_pool->curr_size; i++) {
559 block = buf_pool_get_nth_block(buf_pool, i);
560 @@ -2584,8 +2661,6 @@
562 mutex_exit(&block->mutex);
565 - mutex_exit(&(buf_pool->mutex));
569 @@ -2625,11 +2700,11 @@
573 - mutex_enter(&(buf_pool->mutex));
574 + mutex_enter(&(buf_pool->free_mutex));
576 len = UT_LIST_GET_LEN(buf_pool->free);
578 - mutex_exit(&(buf_pool->mutex));
579 + mutex_exit(&(buf_pool->free_mutex));
583 diff -r 2e0c46e78b50 innobase/buf/buf0flu.c
584 --- a/innobase/buf/buf0flu.c Mon Dec 22 00:33:53 2008 -0800
585 +++ b/innobase/buf/buf0flu.c Mon Dec 22 00:33:59 2008 -0800
586 @@ -117,12 +117,14 @@
587 ut_ad(mutex_own(&block->mutex));
588 #endif /* UNIV_SYNC_DEBUG */
589 if (block->state != BUF_BLOCK_FILE_PAGE) {
590 + /* It is permitted not to hold LRU_mutex here.. */
592 ut_print_timestamp(stderr);
594 " InnoDB: Error: buffer block state %lu in the LRU list!\n",
595 (ulong)block->state);
596 ut_print_buf(stderr, (byte*)block, sizeof(buf_block_t));
602 @@ -536,18 +538,20 @@
603 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
604 || flush_type == BUF_FLUSH_SINGLE_PAGE);
606 - mutex_enter(&(buf_pool->mutex));
607 + rw_lock_s_lock(&(buf_pool->hash_latch));
609 block = buf_page_hash_get(space, offset);
611 ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
614 - mutex_exit(&(buf_pool->mutex));
615 + rw_lock_s_unlock(&(buf_pool->hash_latch));
619 mutex_enter(&block->mutex);
620 + mutex_enter(&(buf_pool->mutex));
621 + rw_lock_s_unlock(&(buf_pool->hash_latch));
623 if (flush_type == BUF_FLUSH_LIST
624 && buf_flush_ready_for_flush(block, flush_type)) {
626 high = fil_space_get_size(space);
629 - mutex_enter(&(buf_pool->mutex));
630 + rw_lock_s_lock(&(buf_pool->hash_latch));
632 for (i = low; i < high; i++) {
636 mutex_exit(&block->mutex);
638 - mutex_exit(&(buf_pool->mutex));
639 + rw_lock_s_unlock(&(buf_pool->hash_latch));
641 /* Note: as we release the buf_pool mutex
642 above, in buf_flush_try_page we cannot be sure
643 @@ -789,14 +793,14 @@
644 count += buf_flush_try_page(space, i,
647 - mutex_enter(&(buf_pool->mutex));
648 + rw_lock_s_lock(&(buf_pool->hash_latch));
650 mutex_exit(&block->mutex);
655 - mutex_exit(&(buf_pool->mutex));
656 + rw_lock_s_unlock(&(buf_pool->hash_latch));
663 (buf_pool->init_flush)[flush_type] = TRUE;
665 + mutex_exit(&(buf_pool->mutex));
667 + if (flush_type == BUF_FLUSH_LRU) {
668 + mutex_enter(&(buf_pool->LRU_mutex));
670 + mutex_enter(&(buf_pool->flush_list_mutex));
673 /* If we have flushed enough, leave the loop */
674 if (page_count >= min_n) {
676 offset = block->offset;
678 mutex_exit(&block->mutex);
679 - mutex_exit(&(buf_pool->mutex));
680 + if (flush_type == BUF_FLUSH_LRU) {
681 + mutex_exit(&(buf_pool->LRU_mutex));
683 + mutex_exit(&(buf_pool->flush_list_mutex));
685 old_page_count = page_count;
689 page_count - old_page_count); */
691 - mutex_enter(&(buf_pool->mutex));
692 + if (flush_type == BUF_FLUSH_LRU) {
693 + mutex_enter(&(buf_pool->LRU_mutex));
695 + mutex_enter(&(buf_pool->flush_list_mutex));
697 } else if (flush_type == BUF_FLUSH_LRU) {
704 + if (flush_type == BUF_FLUSH_LRU) {
705 + mutex_exit(&(buf_pool->LRU_mutex));
707 + mutex_exit(&(buf_pool->flush_list_mutex));
709 + mutex_enter(&(buf_pool->mutex));
711 (buf_pool->init_flush)[flush_type] = FALSE;
713 @@ -989,10 +1013,14 @@
718 - mutex_enter(&(buf_pool->mutex));
720 + /* optimistic search... */
721 + //mutex_enter(&(buf_pool->LRU_mutex));
722 + //mutex_enter(&(buf_pool->free_mutex));
724 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
726 + //mutex_exit(&(buf_pool->free_mutex));
728 block = UT_LIST_GET_LAST(buf_pool->LRU);
730 @@ -1014,7 +1042,7 @@
731 block = UT_LIST_GET_PREV(LRU, block);
734 - mutex_exit(&(buf_pool->mutex));
735 + //mutex_exit(&(buf_pool->LRU_mutex));
737 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
739 @@ -1033,8 +1061,9 @@
740 immediately, without waiting. */
743 -buf_flush_free_margin(void)
744 +buf_flush_free_margin(
745 /*=======================*/
750 @@ -1044,7 +1073,7 @@
751 if (n_to_flush > 0) {
752 n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
754 - if (n_flushed == ULINT_UNDEFINED) {
755 + if (wait && n_flushed == ULINT_UNDEFINED) {
756 /* There was an LRU type flush batch already running;
757 let us wait for it to end */
759 @@ -1094,11 +1123,11 @@
763 - mutex_enter(&(buf_pool->mutex));
764 + mutex_enter(&(buf_pool->flush_list_mutex));
766 ret = buf_flush_validate_low();
768 - mutex_exit(&(buf_pool->mutex));
769 + mutex_exit(&(buf_pool->flush_list_mutex));
773 diff -r 2e0c46e78b50 innobase/buf/buf0lru.c
774 --- a/innobase/buf/buf0lru.c Mon Dec 22 00:33:53 2008 -0800
775 +++ b/innobase/buf/buf0lru.c Mon Dec 22 00:33:59 2008 -0800
780 - mutex_enter(&(buf_pool->mutex));
781 + mutex_enter(&(buf_pool->LRU_mutex));
782 + mutex_enter(&(buf_pool->flush_list_mutex));
783 + mutex_enter(&(buf_pool->free_mutex));
784 + rw_lock_x_lock(&(buf_pool->hash_latch));
790 mutex_exit(&block->mutex);
792 - mutex_exit(&(buf_pool->mutex));
793 + mutex_exit(&(buf_pool->LRU_mutex));
794 + mutex_exit(&(buf_pool->flush_list_mutex));
795 + mutex_exit(&(buf_pool->free_mutex));
796 + rw_lock_x_unlock(&(buf_pool->hash_latch));
798 /* Note that the following call will acquire
799 an S-latch on the page */
801 block = UT_LIST_GET_PREV(LRU, block);
804 - mutex_exit(&(buf_pool->mutex));
805 + mutex_exit(&(buf_pool->LRU_mutex));
806 + mutex_exit(&(buf_pool->flush_list_mutex));
807 + mutex_exit(&(buf_pool->free_mutex));
808 + rw_lock_x_unlock(&(buf_pool->hash_latch));
811 os_thread_sleep(20000);
812 @@ -170,14 +179,14 @@
816 - mutex_enter(&(buf_pool->mutex));
817 + mutex_enter(&(buf_pool->LRU_mutex));
819 len = UT_LIST_GET_LEN(buf_pool->LRU);
821 if (len < BUF_LRU_OLD_MIN_LEN) {
822 /* The LRU list is too short to do read-ahead */
824 - mutex_exit(&(buf_pool->mutex));
825 + mutex_exit(&(buf_pool->LRU_mutex));
831 limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
833 - mutex_exit(&(buf_pool->mutex));
834 + mutex_exit(&(buf_pool->LRU_mutex));
838 @@ -210,13 +219,15 @@
842 - mutex_enter(&(buf_pool->mutex));
843 + /* optimistic search... */
844 + //mutex_enter(&(buf_pool->LRU_mutex));
848 block = UT_LIST_GET_LAST(buf_pool->LRU);
850 while (block != NULL) {
851 - ut_a(block->in_LRU_list);
852 + //ut_a(block->in_LRU_list); /* optimistic */
854 mutex_enter(&block->mutex);
858 #endif /* UNIV_DEBUG */
860 + mutex_exit(&block->mutex);
862 + mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */
864 + rw_lock_x_lock(&(buf_pool->hash_latch));
865 + mutex_enter(&block->mutex);
866 + if(block->in_LRU_list && buf_flush_ready_for_replace(block)) {
867 buf_LRU_block_remove_hashed_page(block);
868 + rw_lock_x_unlock(&(buf_pool->hash_latch));
870 - mutex_exit(&(buf_pool->mutex));
871 + mutex_exit(&(buf_pool->LRU_mutex));
872 mutex_exit(&block->mutex);
874 /* Remove possible adaptive hash index built on the
875 @@ -246,14 +265,25 @@
877 ut_a(block->buf_fix_count == 0);
879 - mutex_enter(&(buf_pool->mutex));
880 + mutex_enter(&(buf_pool->free_mutex));
881 mutex_enter(&block->mutex);
883 buf_LRU_block_free_hashed_page(block);
885 + mutex_exit(&(buf_pool->free_mutex));
886 mutex_exit(&block->mutex);
889 + } else { /* the block may have changed while the mutexes were released; re-check */
890 + mutex_exit(&(buf_pool->LRU_mutex));/* optimistic */
892 + rw_lock_x_unlock(&(buf_pool->hash_latch));
894 + if (!(block->in_LRU_list)) {
895 + mutex_exit(&block->mutex);
901 mutex_exit(&block->mutex);
902 @@ -264,13 +294,21 @@
903 if (!freed && n_iterations <= 10
904 && distance > 100 + (n_iterations * buf_pool->curr_size)
907 + mutex_enter(&(buf_pool->mutex));
908 buf_pool->LRU_flush_ended = 0;
909 + mutex_exit(&(buf_pool->mutex));
911 - mutex_exit(&(buf_pool->mutex));
912 + //mutex_exit(&(buf_pool->LRU_mutex));
918 + //mutex_exit(&(buf_pool->LRU_mutex));
921 + mutex_enter(&(buf_pool->mutex));
922 if (buf_pool->LRU_flush_ended > 0) {
923 buf_pool->LRU_flush_ended--;
929 - mutex_enter(&(buf_pool->mutex));
930 + mutex_enter(&(buf_pool->LRU_mutex));
931 + mutex_enter(&(buf_pool->free_mutex));
933 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
934 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
939 - mutex_exit(&(buf_pool->mutex));
940 + mutex_exit(&(buf_pool->LRU_mutex));
941 + mutex_exit(&(buf_pool->free_mutex));
946 ibool mon_value_was = FALSE;
947 ibool started_monitor = FALSE;
949 - mutex_enter(&(buf_pool->mutex));
950 + mutex_enter(&(buf_pool->free_mutex)); /* LRU info:optimistic */
952 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
953 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
955 /* If there is a block in the free list, take it */
956 if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
958 - block = UT_LIST_GET_FIRST(buf_pool->free);
959 + block = UT_LIST_GET_LAST(buf_pool->free);
960 ut_a(block->in_free_list);
961 UT_LIST_REMOVE(free, buf_pool->free, block);
962 block->in_free_list = FALSE;
965 mutex_exit(&block->mutex);
967 - mutex_exit(&(buf_pool->mutex));
968 + mutex_exit(&(buf_pool->free_mutex));
970 if (started_monitor) {
971 srv_print_innodb_monitor = mon_value_was;
973 /* If no block was in the free list, search from the end of the LRU
974 list and try to free a block there */
976 - mutex_exit(&(buf_pool->mutex));
977 + mutex_exit(&(buf_pool->free_mutex));
979 freed = buf_LRU_search_and_free_block(n_iterations);
983 /* No free block was found: try to flush the LRU list */
985 - buf_flush_free_margin();
986 + buf_flush_free_margin(TRUE);
987 ++srv_buf_pool_wait_free;
989 os_aio_simulated_wake_handler_threads();
994 - mutex_enter(&(buf_pool->mutex));
995 + mutex_enter(&(buf_pool->LRU_mutex));
997 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
999 @@ -1001,7 +1041,10 @@
1001 if (buf_pool->LRU_old) {
1002 ut_a(buf_pool->LRU_old_len == old_len);
1006 + mutex_exit(&(buf_pool->LRU_mutex));
1007 + mutex_enter(&(buf_pool->free_mutex));
1009 UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
1011 @@ -1013,7 +1056,7 @@
1012 block = UT_LIST_GET_NEXT(free, block);
1015 - mutex_exit(&(buf_pool->mutex));
1016 + mutex_exit(&(buf_pool->free_mutex));
1020 @@ -1029,7 +1072,7 @@
1024 - mutex_enter(&(buf_pool->mutex));
1025 + mutex_enter(&(buf_pool->LRU_mutex));
1027 fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock);
1029 @@ -1073,5 +1116,5 @@
1033 - mutex_exit(&(buf_pool->mutex));
1034 + mutex_exit(&(buf_pool->LRU_mutex));
1036 diff -r 2e0c46e78b50 innobase/buf/buf0rea.c
1037 --- a/innobase/buf/buf0rea.c Mon Dec 22 00:33:53 2008 -0800
1038 +++ b/innobase/buf/buf0rea.c Mon Dec 22 00:33:59 2008 -0800
1039 @@ -236,10 +236,12 @@
1043 + mutex_exit(&(buf_pool->mutex));
1045 /* Count how many blocks in the area have been recently accessed,
1046 that is, reside near the start of the LRU list. */
1048 + rw_lock_s_lock(&(buf_pool->hash_latch));
1049 for (i = low; i < high; i++) {
1050 block = buf_page_hash_get(space, i);
1056 + rw_lock_s_unlock(&(buf_pool->hash_latch));
1058 - mutex_exit(&(buf_pool->mutex));
1059 + // mutex_exit(&(buf_pool->mutex));
1061 if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
1066 /* Flush pages from the end of the LRU list if necessary */
1067 - buf_flush_free_margin();
1068 + buf_flush_free_margin(FALSE);
1070 return(count + count2);
1076 + mutex_exit(&(buf_pool->mutex));
1078 /* Check that almost all pages in the area have been accessed; if
1079 offset == low, the accesses must be in a descending order, otherwise,
1084 + rw_lock_s_lock(&(buf_pool->hash_latch));
1085 for (i = low; i < high; i++) {
1086 block = buf_page_hash_get(space, i);
1088 @@ -479,12 +484,13 @@
1092 + rw_lock_s_unlock(&(buf_pool->hash_latch));
1094 if (fail_count > BUF_READ_AHEAD_LINEAR_AREA -
1095 BUF_READ_AHEAD_LINEAR_THRESHOLD) {
1096 /* Too many failures: return */
1098 - mutex_exit(&(buf_pool->mutex));
1099 + //mutex_exit(&(buf_pool->mutex));
1103 @@ -492,10 +498,11 @@
1104 /* If we got this far, we know that enough pages in the area have
1105 been accessed in the right order: linear read-ahead can be sensible */
1107 + rw_lock_s_lock(&(buf_pool->hash_latch));
1108 block = buf_page_hash_get(space, offset);
1110 if (block == NULL) {
1111 - mutex_exit(&(buf_pool->mutex));
1112 + rw_lock_s_unlock(&(buf_pool->hash_latch));
1117 pred_offset = fil_page_get_prev(frame);
1118 succ_offset = fil_page_get_next(frame);
1120 - mutex_exit(&(buf_pool->mutex));
1121 + rw_lock_s_unlock(&(buf_pool->hash_latch));
1123 if ((offset == low) && (succ_offset == offset + 1)) {
1126 os_aio_simulated_wake_handler_threads();
1128 /* Flush pages from the end of the LRU list if necessary */
1129 - buf_flush_free_margin();
1130 + buf_flush_free_margin(FALSE);
1133 if (buf_debug_prints && (count > 0)) {
1135 os_aio_simulated_wake_handler_threads();
1137 /* Flush pages from the end of the LRU list if necessary */
1138 - buf_flush_free_margin();
1139 + buf_flush_free_margin(FALSE);
1142 if (buf_debug_prints) {
1144 os_aio_simulated_wake_handler_threads();
1146 /* Flush pages from the end of the LRU list if necessary */
1147 - buf_flush_free_margin();
1148 + buf_flush_free_margin(FALSE);
1151 if (buf_debug_prints) {
1152 diff -r 2e0c46e78b50 innobase/include/buf0buf.h
1153 --- a/innobase/include/buf0buf.h Mon Dec 22 00:33:53 2008 -0800
1154 +++ b/innobase/include/buf0buf.h Mon Dec 22 00:33:59 2008 -0800
1156 mem_heap_t* io_counter_heap;
1158 hash_table_t* page_hash; /* hash table of the file pages */
1159 + rw_lock_t hash_latch;
1161 ulint n_pend_reads; /* number of pending read operations */
1164 UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
1165 /* base node of the modified block
1167 + mutex_t flush_list_mutex;
1168 ibool init_flush[BUF_FLUSH_LIST + 1];
1169 /* this is TRUE when a flush of the
1170 given type is being initialized */
1171 @@ -1011,8 +1013,10 @@
1172 in the case of AWE, at the start are
1173 always free blocks for which the
1174 physical memory is mapped to a frame */
1175 + mutex_t free_mutex;
1176 UT_LIST_BASE_NODE_T(buf_block_t) LRU;
1177 /* base node of the LRU list */
1178 + mutex_t LRU_mutex;
1179 buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
1180 blocks in the LRU list; NULL if LRU
1181 length less than BUF_LRU_OLD_MIN_LEN */
1182 diff -r 2e0c46e78b50 innobase/include/buf0buf.ic
1183 --- a/innobase/include/buf0buf.ic Mon Dec 22 00:33:53 2008 -0800
1184 +++ b/innobase/include/buf0buf.ic Mon Dec 22 00:33:59 2008 -0800
1189 - mutex_enter(&(buf_pool->mutex));
1190 + mutex_enter(&(buf_pool->flush_list_mutex));
1192 block = UT_LIST_GET_LAST(buf_pool->flush_list);
1195 lsn = block->oldest_modification;
1198 - mutex_exit(&(buf_pool->mutex));
1199 + mutex_exit(&(buf_pool->flush_list_mutex));
1203 @@ -392,18 +392,18 @@
1204 /* out: TRUE if io going on */
1205 buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
1207 - mutex_enter(&(buf_pool->mutex));
1208 + mutex_enter(&block->mutex);
1210 ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
1211 ut_ad(block->buf_fix_count > 0);
1213 if (block->io_fix != 0) {
1214 - mutex_exit(&(buf_pool->mutex));
1215 + mutex_exit(&block->mutex);
1220 - mutex_exit(&(buf_pool->mutex));
1221 + mutex_exit(&block->mutex);
1227 block = buf_block_align(frame);
1229 - mutex_enter(&(buf_pool->mutex));
1230 + mutex_enter(&block->mutex);
1232 if (block->state == BUF_BLOCK_FILE_PAGE) {
1233 lsn = block->newest_modification;
1235 lsn = ut_dulint_zero;
1238 - mutex_exit(&(buf_pool->mutex));
1239 + mutex_exit(&block->mutex);
1244 ut_a(block->state == BUF_BLOCK_FILE_PAGE);
1246 if (rw_latch == RW_X_LATCH && mtr->modifications) {
1247 - mutex_enter(&buf_pool->mutex);
1248 + mutex_enter(&buf_pool->flush_list_mutex);
1249 buf_flush_note_modification(block, mtr);
1250 - mutex_exit(&buf_pool->mutex);
1251 + mutex_exit(&buf_pool->flush_list_mutex);
1254 mutex_enter(&block->mutex);
1255 diff -r 2e0c46e78b50 innobase/include/buf0flu.h
1256 --- a/innobase/include/buf0flu.h Mon Dec 22 00:33:53 2008 -0800
1257 +++ b/innobase/include/buf0flu.h Mon Dec 22 00:33:59 2008 -0800
1259 a margin of replaceable pages there. */
1262 -buf_flush_free_margin(void);
1263 +buf_flush_free_margin(
1264 /*=======================*/
1266 /************************************************************************
1267 Initializes a page for writing to the tablespace. */
1269 diff -r 2e0c46e78b50 innobase/include/buf0flu.ic
1270 --- a/innobase/include/buf0flu.ic Mon Dec 22 00:33:53 2008 -0800
1271 +++ b/innobase/include/buf0flu.ic Mon Dec 22 00:33:59 2008 -0800
1273 ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
1274 #endif /* UNIV_SYNC_DEBUG */
1276 - mutex_enter(&(buf_pool->mutex));
1277 + mutex_enter(&(buf_pool->flush_list_mutex));
1279 ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
1285 - mutex_exit(&(buf_pool->mutex));
1286 + mutex_exit(&(buf_pool->flush_list_mutex));
1288 diff -r 2e0c46e78b50 innobase/log/log0recv.c
1289 --- a/innobase/log/log0recv.c Mon Dec 22 00:33:53 2008 -0800
1290 +++ b/innobase/log/log0recv.c Mon Dec 22 00:33:59 2008 -0800
1291 @@ -1693,11 +1693,11 @@
1295 - mutex_enter(&(buf_pool->mutex));
1296 + rw_lock_s_lock(&(buf_pool->hash_latch));
1298 page = buf_page_hash_get(space, page_no)->frame;
1300 - mutex_exit(&(buf_pool->mutex));
1301 + rw_lock_s_unlock(&(buf_pool->hash_latch));
1303 replica = buf_page_get(space + RECV_REPLICA_SPACE_ADD, page_no,
1305 diff -r 2e0c46e78b50 patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info
1306 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1307 +++ b/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info Mon Dec 22 00:33:59 2008 -0800
1309 +File=split_buf_pool_mutex_fixed_optimistic_safe.patch
1310 +Name=InnoDB patch to fix buffer pool scalability
1312 +Author=Yasufumi Kinoshita