1 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0buf.c mysql-5.1.29-rc/storage/innobase/buf/buf0buf.c
2 --- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0buf.c 2008-10-12 06:54:12.000000000 +0900
3 +++ mysql-5.1.29-rc/storage/innobase/buf/buf0buf.c 2008-11-18 15:44:00.000000000 +0900
5 ---------------------------- */
6 mutex_create(&buf_pool->mutex, SYNC_BUF_POOL);
8 + mutex_create(&(buf_pool->flush_list_mutex), SYNC_NO_ORDER_CHECK);
9 + mutex_create(&(buf_pool->LRU_mutex), SYNC_NO_ORDER_CHECK);
10 + mutex_create(&(buf_pool->free_mutex), SYNC_NO_ORDER_CHECK);
11 + mutex_create(&(buf_pool->hash_mutex), SYNC_NO_ORDER_CHECK);
13 + mutex_enter(&(buf_pool->LRU_mutex));
14 + mutex_enter(&(buf_pool->flush_list_mutex));
15 + mutex_enter(&(buf_pool->free_mutex));
16 + mutex_enter(&(buf_pool->hash_mutex));
17 mutex_enter(&(buf_pool->mutex));
21 block->in_free_list = TRUE;
24 + mutex_exit(&(buf_pool->LRU_mutex));
25 + mutex_exit(&(buf_pool->flush_list_mutex));
26 + mutex_exit(&(buf_pool->free_mutex));
27 + mutex_exit(&(buf_pool->hash_mutex));
28 mutex_exit(&(buf_pool->mutex));
30 if (srv_use_adaptive_hash_indexes) {
33 if (buf_block_peek_if_too_old(block)) {
35 - mutex_enter(&buf_pool->mutex);
36 + mutex_enter(&(buf_pool->LRU_mutex));
37 /* There has been freeing activity in the LRU list:
38 best to move to the head of the LRU list */
40 buf_LRU_make_block_young(block);
41 - mutex_exit(&buf_pool->mutex);
42 + mutex_exit(&(buf_pool->LRU_mutex));
50 - mutex_enter(&(buf_pool->mutex));
51 + mutex_enter(&(buf_pool->LRU_mutex));
53 block = buf_block_align(frame);
57 buf_LRU_make_block_young(block);
59 - mutex_exit(&(buf_pool->mutex));
60 + mutex_exit(&(buf_pool->LRU_mutex));
63 /************************************************************************
66 buf_block_t* block) /* in, own: block to be freed */
68 - mutex_enter(&(buf_pool->mutex));
69 + mutex_enter(&(buf_pool->free_mutex));
71 mutex_enter(&block->mutex);
75 mutex_exit(&block->mutex);
77 - mutex_exit(&(buf_pool->mutex));
78 + mutex_exit(&(buf_pool->free_mutex));
81 /*************************************************************************
82 @@ -996,11 +1009,11 @@
86 - mutex_enter_fast(&(buf_pool->mutex));
87 + mutex_enter_fast(&(buf_pool->hash_mutex));
89 block = buf_page_hash_get(space, offset);
91 - mutex_exit(&(buf_pool->mutex));
92 + mutex_exit(&(buf_pool->hash_mutex));
100 - mutex_enter_fast(&(buf_pool->mutex));
101 + mutex_enter_fast(&(buf_pool->hash_mutex));
103 block = buf_page_hash_get(space, offset);
105 @@ -1025,7 +1038,7 @@
106 block->check_index_page_at_flush = FALSE;
109 - mutex_exit(&(buf_pool->mutex));
110 + mutex_exit(&(buf_pool->hash_mutex));
113 /************************************************************************
114 @@ -1044,7 +1057,7 @@
118 - mutex_enter_fast(&(buf_pool->mutex));
119 + mutex_enter_fast(&(buf_pool->hash_mutex));
121 block = buf_page_hash_get(space, offset);
123 @@ -1054,7 +1067,7 @@
124 is_hashed = block->is_hashed;
127 - mutex_exit(&(buf_pool->mutex));
128 + mutex_exit(&(buf_pool->hash_mutex));
132 @@ -1096,7 +1109,7 @@
136 - mutex_enter_fast(&(buf_pool->mutex));
137 + mutex_enter_fast(&(buf_pool->hash_mutex));
139 block = buf_page_hash_get(space, offset);
141 @@ -1104,7 +1117,7 @@
142 block->file_page_was_freed = TRUE;
145 - mutex_exit(&(buf_pool->mutex));
146 + mutex_exit(&(buf_pool->hash_mutex));
150 @@ -1125,7 +1138,7 @@
154 - mutex_enter_fast(&(buf_pool->mutex));
155 + mutex_enter_fast(&(buf_pool->hash_mutex));
157 block = buf_page_hash_get(space, offset);
159 @@ -1133,7 +1146,7 @@
160 block->file_page_was_freed = FALSE;
163 - mutex_exit(&(buf_pool->mutex));
164 + mutex_exit(&(buf_pool->hash_mutex));
168 @@ -1174,26 +1187,33 @@
169 buf_pool->n_page_gets++;
172 - mutex_enter_fast(&(buf_pool->mutex));
173 + // mutex_enter_fast(&(buf_pool->mutex));
176 block = buf_block_align(guess);
178 + mutex_enter(&block->mutex);
179 if ((offset != block->offset) || (space != block->space)
180 || (block->state != BUF_BLOCK_FILE_PAGE)) {
182 + mutex_exit(&block->mutex);
188 + mutex_enter_fast(&(buf_pool->hash_mutex));
189 block = buf_page_hash_get(space, offset);
191 + mutex_enter(&block->mutex);
193 + mutex_exit(&(buf_pool->hash_mutex));
197 /* Page not in buf_pool: needs to be read from file */
199 - mutex_exit(&(buf_pool->mutex));
200 + // mutex_exit(&(buf_pool->mutex));
202 if (mode == BUF_GET_IF_IN_POOL) {
204 @@ -1212,7 +1232,7 @@
208 - mutex_enter(&block->mutex);
209 + // mutex_enter(&block->mutex);
211 ut_a(block->state == BUF_BLOCK_FILE_PAGE);
213 @@ -1224,7 +1244,7 @@
215 if (mode == BUF_GET_IF_IN_POOL) {
216 /* The page is only being read to buffer */
217 - mutex_exit(&buf_pool->mutex);
218 + // mutex_exit(&buf_pool->mutex);
219 mutex_exit(&block->mutex);
222 @@ -1241,7 +1261,9 @@
223 LRU list and we must put it to awe_LRU_free_mapped list once
226 + mutex_enter_fast(&(buf_pool->mutex));
227 buf_awe_map_page_to_frame(block, TRUE);
228 + mutex_exit(&buf_pool->mutex);
231 #ifdef UNIV_SYNC_DEBUG
232 @@ -1249,7 +1271,7 @@
234 buf_block_buf_fix_inc(block);
236 - mutex_exit(&buf_pool->mutex);
237 + // mutex_exit(&buf_pool->mutex);
239 /* Check if this is the first access to the page */
241 @@ -1747,7 +1769,8 @@
245 - mutex_enter(&(buf_pool->mutex));
246 + mutex_enter(&(buf_pool->LRU_mutex));
247 + mutex_enter(&(buf_pool->hash_mutex));
248 mutex_enter(&block->mutex);
250 if (fil_tablespace_deleted_or_being_deleted_in_mem(
251 @@ -1763,7 +1786,8 @@
252 already in buf_pool, return */
254 mutex_exit(&block->mutex);
255 - mutex_exit(&(buf_pool->mutex));
256 + mutex_exit(&(buf_pool->LRU_mutex));
257 + mutex_exit(&(buf_pool->hash_mutex));
259 buf_block_free(block);
261 @@ -1778,10 +1802,14 @@
264 buf_page_init(space, offset, block);
265 + mutex_exit(&(buf_pool->hash_mutex));
267 /* The block must be put to the LRU list, to the old blocks */
269 buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */
270 + mutex_exit(&(buf_pool->LRU_mutex));
272 + mutex_enter(&(buf_pool->mutex)); /* for consistency about aio */
274 block->io_fix = BUF_IO_READ;
276 @@ -1830,7 +1858,8 @@
278 free_block = buf_LRU_get_free_block();
280 - mutex_enter(&(buf_pool->mutex));
281 + mutex_enter(&(buf_pool->LRU_mutex));
282 + mutex_enter(&(buf_pool->hash_mutex));
284 block = buf_page_hash_get(space, offset);
286 @@ -1841,7 +1870,8 @@
287 block->file_page_was_freed = FALSE;
289 /* Page can be found in buf_pool */
290 - mutex_exit(&(buf_pool->mutex));
291 + mutex_exit(&(buf_pool->LRU_mutex));
292 + mutex_exit(&(buf_pool->hash_mutex));
294 buf_block_free(free_block);
296 @@ -1864,6 +1894,7 @@
297 mutex_enter(&block->mutex);
299 buf_page_init(space, offset, block);
300 + mutex_exit(&(buf_pool->hash_mutex));
302 /* The block must be put to the LRU list */
303 buf_LRU_add_block(block, FALSE);
304 @@ -1875,7 +1906,7 @@
306 buf_pool->n_pages_created++;
308 - mutex_exit(&(buf_pool->mutex));
309 + mutex_exit(&(buf_pool->LRU_mutex));
311 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
313 @@ -1889,7 +1920,7 @@
314 ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
316 /* Flush pages from the end of the LRU list if necessary */
317 - buf_flush_free_margin();
318 + buf_flush_free_margin(FALSE);
320 frame = block->frame;
322 @@ -1928,6 +1959,7 @@
323 buf_block_t* block) /* in: pointer to the block in question */
330 @@ -2040,9 +2072,6 @@
334 - mutex_enter(&(buf_pool->mutex));
335 - mutex_enter(&block->mutex);
337 #ifdef UNIV_IBUF_DEBUG
338 ut_a(ibuf_count_get(block->space, block->offset) == 0);
340 @@ -2051,9 +2080,12 @@
341 removes the newest lock debug record, without checking the thread
346 if (io_type == BUF_IO_READ) {
347 + mutex_enter(&block->mutex);
348 + mutex_enter(&(buf_pool->mutex));
352 /* NOTE that the call to ibuf may have moved the ownership of
353 the x-latch to this OS thread: do not let this confuse you in
355 @@ -2064,6 +2096,8 @@
357 rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
359 + mutex_exit(&(buf_pool->mutex));
360 + mutex_exit(&block->mutex);
362 if (buf_debug_prints) {
363 fputs("Has read ", stderr);
364 @@ -2072,15 +2106,33 @@
366 ut_ad(io_type == BUF_IO_WRITE);
368 + flush_type = block->flush_type;
369 + if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
370 + mutex_enter(&(buf_pool->LRU_mutex));
372 + mutex_enter(&(buf_pool->flush_list_mutex));
373 + mutex_enter(&block->mutex);
374 + mutex_enter(&(buf_pool->mutex));
378 /* Write means a flush operation: call the completion
379 routine in the flush system */
381 buf_flush_write_complete(block);
383 + mutex_exit(&(buf_pool->flush_list_mutex));
384 + if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
385 + mutex_exit(&(buf_pool->LRU_mutex));
388 rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE);
390 buf_pool->n_pages_written++;
392 + mutex_exit(&(buf_pool->mutex));
393 + mutex_exit(&block->mutex);
396 if (buf_debug_prints) {
397 fputs("Has written ", stderr);
398 @@ -2088,9 +2140,6 @@
399 #endif /* UNIV_DEBUG */
402 - mutex_exit(&block->mutex);
403 - mutex_exit(&(buf_pool->mutex));
406 if (buf_debug_prints) {
407 fprintf(stderr, "page space %lu page no %lu\n",
408 @@ -2118,11 +2167,11 @@
409 freed = buf_LRU_search_and_free_block(100);
412 - mutex_enter(&(buf_pool->mutex));
413 + mutex_enter(&(buf_pool->LRU_mutex));
415 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
417 - mutex_exit(&(buf_pool->mutex));
418 + mutex_exit(&(buf_pool->LRU_mutex));
422 @@ -2142,10 +2191,22 @@
426 + ulint n_single_flush_tmp = 0;
427 + ulint n_lru_flush_tmp = 0;
428 + ulint n_list_flush_tmp = 0;
432 + mutex_enter(&(buf_pool->LRU_mutex));
433 + mutex_enter(&(buf_pool->flush_list_mutex));
434 + mutex_enter(&(buf_pool->free_mutex));
435 + mutex_enter(&(buf_pool->hash_mutex));
437 mutex_enter(&(buf_pool->mutex));
438 + n_single_flush_tmp = buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
439 + n_list_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LIST];
440 + n_lru_flush_tmp = buf_pool->n_flush[BUF_FLUSH_LRU];
441 + mutex_exit(&(buf_pool->mutex));
443 for (i = 0; i < buf_pool->curr_size; i++) {
445 @@ -2216,11 +2277,14 @@
447 ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
449 - ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
450 - ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
451 - ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
453 - mutex_exit(&(buf_pool->mutex));
454 + ut_a(n_single_flush_tmp == n_single_flush);
455 + ut_a(n_list_flush_tmp == n_list_flush);
456 + ut_a(n_lru_flush_tmp == n_lru_flush);
458 + mutex_exit(&(buf_pool->LRU_mutex));
459 + mutex_exit(&(buf_pool->flush_list_mutex));
460 + mutex_exit(&(buf_pool->free_mutex));
461 + mutex_exit(&(buf_pool->hash_mutex));
463 ut_a(buf_LRU_validate());
464 ut_a(buf_flush_validate());
465 @@ -2252,7 +2316,9 @@
466 index_ids = mem_alloc(sizeof(dulint) * size);
467 counts = mem_alloc(sizeof(ulint) * size);
469 - mutex_enter(&(buf_pool->mutex));
470 + mutex_enter(&(buf_pool->LRU_mutex));
471 + mutex_enter(&(buf_pool->flush_list_mutex));
472 + mutex_enter(&(buf_pool->free_mutex));
475 "buf_pool size %lu\n"
476 @@ -2305,7 +2371,9 @@
480 - mutex_exit(&(buf_pool->mutex));
481 + mutex_exit(&(buf_pool->LRU_mutex));
482 + mutex_exit(&(buf_pool->flush_list_mutex));
483 + mutex_exit(&(buf_pool->free_mutex));
485 for (i = 0; i < n_found; i++) {
486 index = dict_index_get_if_in_cache(index_ids[i]);
487 @@ -2339,8 +2407,6 @@
489 ulint fixed_pages_number = 0;
491 - mutex_enter(&(buf_pool->mutex));
493 for (i = 0; i < buf_pool->curr_size; i++) {
495 block = buf_pool_get_nth_block(buf_pool, i);
496 @@ -2356,7 +2422,6 @@
500 - mutex_exit(&(buf_pool->mutex));
502 return(fixed_pages_number);
504 @@ -2385,7 +2450,9 @@
508 - mutex_enter(&(buf_pool->mutex));
509 + mutex_enter(&(buf_pool->LRU_mutex));
510 + mutex_enter(&(buf_pool->flush_list_mutex));
511 + mutex_enter(&(buf_pool->free_mutex));
513 ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
514 / (1 + UT_LIST_GET_LEN(buf_pool->LRU)
515 @@ -2393,7 +2460,9 @@
517 /* 1 + is there to avoid division by zero */
519 - mutex_exit(&(buf_pool->mutex));
520 + mutex_exit(&(buf_pool->LRU_mutex));
521 + mutex_exit(&(buf_pool->flush_list_mutex));
522 + mutex_exit(&(buf_pool->free_mutex));
526 @@ -2413,6 +2482,9 @@
528 size = buf_pool->curr_size;
530 + mutex_enter(&(buf_pool->LRU_mutex));
531 + mutex_enter(&(buf_pool->flush_list_mutex));
532 + mutex_enter(&(buf_pool->free_mutex));
533 mutex_enter(&(buf_pool->mutex));
536 @@ -2487,6 +2559,9 @@
537 buf_pool->n_pages_written_old = buf_pool->n_pages_written;
538 buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
540 + mutex_exit(&(buf_pool->LRU_mutex));
541 + mutex_exit(&(buf_pool->flush_list_mutex));
542 + mutex_exit(&(buf_pool->free_mutex));
543 mutex_exit(&(buf_pool->mutex));
546 @@ -2517,8 +2592,6 @@
550 - mutex_enter(&(buf_pool->mutex));
552 for (i = 0; i < buf_pool->curr_size; i++) {
554 block = buf_pool_get_nth_block(buf_pool, i);
555 @@ -2540,8 +2613,6 @@
556 mutex_exit(&block->mutex);
559 - mutex_exit(&(buf_pool->mutex));
564 @@ -2580,11 +2651,11 @@
568 - mutex_enter(&(buf_pool->mutex));
569 + mutex_enter(&(buf_pool->free_mutex));
571 len = UT_LIST_GET_LEN(buf_pool->free);
573 - mutex_exit(&(buf_pool->mutex));
574 + mutex_exit(&(buf_pool->free_mutex));
578 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0flu.c mysql-5.1.29-rc/storage/innobase/buf/buf0flu.c
579 --- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0flu.c 2008-10-12 06:54:12.000000000 +0900
580 +++ mysql-5.1.29-rc/storage/innobase/buf/buf0flu.c 2008-11-18 15:26:07.000000000 +0900
581 @@ -109,13 +109,15 @@
582 ut_ad(mutex_own(&(buf_pool->mutex)));
583 ut_ad(mutex_own(&block->mutex));
584 if (block->state != BUF_BLOCK_FILE_PAGE) {
585 + /* It is permitted not to own LRU_mutex. */
587 ut_print_timestamp(stderr);
589 " InnoDB: Error: buffer block state %lu"
590 " in the LRU list!\n",
591 (ulong)block->state);
592 ut_print_buf(stderr, block, sizeof(buf_block_t));
598 @@ -546,18 +548,20 @@
599 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
600 || flush_type == BUF_FLUSH_SINGLE_PAGE);
602 - mutex_enter(&(buf_pool->mutex));
603 + mutex_enter(&(buf_pool->hash_mutex));
605 block = buf_page_hash_get(space, offset);
607 ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
610 - mutex_exit(&(buf_pool->mutex));
611 + mutex_exit(&(buf_pool->hash_mutex));
615 mutex_enter(&block->mutex);
616 + mutex_enter(&(buf_pool->mutex));
617 + mutex_exit(&(buf_pool->hash_mutex));
619 if (flush_type == BUF_FLUSH_LIST
620 && buf_flush_ready_for_flush(block, flush_type)) {
622 high = fil_space_get_size(space);
625 - mutex_enter(&(buf_pool->mutex));
626 + mutex_enter(&(buf_pool->hash_mutex));
628 for (i = low; i < high; i++) {
632 mutex_exit(&block->mutex);
634 - mutex_exit(&(buf_pool->mutex));
635 + mutex_exit(&(buf_pool->hash_mutex));
637 /* Note: as we release the buf_pool mutex
638 above, in buf_flush_try_page we cannot be sure
639 @@ -800,14 +804,14 @@
640 count += buf_flush_try_page(space, i,
643 - mutex_enter(&(buf_pool->mutex));
644 + mutex_enter(&(buf_pool->hash_mutex));
646 mutex_exit(&block->mutex);
651 - mutex_exit(&(buf_pool->mutex));
652 + mutex_exit(&(buf_pool->hash_mutex));
658 (buf_pool->init_flush)[flush_type] = TRUE;
660 + mutex_exit(&(buf_pool->mutex));
662 + if (flush_type == BUF_FLUSH_LRU) {
663 + mutex_enter(&(buf_pool->LRU_mutex));
665 + mutex_enter(&(buf_pool->flush_list_mutex));
668 /* If we have flushed enough, leave the loop */
669 if (page_count >= min_n) {
671 offset = block->offset;
673 mutex_exit(&block->mutex);
674 - mutex_exit(&(buf_pool->mutex));
675 + if (flush_type == BUF_FLUSH_LRU) {
676 + mutex_exit(&(buf_pool->LRU_mutex));
678 + mutex_exit(&(buf_pool->flush_list_mutex));
680 old_page_count = page_count;
684 page_count - old_page_count); */
686 - mutex_enter(&(buf_pool->mutex));
687 + if (flush_type == BUF_FLUSH_LRU) {
688 + mutex_enter(&(buf_pool->LRU_mutex));
690 + mutex_enter(&(buf_pool->flush_list_mutex));
692 } else if (flush_type == BUF_FLUSH_LRU) {
698 + if (flush_type == BUF_FLUSH_LRU) {
699 + mutex_exit(&(buf_pool->LRU_mutex));
701 + mutex_exit(&(buf_pool->flush_list_mutex));
703 + mutex_enter(&(buf_pool->mutex));
705 (buf_pool->init_flush)[flush_type] = FALSE;
707 if ((buf_pool->n_flush[flush_type] == 0)
708 @@ -1001,10 +1025,14 @@
712 - mutex_enter(&(buf_pool->mutex));
713 + /* optimistic search... */
714 + //mutex_enter(&(buf_pool->LRU_mutex));
715 + //mutex_enter(&(buf_pool->free_mutex));
717 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
719 + //mutex_exit(&(buf_pool->free_mutex));
721 block = UT_LIST_GET_LAST(buf_pool->LRU);
723 while ((block != NULL)
724 @@ -1025,7 +1053,7 @@
725 block = UT_LIST_GET_PREV(LRU, block);
728 - mutex_exit(&(buf_pool->mutex));
729 + //mutex_exit(&(buf_pool->LRU_mutex));
731 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
733 @@ -1044,8 +1072,9 @@
734 immediately, without waiting. */
737 -buf_flush_free_margin(void)
738 +buf_flush_free_margin(
739 /*=======================*/
744 @@ -1055,7 +1084,7 @@
745 if (n_to_flush > 0) {
746 n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush,
748 - if (n_flushed == ULINT_UNDEFINED) {
749 + if (wait && n_flushed == ULINT_UNDEFINED) {
750 /* There was an LRU type flush batch already running;
751 let us wait for it to end */
753 @@ -1105,11 +1134,11 @@
757 - mutex_enter(&(buf_pool->mutex));
758 + mutex_enter(&(buf_pool->flush_list_mutex));
760 ret = buf_flush_validate_low();
762 - mutex_exit(&(buf_pool->mutex));
763 + mutex_exit(&(buf_pool->flush_list_mutex));
767 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0lru.c mysql-5.1.29-rc/storage/innobase/buf/buf0lru.c
768 --- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0lru.c 2008-10-12 06:54:12.000000000 +0900
769 +++ mysql-5.1.29-rc/storage/innobase/buf/buf0lru.c 2008-11-18 15:09:58.000000000 +0900
774 - mutex_enter(&(buf_pool->mutex));
775 + mutex_enter(&(buf_pool->LRU_mutex));
776 + mutex_enter(&(buf_pool->flush_list_mutex));
777 + mutex_enter(&(buf_pool->free_mutex));
778 + mutex_enter(&(buf_pool->hash_mutex));
784 mutex_exit(&block->mutex);
786 - mutex_exit(&(buf_pool->mutex));
787 + mutex_exit(&(buf_pool->LRU_mutex));
788 + mutex_exit(&(buf_pool->flush_list_mutex));
789 + mutex_exit(&(buf_pool->free_mutex));
790 + mutex_exit(&(buf_pool->hash_mutex));
792 /* Note that the following call will acquire
793 an S-latch on the page */
798 - mutex_exit(&(buf_pool->mutex));
799 + mutex_exit(&(buf_pool->LRU_mutex));
800 + mutex_exit(&(buf_pool->flush_list_mutex));
801 + mutex_exit(&(buf_pool->free_mutex));
802 + mutex_exit(&(buf_pool->hash_mutex));
805 os_thread_sleep(20000);
806 @@ -172,14 +181,14 @@
810 - mutex_enter(&(buf_pool->mutex));
811 + mutex_enter(&(buf_pool->LRU_mutex));
813 len = UT_LIST_GET_LEN(buf_pool->LRU);
815 if (len < BUF_LRU_OLD_MIN_LEN) {
816 /* The LRU list is too short to do read-ahead */
818 - mutex_exit(&(buf_pool->mutex));
819 + mutex_exit(&(buf_pool->LRU_mutex));
825 limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO;
827 - mutex_exit(&(buf_pool->mutex));
828 + mutex_exit(&(buf_pool->LRU_mutex));
832 @@ -212,13 +221,15 @@
836 - mutex_enter(&(buf_pool->mutex));
837 + /* optimistic search... */
838 + //mutex_enter(&(buf_pool->LRU_mutex));
842 block = UT_LIST_GET_LAST(buf_pool->LRU);
844 while (block != NULL) {
845 - ut_a(block->in_LRU_list);
846 + //ut_a(block->in_LRU_list); /* optimistic */
848 mutex_enter(&block->mutex);
852 #endif /* UNIV_DEBUG */
854 + mutex_exit(&block->mutex);
856 + mutex_enter(&(buf_pool->LRU_mutex));/* optimistic */
858 + mutex_enter(&(buf_pool->hash_mutex));
859 + mutex_enter(&block->mutex);
860 + if(block->in_LRU_list && buf_flush_ready_for_replace(block)) {
861 buf_LRU_block_remove_hashed_page(block);
862 + mutex_exit(&(buf_pool->hash_mutex));
864 - mutex_exit(&(buf_pool->mutex));
865 + mutex_exit(&(buf_pool->LRU_mutex));
866 mutex_exit(&block->mutex);
868 /* Remove possible adaptive hash index built on the
869 @@ -257,14 +276,25 @@
871 ut_a(block->buf_fix_count == 0);
873 - mutex_enter(&(buf_pool->mutex));
874 + mutex_enter(&(buf_pool->free_mutex));
875 mutex_enter(&block->mutex);
877 buf_LRU_block_free_hashed_page(block);
879 + mutex_exit(&(buf_pool->free_mutex));
880 mutex_exit(&block->mutex);
883 + } else { /* someone may interrupt...??? */
884 + mutex_exit(&(buf_pool->LRU_mutex));/* optimistic */
886 + mutex_exit(&(buf_pool->hash_mutex));
888 + if (!(block->in_LRU_list)) {
889 + mutex_exit(&block->mutex);
895 mutex_exit(&block->mutex);
896 @@ -275,13 +305,21 @@
897 if (!freed && n_iterations <= 10
898 && distance > 100 + (n_iterations * buf_pool->curr_size)
900 - buf_pool->LRU_flush_ended = 0;
902 + mutex_enter(&(buf_pool->mutex));
903 + buf_pool->LRU_flush_ended = 0;
904 mutex_exit(&(buf_pool->mutex));
906 + //mutex_exit(&(buf_pool->LRU_mutex));
912 + //mutex_exit(&(buf_pool->LRU_mutex));
915 + mutex_enter(&(buf_pool->mutex));
916 if (buf_pool->LRU_flush_ended > 0) {
917 buf_pool->LRU_flush_ended--;
923 - mutex_enter(&(buf_pool->mutex));
924 + mutex_enter(&(buf_pool->LRU_mutex));
925 + mutex_enter(&(buf_pool->free_mutex));
927 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
928 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) {
933 - mutex_exit(&(buf_pool->mutex));
934 + mutex_exit(&(buf_pool->LRU_mutex));
935 + mutex_exit(&(buf_pool->free_mutex));
940 ibool mon_value_was = FALSE;
941 ibool started_monitor = FALSE;
943 - mutex_enter(&(buf_pool->mutex));
944 + mutex_enter(&(buf_pool->free_mutex)); /* LRU length is read without LRU_mutex: optimistic */
946 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
947 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
950 mutex_exit(&block->mutex);
952 - mutex_exit(&(buf_pool->mutex));
953 + mutex_exit(&(buf_pool->free_mutex));
955 if (started_monitor) {
956 srv_print_innodb_monitor = mon_value_was;
958 /* If no block was in the free list, search from the end of the LRU
959 list and try to free a block there */
961 - mutex_exit(&(buf_pool->mutex));
962 + mutex_exit(&(buf_pool->free_mutex));
964 freed = buf_LRU_search_and_free_block(n_iterations);
968 /* No free block was found: try to flush the LRU list */
970 - buf_flush_free_margin();
971 + buf_flush_free_margin(TRUE);
972 ++srv_buf_pool_wait_free;
974 os_aio_simulated_wake_handler_threads();
979 - mutex_enter(&(buf_pool->mutex));
980 + mutex_enter(&(buf_pool->LRU_mutex));
982 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
984 @@ -1033,6 +1073,9 @@
985 ut_a(buf_pool->LRU_old_len == old_len);
988 + mutex_exit(&(buf_pool->LRU_mutex));
989 + mutex_enter(&(buf_pool->free_mutex));
991 UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free);
993 block = UT_LIST_GET_FIRST(buf_pool->free);
994 @@ -1043,7 +1086,7 @@
995 block = UT_LIST_GET_NEXT(free, block);
998 - mutex_exit(&(buf_pool->mutex));
999 + mutex_exit(&(buf_pool->free_mutex));
1003 @@ -1059,7 +1102,7 @@
1007 - mutex_enter(&(buf_pool->mutex));
1008 + mutex_enter(&(buf_pool->LRU_mutex));
1010 fprintf(stderr, "Pool ulint clock %lu\n",
1011 (ulong) buf_pool->ulint_clock);
1012 @@ -1105,6 +1148,6 @@
1016 - mutex_exit(&(buf_pool->mutex));
1017 + mutex_exit(&(buf_pool->LRU_mutex));
1019 #endif /* UNIV_DEBUG */
1020 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/buf/buf0rea.c mysql-5.1.29-rc/storage/innobase/buf/buf0rea.c
1021 --- mysql-5.1.29-rc_orig/storage/innobase/buf/buf0rea.c 2008-10-12 06:54:12.000000000 +0900
1022 +++ mysql-5.1.29-rc/storage/innobase/buf/buf0rea.c 2008-11-18 15:28:13.000000000 +0900
1023 @@ -219,10 +219,12 @@
1027 + mutex_exit(&(buf_pool->mutex));
1029 /* Count how many blocks in the area have been recently accessed,
1030 that is, reside near the start of the LRU list. */
1032 + mutex_enter(&(buf_pool->hash_mutex));
1033 for (i = low; i < high; i++) {
1034 block = buf_page_hash_get(space, i);
1040 + mutex_exit(&(buf_pool->hash_mutex));
1042 - mutex_exit(&(buf_pool->mutex));
1043 + // mutex_exit(&(buf_pool->mutex));
1045 if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
1050 /* Flush pages from the end of the LRU list if necessary */
1051 - buf_flush_free_margin();
1052 + buf_flush_free_margin(FALSE);
1054 return(count + count2);
1060 + mutex_exit(&(buf_pool->mutex));
1062 /* Check that almost all pages in the area have been accessed; if
1063 offset == low, the accesses must be in a descending order, otherwise,
1068 + mutex_enter(&(buf_pool->hash_mutex));
1069 for (i = low; i < high; i++) {
1070 block = buf_page_hash_get(space, i);
1072 @@ -462,12 +467,13 @@
1076 + mutex_exit(&(buf_pool->hash_mutex));
1078 if (fail_count > BUF_READ_AHEAD_LINEAR_AREA
1079 - BUF_READ_AHEAD_LINEAR_THRESHOLD) {
1080 /* Too many failures: return */
1082 - mutex_exit(&(buf_pool->mutex));
1083 + //mutex_exit(&(buf_pool->mutex));
1087 @@ -475,10 +481,11 @@
1088 /* If we got this far, we know that enough pages in the area have
1089 been accessed in the right order: linear read-ahead can be sensible */
1091 + mutex_enter(&(buf_pool->hash_mutex));
1092 block = buf_page_hash_get(space, offset);
1094 if (block == NULL) {
1095 - mutex_exit(&(buf_pool->mutex));
1096 + mutex_exit(&(buf_pool->hash_mutex));
1101 pred_offset = fil_page_get_prev(frame);
1102 succ_offset = fil_page_get_next(frame);
1104 - mutex_exit(&(buf_pool->mutex));
1105 + mutex_exit(&(buf_pool->hash_mutex));
1107 if ((offset == low) && (succ_offset == offset + 1)) {
1110 os_aio_simulated_wake_handler_threads();
1112 /* Flush pages from the end of the LRU list if necessary */
1113 - buf_flush_free_margin();
1114 + buf_flush_free_margin(FALSE);
1117 if (buf_debug_prints && (count > 0)) {
1119 os_aio_simulated_wake_handler_threads();
1121 /* Flush pages from the end of the LRU list if necessary */
1122 - buf_flush_free_margin();
1123 + buf_flush_free_margin(FALSE);
1126 if (buf_debug_prints) {
1128 os_aio_simulated_wake_handler_threads();
1130 /* Flush pages from the end of the LRU list if necessary */
1131 - buf_flush_free_margin();
1132 + buf_flush_free_margin(FALSE);
1135 if (buf_debug_prints) {
1136 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.h mysql-5.1.29-rc/storage/innobase/include/buf0buf.h
1137 --- mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.h 2008-10-12 06:54:13.000000000 +0900
1138 +++ mysql-5.1.29-rc/storage/innobase/include/buf0buf.h 2008-11-18 15:09:58.000000000 +0900
1140 currently always the same as
1142 hash_table_t* page_hash; /* hash table of the file pages */
1143 + mutex_t hash_mutex;
1145 ulint n_pend_reads; /* number of pending read operations */
1148 UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
1149 /* base node of the modified block
1151 + mutex_t flush_list_mutex;
1152 ibool init_flush[BUF_FLUSH_LIST + 1];
1153 /* this is TRUE when a flush of the
1154 given type is being initialized */
1155 @@ -991,8 +993,10 @@
1156 in the case of AWE, at the start are
1157 always free blocks for which the
1158 physical memory is mapped to a frame */
1159 + mutex_t free_mutex;
1160 UT_LIST_BASE_NODE_T(buf_block_t) LRU;
1161 /* base node of the LRU list */
1162 + mutex_t LRU_mutex;
1163 buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
1164 blocks in the LRU list; NULL if LRU
1165 length less than BUF_LRU_OLD_MIN_LEN */
1166 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.ic mysql-5.1.29-rc/storage/innobase/include/buf0buf.ic
1167 --- mysql-5.1.29-rc_orig/storage/innobase/include/buf0buf.ic 2008-10-12 06:54:13.000000000 +0900
1168 +++ mysql-5.1.29-rc/storage/innobase/include/buf0buf.ic 2008-11-18 15:09:58.000000000 +0900
1173 - mutex_enter(&(buf_pool->mutex));
1174 + mutex_enter(&(buf_pool->flush_list_mutex));
1176 block = UT_LIST_GET_LAST(buf_pool->flush_list);
1179 lsn = block->oldest_modification;
1182 - mutex_exit(&(buf_pool->mutex));
1183 + mutex_exit(&(buf_pool->flush_list_mutex));
1187 @@ -388,18 +388,18 @@
1188 /* out: TRUE if io going on */
1189 buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
1191 - mutex_enter(&(buf_pool->mutex));
1192 + mutex_enter(&block->mutex);
1194 ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
1195 ut_ad(block->buf_fix_count > 0);
1197 if (block->io_fix != 0) {
1198 - mutex_exit(&(buf_pool->mutex));
1199 + mutex_exit(&block->mutex);
1204 - mutex_exit(&(buf_pool->mutex));
1205 + mutex_exit(&block->mutex);
1211 block = buf_block_align(frame);
1213 - mutex_enter(&(buf_pool->mutex));
1214 + mutex_enter(&block->mutex);
1216 if (block->state == BUF_BLOCK_FILE_PAGE) {
1217 lsn = block->newest_modification;
1219 lsn = ut_dulint_zero;
1222 - mutex_exit(&(buf_pool->mutex));
1223 + mutex_exit(&block->mutex);
1228 ut_a(block->buf_fix_count > 0);
1230 if (rw_latch == RW_X_LATCH && mtr->modifications) {
1231 - mutex_enter(&buf_pool->mutex);
1232 + mutex_enter(&buf_pool->flush_list_mutex);
1233 buf_flush_note_modification(block, mtr);
1234 - mutex_exit(&buf_pool->mutex);
1235 + mutex_exit(&buf_pool->flush_list_mutex);
1238 mutex_enter(&block->mutex);
1239 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.h mysql-5.1.29-rc/storage/innobase/include/buf0flu.h
1240 --- mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.h 2008-10-12 06:54:13.000000000 +0900
1241 +++ mysql-5.1.29-rc/storage/innobase/include/buf0flu.h 2008-11-18 15:09:58.000000000 +0900
1243 a margin of replaceable pages there. */
1246 -buf_flush_free_margin(void);
1247 +buf_flush_free_margin(
1248 /*=======================*/
1250 /************************************************************************
1251 Initializes a page for writing to the tablespace. */
1253 diff -ruN mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.ic mysql-5.1.29-rc/storage/innobase/include/buf0flu.ic
1254 --- mysql-5.1.29-rc_orig/storage/innobase/include/buf0flu.ic 2008-10-12 06:54:13.000000000 +0900
1255 +++ mysql-5.1.29-rc/storage/innobase/include/buf0flu.ic 2008-11-18 15:09:58.000000000 +0900
1257 ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
1258 #endif /* UNIV_SYNC_DEBUG */
1260 - mutex_enter(&(buf_pool->mutex));
1261 + mutex_enter(&(buf_pool->flush_list_mutex));
1263 ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
1269 - mutex_exit(&(buf_pool->mutex));
1270 + mutex_exit(&(buf_pool->flush_list_mutex));
1272 diff -ruN mysql-5.1.29-rc_orig/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info mysql-5.1.29-rc/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info
1273 --- /dev/null 1970-01-01 09:00:00.000000000 +0900
1274 +++ mysql-5.1.29-rc/patch_info/split_buf_pool_mutex_fixed_optimistic_safe.info 2008-11-18 15:09:58.000000000 +0900
1276 +File=split_buf_pool_mutex_fixed_optimistic_safe.patch
1277 +Name=InnoDB patch to fix buffer pool scalability
1279 +Author=Yasufumi Kinoshita