1 # name : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 --- a/storage/innodb_plugin/btr/btr0cur.c
9 +++ b/storage/innodb_plugin/btr/btr0cur.c
14 - buf_pool_mutex_enter();
15 + //buf_pool_mutex_enter();
16 + mutex_enter(&LRU_list_mutex);
17 mutex_enter(&block->mutex);
19 /* Only free the block if it is still allocated to
20 @@ -3864,16 +3865,21 @@
21 && buf_block_get_space(block) == space
22 && buf_block_get_page_no(block) == page_no) {
24 - if (!buf_LRU_free_block(&block->page, all)
25 - && all && block->page.zip.data) {
26 + if (!buf_LRU_free_block(&block->page, all, TRUE)
27 + && all && block->page.zip.data
28 + /* Now, buf_LRU_free_block() may release mutex temporarily */
29 + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
30 + && buf_block_get_space(block) == space
31 + && buf_block_get_page_no(block) == page_no) {
32 /* Attempt to deallocate the uncompressed page
33 if the whole block cannot be deallocted. */
35 - buf_LRU_free_block(&block->page, FALSE);
36 + buf_LRU_free_block(&block->page, FALSE, TRUE);
40 - buf_pool_mutex_exit();
41 + //buf_pool_mutex_exit();
42 + mutex_exit(&LRU_list_mutex);
43 mutex_exit(&block->mutex);
46 --- a/storage/innodb_plugin/btr/btr0sea.c
47 +++ b/storage/innodb_plugin/btr/btr0sea.c
49 rec_offs_init(offsets_);
51 rw_lock_x_lock(&btr_search_latch);
52 - buf_pool_mutex_enter();
53 + //buf_pool_mutex_enter();
54 + rw_lock_x_lock(&page_hash_latch);
56 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
58 @@ -1933,11 +1934,13 @@
59 /* We release btr_search_latch every once in a while to
60 give other queries a chance to run. */
61 if ((i != 0) && ((i % chunk_size) == 0)) {
62 - buf_pool_mutex_exit();
63 + //buf_pool_mutex_exit();
64 + rw_lock_x_unlock(&page_hash_latch);
65 rw_lock_x_unlock(&btr_search_latch);
67 rw_lock_x_lock(&btr_search_latch);
68 - buf_pool_mutex_enter();
69 + //buf_pool_mutex_enter();
70 + rw_lock_x_lock(&page_hash_latch);
73 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
74 @@ -2044,11 +2047,13 @@
75 /* We release btr_search_latch every once in a while to
76 give other queries a chance to run. */
78 - buf_pool_mutex_exit();
79 + //buf_pool_mutex_exit();
80 + rw_lock_x_unlock(&page_hash_latch);
81 rw_lock_x_unlock(&btr_search_latch);
83 rw_lock_x_lock(&btr_search_latch);
84 - buf_pool_mutex_enter();
85 + //buf_pool_mutex_enter();
86 + rw_lock_x_lock(&page_hash_latch);
89 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
94 - buf_pool_mutex_exit();
95 + //buf_pool_mutex_exit();
96 + rw_lock_x_unlock(&page_hash_latch);
97 rw_lock_x_unlock(&btr_search_latch);
98 if (UNIV_LIKELY_NULL(heap)) {
100 --- a/storage/innodb_plugin/buf/buf0buddy.c
101 +++ b/storage/innodb_plugin/buf/buf0buddy.c
104 /** Validate a given zip_free list. */
105 #define BUF_BUDDY_LIST_VALIDATE(i) \
106 - UT_LIST_VALIDATE(list, buf_page_t, \
107 + UT_LIST_VALIDATE(zip_list, buf_page_t, \
108 buf_pool->zip_free[i], \
109 ut_ad(buf_page_get_state( \
112 buf_page_t* bpage, /*!< in,own: block to be freed */
113 ulint i) /*!< in: index of buf_pool->zip_free[] */
115 - ut_ad(buf_pool_mutex_own());
116 + //ut_ad(buf_pool_mutex_own());
117 + ut_ad(mutex_own(&zip_free_mutex));
118 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
119 ut_ad(buf_pool->zip_free[i].start != bpage);
120 - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
121 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
124 /**********************************************************************//**
125 @@ -100,16 +101,17 @@
126 ulint i) /*!< in: index of buf_pool->zip_free[] */
129 - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
130 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
131 + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
132 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
134 ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
135 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
136 #endif /* UNIV_DEBUG */
138 - ut_ad(buf_pool_mutex_own());
139 + //ut_ad(buf_pool_mutex_own());
140 + ut_ad(mutex_own(&zip_free_mutex));
141 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
142 - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
143 + UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
146 /**********************************************************************//**
151 - ut_ad(buf_pool_mutex_own());
152 + //ut_ad(buf_pool_mutex_own());
153 + ut_ad(mutex_own(&zip_free_mutex));
154 ut_a(i < BUF_BUDDY_SIZES);
155 ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
157 @@ -164,16 +167,19 @@
159 buf_buddy_block_free(
160 /*=================*/
161 - void* buf) /*!< in: buffer frame to deallocate */
162 + void* buf, /*!< in: buffer frame to deallocate */
163 + ibool have_page_hash_mutex)
165 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
169 - ut_ad(buf_pool_mutex_own());
170 + //ut_ad(buf_pool_mutex_own());
171 ut_ad(!mutex_own(&buf_pool_zip_mutex));
172 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
174 + mutex_enter(&zip_hash_mutex);
176 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
177 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
178 && bpage->in_zip_hash && !bpage->in_page_hash),
179 @@ -185,12 +191,14 @@
180 ut_d(bpage->in_zip_hash = FALSE);
181 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
183 + mutex_exit(&zip_hash_mutex);
185 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
186 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
188 block = (buf_block_t*) bpage;
189 mutex_enter(&block->mutex);
190 - buf_LRU_block_free_non_file_page(block);
191 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
192 mutex_exit(&block->mutex);
194 ut_ad(buf_buddy_n_frames > 0);
196 buf_block_t* block) /*!< in: buffer frame to allocate */
198 const ulint fold = BUF_POOL_ZIP_FOLD(block);
199 - ut_ad(buf_pool_mutex_own());
200 + //ut_ad(buf_pool_mutex_own());
201 ut_ad(!mutex_own(&buf_pool_zip_mutex));
202 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
205 ut_ad(!block->page.in_page_hash);
206 ut_ad(!block->page.in_zip_hash);
207 ut_d(block->page.in_zip_hash = TRUE);
209 + mutex_enter(&zip_hash_mutex);
210 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
211 + mutex_exit(&zip_hash_mutex);
213 ut_d(buf_buddy_n_frames++);
215 @@ -269,25 +280,29 @@
217 ulint i, /*!< in: index of buf_pool->zip_free[],
218 or BUF_BUDDY_SIZES */
219 - ibool* lru) /*!< in: pointer to a variable that will be assigned
220 + ibool* lru, /*!< in: pointer to a variable that will be assigned
221 TRUE if storage was allocated from the LRU list
222 and buf_pool_mutex was temporarily released */
223 + ibool have_page_hash_mutex)
228 - ut_ad(buf_pool_mutex_own());
229 + //ut_ad(buf_pool_mutex_own());
230 ut_ad(!mutex_own(&buf_pool_zip_mutex));
231 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
233 if (i < BUF_BUDDY_SIZES) {
234 /* Try to allocate from the buddy system. */
235 + mutex_enter(&zip_free_mutex);
236 block = buf_buddy_alloc_zip(i);
243 + mutex_exit(&zip_free_mutex);
246 /* Try allocating from the buf_pool->free list. */
247 @@ -299,18 +314,29 @@
250 /* Try replacing an uncompressed page in the buffer pool. */
251 - buf_pool_mutex_exit();
252 + //buf_pool_mutex_exit();
253 + mutex_exit(&LRU_list_mutex);
254 + if (have_page_hash_mutex) {
255 + rw_lock_x_unlock(&page_hash_latch);
257 block = buf_LRU_get_free_block();
259 - buf_pool_mutex_enter();
260 + //buf_pool_mutex_enter();
261 + mutex_enter(&LRU_list_mutex);
262 + if (have_page_hash_mutex) {
263 + rw_lock_x_lock(&page_hash_latch);
267 buf_buddy_block_register(block);
269 + mutex_enter(&zip_free_mutex);
270 block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
273 buf_buddy_stat[i].used++;
274 + mutex_exit(&zip_free_mutex);
281 void* src, /*!< in: block to relocate */
282 void* dst, /*!< in: free block to relocate to */
283 - ulint i) /*!< in: index of buf_pool->zip_free[] */
284 + ulint i, /*!< in: index of buf_pool->zip_free[] */
285 + ibool have_page_hash_mutex)
288 const ulint size = BUF_BUDDY_LOW << i;
289 @@ -332,13 +359,20 @@
293 - ut_ad(buf_pool_mutex_own());
294 + //ut_ad(buf_pool_mutex_own());
295 + ut_ad(mutex_own(&zip_free_mutex));
296 ut_ad(!mutex_own(&buf_pool_zip_mutex));
297 ut_ad(!ut_align_offset(src, size));
298 ut_ad(!ut_align_offset(dst, size));
299 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
300 UNIV_MEM_ASSERT_W(dst, size);
302 + if (!have_page_hash_mutex) {
303 + mutex_exit(&zip_free_mutex);
304 + mutex_enter(&LRU_list_mutex);
305 + rw_lock_x_lock(&page_hash_latch);
308 /* We assume that all memory from buf_buddy_alloc()
309 is used for compressed page frames. */
312 added to buf_pool->page_hash yet. Obviously,
313 it cannot be relocated. */
315 + if (!have_page_hash_mutex) {
316 + mutex_enter(&zip_free_mutex);
317 + mutex_exit(&LRU_list_mutex);
318 + rw_lock_x_unlock(&page_hash_latch);
323 @@ -381,18 +420,27 @@
324 For the sake of simplicity, give up. */
325 ut_ad(page_zip_get_size(&bpage->zip) < size);
327 + if (!have_page_hash_mutex) {
328 + mutex_enter(&zip_free_mutex);
329 + mutex_exit(&LRU_list_mutex);
330 + rw_lock_x_unlock(&page_hash_latch);
335 + /* To keep latch order */
336 + if (have_page_hash_mutex)
337 + mutex_exit(&zip_free_mutex);
339 /* The block must have been allocated, but it may
340 contain uninitialized data. */
341 UNIV_MEM_ASSERT_W(src, size);
343 - mutex = buf_page_get_mutex(bpage);
344 + mutex = buf_page_get_mutex_enter(bpage);
346 - mutex_enter(mutex);
347 + mutex_enter(&zip_free_mutex);
349 - if (buf_page_can_relocate(bpage)) {
350 + if (mutex && buf_page_can_relocate(bpage)) {
351 /* Relocate the compressed page. */
352 ut_a(bpage->zip.data == src);
353 memcpy(dst, src, size);
354 @@ -406,10 +454,22 @@
355 buddy_stat->relocated_usec
356 += ut_time_us(NULL) - usec;
359 + if (!have_page_hash_mutex) {
360 + mutex_exit(&LRU_list_mutex);
361 + rw_lock_x_unlock(&page_hash_latch);
367 + if (!have_page_hash_mutex) {
368 + mutex_exit(&LRU_list_mutex);
369 + rw_lock_x_unlock(&page_hash_latch);
378 @@ -422,13 +482,15 @@
380 void* buf, /*!< in: block to be freed, must not be
381 pointed to by the buffer pool */
382 - ulint i) /*!< in: index of buf_pool->zip_free[],
383 + ulint i, /*!< in: index of buf_pool->zip_free[],
384 or BUF_BUDDY_SIZES */
385 + ibool have_page_hash_mutex)
390 - ut_ad(buf_pool_mutex_own());
391 + //ut_ad(buf_pool_mutex_own());
392 + ut_ad(mutex_own(&zip_free_mutex));
393 ut_ad(!mutex_own(&buf_pool_zip_mutex));
394 ut_ad(i <= BUF_BUDDY_SIZES);
395 ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
397 ((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
399 if (i == BUF_BUDDY_SIZES) {
400 - buf_buddy_block_free(buf);
401 + mutex_exit(&zip_free_mutex);
402 + buf_buddy_block_free(buf, have_page_hash_mutex);
403 + mutex_enter(&zip_free_mutex);
410 UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
411 - bpage = UT_LIST_GET_NEXT(list, bpage);
412 + bpage = UT_LIST_GET_NEXT(zip_list, bpage);
415 #ifndef UNIV_DEBUG_VALGRIND
417 ut_d(BUF_BUDDY_LIST_VALIDATE(i));
419 /* The buddy is not free. Is there a free block of this size? */
420 - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
421 + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
426 buf_buddy_remove_from_free(bpage, i);
428 /* Try to relocate the buddy of buf to the free block. */
429 - if (buf_buddy_relocate(buddy, bpage, i)) {
430 + if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
432 buddy->state = BUF_BLOCK_ZIP_FREE;
434 --- a/storage/innodb_plugin/buf/buf0buf.c
435 +++ b/storage/innodb_plugin/buf/buf0buf.c
437 /** mutex protecting the buffer pool struct and control blocks, except the
438 read-write lock in them */
439 UNIV_INTERN mutex_t buf_pool_mutex;
440 +UNIV_INTERN mutex_t LRU_list_mutex;
441 +UNIV_INTERN mutex_t flush_list_mutex;
442 +UNIV_INTERN rw_lock_t page_hash_latch;
443 +UNIV_INTERN mutex_t free_list_mutex;
444 +UNIV_INTERN mutex_t zip_free_mutex;
445 +UNIV_INTERN mutex_t zip_hash_mutex;
446 /** mutex protecting the control blocks of compressed-only pages
447 (of type buf_page_t, not buf_block_t) */
448 UNIV_INTERN mutex_t buf_pool_zip_mutex;
450 block->page.in_zip_hash = FALSE;
451 block->page.in_flush_list = FALSE;
452 block->page.in_free_list = FALSE;
453 - block->in_unzip_LRU_list = FALSE;
454 #endif /* UNIV_DEBUG */
455 + block->page.flush_list.prev = NULL;
456 + block->page.flush_list.next = NULL;
457 + block->page.zip_list.prev = NULL;
458 + block->page.zip_list.next = NULL;
459 block->page.in_LRU_list = FALSE;
460 + block->in_unzip_LRU_list = FALSE;
461 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
462 block->n_pointers = 0;
463 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
465 memset(block->frame, '\0', UNIV_PAGE_SIZE);
467 /* Add the block to the free list */
468 - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
469 + mutex_enter(&free_list_mutex);
470 + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
471 ut_d(block->page.in_free_list = TRUE);
472 + mutex_exit(&free_list_mutex);
475 frame += UNIV_PAGE_SIZE;
480 - ut_ad(buf_pool_mutex_own());
481 + //ut_ad(buf_pool_mutex_own());
483 block = chunk->blocks;
489 - ut_ad(buf_pool_mutex_own());
490 + //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
492 block = chunk->blocks;
495 /* 1. Initialize general fields
496 ------------------------------- */
497 mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
498 + mutex_create(&LRU_list_mutex, SYNC_BUF_LRU_LIST);
499 + mutex_create(&flush_list_mutex, SYNC_BUF_FLUSH_LIST);
500 + rw_lock_create(&page_hash_latch, SYNC_BUF_PAGE_HASH);
501 + mutex_create(&free_list_mutex, SYNC_BUF_FREE_LIST);
502 + mutex_create(&zip_free_mutex, SYNC_BUF_ZIP_FREE);
503 + mutex_create(&zip_hash_mutex, SYNC_BUF_ZIP_HASH);
505 mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
507 + mutex_enter(&LRU_list_mutex);
508 + rw_lock_x_lock(&page_hash_latch);
509 buf_pool_mutex_enter();
511 buf_pool->n_chunks = 1;
513 --------------------------- */
514 /* All fields are initialized by mem_zalloc(). */
516 + mutex_exit(&LRU_list_mutex);
517 + rw_lock_x_unlock(&page_hash_latch);
518 buf_pool_mutex_exit();
520 btr_search_sys_create(buf_pool->curr_size
521 @@ -1052,7 +1075,11 @@
525 - ut_ad(buf_pool_mutex_own());
526 + //ut_ad(buf_pool_mutex_own());
527 + ut_ad(mutex_own(&LRU_list_mutex));
528 +#ifdef UNIV_SYNC_DEBUG
529 + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
531 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
532 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
533 ut_a(bpage->buf_fix_count == 0);
534 @@ -1127,13 +1154,15 @@
536 buf_page_t* bpage) /*!< in: buffer block of a file page */
538 - buf_pool_mutex_enter();
539 + //buf_pool_mutex_enter();
540 + mutex_enter(&LRU_list_mutex);
542 ut_a(buf_page_in_file(bpage));
544 buf_LRU_make_block_young(bpage);
546 - buf_pool_mutex_exit();
547 + //buf_pool_mutex_exit();
548 + mutex_exit(&LRU_list_mutex);
551 /********************************************************************//**
552 @@ -1155,14 +1184,20 @@
553 ut_a(buf_page_in_file(bpage));
555 if (buf_page_peek_if_too_old(bpage)) {
556 - buf_pool_mutex_enter();
557 + //buf_pool_mutex_enter();
558 + mutex_enter(&LRU_list_mutex);
559 buf_LRU_make_block_young(bpage);
560 - buf_pool_mutex_exit();
561 + //buf_pool_mutex_exit();
562 + mutex_exit(&LRU_list_mutex);
563 } else if (!access_time) {
564 ulint time_ms = ut_time_ms();
565 - buf_pool_mutex_enter();
566 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
567 + //buf_pool_mutex_enter();
569 buf_page_set_accessed(bpage, time_ms);
570 - buf_pool_mutex_exit();
571 + mutex_exit(block_mutex);
573 + //buf_pool_mutex_exit();
577 @@ -1178,7 +1213,8 @@
581 - buf_pool_mutex_enter();
582 + //buf_pool_mutex_enter();
583 + rw_lock_s_lock(&page_hash_latch);
585 block = (buf_block_t*) buf_page_hash_get(space, offset);
587 @@ -1186,7 +1222,8 @@
588 block->check_index_page_at_flush = FALSE;
591 - buf_pool_mutex_exit();
592 + //buf_pool_mutex_exit();
593 + rw_lock_s_unlock(&page_hash_latch);
596 /********************************************************************//**
597 @@ -1204,7 +1241,8 @@
601 - buf_pool_mutex_enter();
602 + //buf_pool_mutex_enter();
603 + rw_lock_s_lock(&page_hash_latch);
605 block = (buf_block_t*) buf_page_hash_get(space, offset);
607 @@ -1214,7 +1252,8 @@
608 is_hashed = block->is_hashed;
611 - buf_pool_mutex_exit();
612 + //buf_pool_mutex_exit();
613 + rw_lock_s_unlock(&page_hash_latch);
617 @@ -1235,7 +1274,8 @@
621 - buf_pool_mutex_enter();
622 + //buf_pool_mutex_enter();
623 + rw_lock_s_lock(&page_hash_latch);
625 bpage = buf_page_hash_get(space, offset);
627 @@ -1245,7 +1285,8 @@
628 bpage->file_page_was_freed = TRUE;
631 - buf_pool_mutex_exit();
632 + //buf_pool_mutex_exit();
633 + rw_lock_s_unlock(&page_hash_latch);
637 @@ -1265,7 +1306,8 @@
641 - buf_pool_mutex_enter();
642 + //buf_pool_mutex_enter();
643 + rw_lock_s_lock(&page_hash_latch);
645 bpage = buf_page_hash_get(space, offset);
647 @@ -1273,7 +1315,8 @@
648 bpage->file_page_was_freed = FALSE;
651 - buf_pool_mutex_exit();
652 + //buf_pool_mutex_exit();
653 + rw_lock_s_unlock(&page_hash_latch);
657 @@ -1307,8 +1350,9 @@
658 buf_pool->stat.n_page_gets++;
661 - buf_pool_mutex_enter();
662 + //buf_pool_mutex_enter();
664 + rw_lock_s_lock(&page_hash_latch);
665 bpage = buf_page_hash_get(space, offset);
668 @@ -1316,7 +1360,8 @@
670 /* Page not in buf_pool: needs to be read from file */
672 - buf_pool_mutex_exit();
673 + //buf_pool_mutex_exit();
674 + rw_lock_s_unlock(&page_hash_latch);
676 buf_read_page(space, zip_size, offset);
678 @@ -1328,34 +1373,58 @@
679 if (UNIV_UNLIKELY(!bpage->zip.data)) {
680 /* There is no compressed page. */
682 - buf_pool_mutex_exit();
683 + //buf_pool_mutex_exit();
684 + rw_lock_s_unlock(&page_hash_latch);
688 + block_mutex = buf_page_get_mutex_enter(bpage);
690 + rw_lock_s_unlock(&page_hash_latch);
692 switch (buf_page_get_state(bpage)) {
693 case BUF_BLOCK_NOT_USED:
694 case BUF_BLOCK_READY_FOR_USE:
695 case BUF_BLOCK_MEMORY:
696 case BUF_BLOCK_REMOVE_HASH:
697 case BUF_BLOCK_ZIP_FREE:
699 + mutex_exit(block_mutex);
701 case BUF_BLOCK_ZIP_PAGE:
702 case BUF_BLOCK_ZIP_DIRTY:
703 - block_mutex = &buf_pool_zip_mutex;
704 - mutex_enter(block_mutex);
705 + ut_a(block_mutex == &buf_pool_zip_mutex);
706 bpage->buf_fix_count++;
708 case BUF_BLOCK_FILE_PAGE:
709 - block_mutex = &((buf_block_t*) bpage)->mutex;
710 + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
712 +	/* release mutex to obey the latch order */
713 + mutex_exit(block_mutex);
715 + /* get LRU_list_mutex for buf_LRU_free_block() */
716 + mutex_enter(&LRU_list_mutex);
717 mutex_enter(block_mutex);
719 - /* Discard the uncompressed page frame if possible. */
720 - if (buf_LRU_free_block(bpage, FALSE)) {
721 + if (UNIV_UNLIKELY(bpage->space != space
722 + || bpage->offset != offset
723 + || !bpage->in_LRU_list
724 + || !bpage->zip.data)) {
725 +			/* another thread intervened while the mutexes were released; retry */
726 + mutex_exit(&LRU_list_mutex);
727 + mutex_exit(block_mutex);
731 + /* Discard the uncompressed page frame if possible. */
732 + if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
733 + mutex_exit(&LRU_list_mutex);
734 mutex_exit(block_mutex);
738 + mutex_exit(&LRU_list_mutex);
740 buf_block_buf_fix_inc((buf_block_t*) bpage,
743 @@ -1368,7 +1437,7 @@
744 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
745 access_time = buf_page_is_accessed(bpage);
747 - buf_pool_mutex_exit();
748 + //buf_pool_mutex_exit();
750 mutex_exit(block_mutex);
752 @@ -1626,7 +1695,7 @@
753 const buf_block_t* block) /*!< in: pointer to block,
756 - ut_ad(buf_pool_mutex_own());
757 + //ut_ad(buf_pool_mutex_own());
759 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
760 /* The pointer should be aligned. */
761 @@ -1660,6 +1729,7 @@
765 + mutex_t* block_mutex = NULL;
768 ut_ad(mtr->state == MTR_ACTIVE);
769 @@ -1687,17 +1757,23 @@
770 buf_pool->stat.n_page_gets++;
773 - buf_pool_mutex_enter();
774 + //buf_pool_mutex_enter();
777 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
779 /* If the guess is a compressed page descriptor that
780 has been allocated by buf_page_alloc_descriptor(),
781 it may have been freed by buf_relocate(). */
782 - if (!buf_block_is_uncompressed(block)
783 + if (!block_mutex) {
784 + block = guess = NULL;
785 + } else if (!buf_block_is_uncompressed(block)
786 || offset != block->page.offset
787 || space != block->page.space
788 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
790 + mutex_exit(block_mutex);
792 block = guess = NULL;
794 ut_ad(!block->page.in_zip_hash);
795 @@ -1706,14 +1782,20 @@
799 + rw_lock_s_lock(&page_hash_latch);
800 block = (buf_block_t*) buf_page_hash_get(space, offset);
802 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
805 + rw_lock_s_unlock(&page_hash_latch);
810 /* Page not in buf_pool: needs to be read from file */
812 - buf_pool_mutex_exit();
813 + //buf_pool_mutex_exit();
815 if (mode == BUF_GET_IF_IN_POOL
816 || mode == BUF_PEEK_IF_IN_POOL) {
817 @@ -1758,7 +1840,8 @@
818 if (must_read && (mode == BUF_GET_IF_IN_POOL
819 || mode == BUF_PEEK_IF_IN_POOL)) {
820 /* The page is only being read to buffer */
821 - buf_pool_mutex_exit();
822 + //buf_pool_mutex_exit();
823 + mutex_exit(block_mutex);
827 @@ -1768,38 +1851,50 @@
830 case BUF_BLOCK_FILE_PAGE:
831 + if (block_mutex == &buf_pool_zip_mutex) {
832 + /* it is wrong mutex... */
833 + mutex_exit(block_mutex);
838 case BUF_BLOCK_ZIP_PAGE:
839 case BUF_BLOCK_ZIP_DIRTY:
840 + ut_ad(block_mutex == &buf_pool_zip_mutex);
841 bpage = &block->page;
842 /* Protect bpage->buf_fix_count. */
843 - mutex_enter(&buf_pool_zip_mutex);
844 +		/* Already protected here. */
845 + //mutex_enter(&buf_pool_zip_mutex);
847 if (bpage->buf_fix_count
848 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
849 /* This condition often occurs when the buffer
850 is not buffer-fixed, but I/O-fixed by
851 buf_page_init_for_read(). */
852 - mutex_exit(&buf_pool_zip_mutex);
853 + //mutex_exit(&buf_pool_zip_mutex);
855 /* The block is buffer-fixed or I/O-fixed.
857 - buf_pool_mutex_exit();
858 + //buf_pool_mutex_exit();
859 + mutex_exit(block_mutex);
860 os_thread_sleep(WAIT_FOR_READ);
865 /* Allocate an uncompressed page. */
866 - buf_pool_mutex_exit();
867 - mutex_exit(&buf_pool_zip_mutex);
868 + //buf_pool_mutex_exit();
869 + //mutex_exit(&buf_pool_zip_mutex);
870 + mutex_exit(block_mutex);
872 block = buf_LRU_get_free_block();
874 + block_mutex = &block->mutex;
876 - buf_pool_mutex_enter();
877 - mutex_enter(&block->mutex);
878 + //buf_pool_mutex_enter();
879 + mutex_enter(&LRU_list_mutex);
880 + rw_lock_x_lock(&page_hash_latch);
881 + mutex_enter(block_mutex);
884 buf_page_t* hash_bpage
885 @@ -1810,35 +1905,49 @@
886 while buf_pool_mutex was released.
887 Free the block that was allocated. */
889 - buf_LRU_block_free_non_file_page(block);
890 - mutex_exit(&block->mutex);
891 + buf_LRU_block_free_non_file_page(block, TRUE);
892 + mutex_exit(block_mutex);
894 block = (buf_block_t*) hash_bpage;
896 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
899 + rw_lock_x_unlock(&page_hash_latch);
900 + mutex_exit(&LRU_list_mutex);
905 + mutex_enter(&buf_pool_zip_mutex);
908 (bpage->buf_fix_count
909 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
911 + mutex_exit(&buf_pool_zip_mutex);
912 /* The block was buffer-fixed or I/O-fixed
913 while buf_pool_mutex was not held by this thread.
914 Free the block that was allocated and try again.
915 This should be extremely unlikely. */
917 - buf_LRU_block_free_non_file_page(block);
918 - mutex_exit(&block->mutex);
919 + buf_LRU_block_free_non_file_page(block, TRUE);
920 + //mutex_exit(&block->mutex);
922 + rw_lock_x_unlock(&page_hash_latch);
923 + mutex_exit(&LRU_list_mutex);
924 goto wait_until_unfixed;
927 /* Move the compressed page from bpage to block,
928 and uncompress it. */
930 - mutex_enter(&buf_pool_zip_mutex);
931 + mutex_enter(&flush_list_mutex);
933 buf_relocate(bpage, &block->page);
935 + rw_lock_x_unlock(&page_hash_latch);
937 buf_block_init_low(block);
938 block->lock_hash_val = lock_rec_hash(space, offset);
940 @@ -1848,7 +1957,7 @@
941 if (buf_page_get_state(&block->page)
942 == BUF_BLOCK_ZIP_PAGE) {
943 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
944 - UT_LIST_REMOVE(list, buf_pool->zip_clean,
945 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
947 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
948 ut_ad(!block->page.in_flush_list);
949 @@ -1858,6 +1967,8 @@
953 + mutex_exit(&flush_list_mutex);
955 /* Buffer-fix, I/O-fix, and X-latch the block
956 for the duration of the decompression.
957 Also add the block to the unzip_LRU list. */
958 @@ -1866,17 +1977,22 @@
959 /* Insert at the front of unzip_LRU list */
960 buf_unzip_LRU_add_block(block, FALSE);
962 + mutex_exit(&LRU_list_mutex);
964 block->page.buf_fix_count = 1;
965 buf_block_set_io_fix(block, BUF_IO_READ);
966 rw_lock_x_lock_func(&block->lock, 0, file, line);
968 UNIV_MEM_INVALID(bpage, sizeof *bpage);
970 - mutex_exit(&block->mutex);
971 + mutex_exit(block_mutex);
972 mutex_exit(&buf_pool_zip_mutex);
974 + mutex_enter(&buf_pool_mutex);
975 buf_pool->n_pend_unzip++;
976 + mutex_exit(&buf_pool_mutex);
978 - buf_pool_mutex_exit();
979 + //buf_pool_mutex_exit();
981 buf_page_free_descriptor(bpage);
983 @@ -1891,12 +2007,15 @@
986 /* Unfix and unlatch the block. */
987 - buf_pool_mutex_enter();
988 - mutex_enter(&block->mutex);
989 + //buf_pool_mutex_enter();
990 + block_mutex = &block->mutex;
991 + mutex_enter(block_mutex);
992 block->page.buf_fix_count--;
993 buf_block_set_io_fix(block, BUF_IO_NONE);
994 - mutex_exit(&block->mutex);
996 + mutex_enter(&buf_pool_mutex);
997 buf_pool->n_pend_unzip--;
998 + mutex_exit(&buf_pool_mutex);
999 rw_lock_x_unlock(&block->lock);
1002 @@ -1911,7 +2030,7 @@
1004 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1006 - mutex_enter(&block->mutex);
1007 + //mutex_enter(&block->mutex);
1008 #if UNIV_WORD_SIZE == 4
1009 /* On 32-bit systems, there is no padding in buf_page_t. On
1010 other systems, Valgrind could complain about uninitialized pad
1011 @@ -1923,9 +2042,9 @@
1012 /* Try to evict the block from the buffer pool, to use the
1013 insert buffer as much as possible. */
1015 - if (buf_LRU_free_block(&block->page, TRUE)) {
1016 - buf_pool_mutex_exit();
1017 - mutex_exit(&block->mutex);
1018 + if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
1019 + //buf_pool_mutex_exit();
1020 + mutex_exit(block_mutex);
1022 "innodb_change_buffering_debug evict %u %u\n",
1023 (unsigned) space, (unsigned) offset);
1024 @@ -1944,13 +2063,14 @@
1026 buf_block_buf_fix_inc(block, file, line);
1028 - mutex_exit(&block->mutex);
1029 + //mutex_exit(&block->mutex);
1031 /* Check if this is the first access to the page */
1033 access_time = buf_page_is_accessed(&block->page);
1035 - buf_pool_mutex_exit();
1036 + //buf_pool_mutex_exit();
1037 + mutex_exit(block_mutex);
1039 if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
1040 buf_page_set_accessed_make_young(&block->page, access_time);
1041 @@ -2180,9 +2300,11 @@
1042 mutex_exit(&block->mutex);
1044 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1045 - buf_pool_mutex_enter();
1046 + //buf_pool_mutex_enter();
1047 + mutex_enter(&LRU_list_mutex);
1048 buf_LRU_make_block_young(&block->page);
1049 - buf_pool_mutex_exit();
1050 + //buf_pool_mutex_exit();
1051 + mutex_exit(&LRU_list_mutex);
1052 } else if (!buf_page_is_accessed(&block->page)) {
1053 /* Above, we do a dirty read on purpose, to avoid
1054 mutex contention. The field buf_page_t::access_time
1055 @@ -2190,9 +2312,11 @@
1056 field must be protected by mutex, however. */
1057 ulint time_ms = ut_time_ms();
1059 - buf_pool_mutex_enter();
1060 + //buf_pool_mutex_enter();
1061 + mutex_enter(&block->mutex);
1062 buf_page_set_accessed(&block->page, time_ms);
1063 - buf_pool_mutex_exit();
1064 + //buf_pool_mutex_exit();
1065 + mutex_exit(&block->mutex);
1068 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1069 @@ -2258,16 +2382,19 @@
1071 ut_ad(mtr->state == MTR_ACTIVE);
1073 - buf_pool_mutex_enter();
1074 + //buf_pool_mutex_enter();
1075 + rw_lock_s_lock(&page_hash_latch);
1076 block = buf_block_hash_get(space_id, page_no);
1079 - buf_pool_mutex_exit();
1080 + //buf_pool_mutex_exit();
1081 + rw_lock_s_unlock(&page_hash_latch);
1085 mutex_enter(&block->mutex);
1086 - buf_pool_mutex_exit();
1087 + //buf_pool_mutex_exit();
1088 + rw_lock_s_unlock(&page_hash_latch);
1090 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1091 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1092 @@ -2354,7 +2481,10 @@
1094 buf_page_t* hash_page;
1096 - ut_ad(buf_pool_mutex_own());
1097 + //ut_ad(buf_pool_mutex_own());
1098 +#ifdef UNIV_SYNC_DEBUG
1099 + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
1101 ut_ad(mutex_own(&(block->mutex)));
1102 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1104 @@ -2387,7 +2517,8 @@
1105 (const void*) hash_page, (const void*) block);
1106 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1107 mutex_exit(&block->mutex);
1108 - buf_pool_mutex_exit();
1109 + //buf_pool_mutex_exit();
1110 + rw_lock_x_unlock(&page_hash_latch);
1114 @@ -2466,16 +2597,24 @@
1118 - buf_pool_mutex_enter();
1119 + //buf_pool_mutex_enter();
1120 + mutex_enter(&LRU_list_mutex);
1121 + rw_lock_x_lock(&page_hash_latch);
1123 if (buf_page_hash_get(space, offset)) {
1124 /* The page is already in the buffer pool. */
1127 mutex_enter(&block->mutex);
1128 - buf_LRU_block_free_non_file_page(block);
1129 + mutex_exit(&LRU_list_mutex);
1130 + rw_lock_x_unlock(&page_hash_latch);
1131 + buf_LRU_block_free_non_file_page(block, FALSE);
1132 mutex_exit(&block->mutex);
1135 + mutex_exit(&LRU_list_mutex);
1136 + rw_lock_x_unlock(&page_hash_latch);
1141 @@ -2495,6 +2634,8 @@
1142 mutex_enter(&block->mutex);
1143 buf_page_init(space, offset, block);
1145 + rw_lock_x_unlock(&page_hash_latch);
1147 /* The block must be put to the LRU list, to the old blocks */
1148 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1150 @@ -2522,7 +2663,7 @@
1151 been added to buf_pool->LRU and
1152 buf_pool->page_hash. */
1153 mutex_exit(&block->mutex);
1154 - data = buf_buddy_alloc(zip_size, &lru);
1155 + data = buf_buddy_alloc(zip_size, &lru, FALSE);
1156 mutex_enter(&block->mutex);
1157 block->page.zip.data = data;
1159 @@ -2535,6 +2676,7 @@
1160 buf_unzip_LRU_add_block(block, TRUE);
1163 + mutex_exit(&LRU_list_mutex);
1164 mutex_exit(&block->mutex);
1167 @@ -2542,7 +2684,7 @@
1168 control block (bpage), in order to avoid the
1169 invocation of buf_buddy_relocate_block() on
1170 uninitialized data. */
1171 - data = buf_buddy_alloc(zip_size, &lru);
1172 + data = buf_buddy_alloc(zip_size, &lru, TRUE);
1174 /* If buf_buddy_alloc() allocated storage from the LRU list,
1175 it released and reacquired buf_pool_mutex. Thus, we must
1176 @@ -2550,7 +2692,10 @@
1177 if (UNIV_UNLIKELY(lru)
1178 && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
1180 - buf_buddy_free(data, zip_size);
1181 + buf_buddy_free(data, zip_size, TRUE);
1183 + mutex_exit(&LRU_list_mutex);
1184 + rw_lock_x_unlock(&page_hash_latch);
1188 @@ -2581,20 +2726,28 @@
1189 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1190 buf_page_address_fold(space, offset), bpage);
1192 + rw_lock_x_unlock(&page_hash_latch);
1194 /* The block must be put to the LRU list, to the old blocks */
1195 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1196 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1197 + mutex_enter(&flush_list_mutex);
1198 buf_LRU_insert_zip_clean(bpage);
1199 + mutex_exit(&flush_list_mutex);
1200 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1202 + mutex_exit(&LRU_list_mutex);
1204 buf_page_set_io_fix(bpage, BUF_IO_READ);
1206 mutex_exit(&buf_pool_zip_mutex);
1209 + mutex_enter(&buf_pool_mutex);
1210 buf_pool->n_pend_reads++;
1211 + mutex_exit(&buf_pool_mutex);
1213 - buf_pool_mutex_exit();
1214 + //buf_pool_mutex_exit();
1216 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1218 @@ -2632,7 +2785,9 @@
1220 free_block = buf_LRU_get_free_block();
1222 - buf_pool_mutex_enter();
1223 + //buf_pool_mutex_enter();
1224 + mutex_enter(&LRU_list_mutex);
1225 + rw_lock_x_lock(&page_hash_latch);
1227 block = (buf_block_t*) buf_page_hash_get(space, offset);
1229 @@ -2645,7 +2800,9 @@
1230 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1232 /* Page can be found in buf_pool */
1233 - buf_pool_mutex_exit();
1234 + //buf_pool_mutex_exit();
1235 + mutex_exit(&LRU_list_mutex);
1236 + rw_lock_x_unlock(&page_hash_latch);
1238 buf_block_free(free_block);
1240 @@ -2667,6 +2824,7 @@
1241 mutex_enter(&block->mutex);
1243 buf_page_init(space, offset, block);
1244 + rw_lock_x_unlock(&page_hash_latch);
1246 /* The block must be put to the LRU list */
1247 buf_LRU_add_block(&block->page, FALSE);
1248 @@ -2693,7 +2851,7 @@
1249 the reacquisition of buf_pool_mutex. We also must
1250 defer this operation until after the block descriptor
1251 has been added to buf_pool->LRU and buf_pool->page_hash. */
1252 - data = buf_buddy_alloc(zip_size, &lru);
1253 + data = buf_buddy_alloc(zip_size, &lru, FALSE);
1254 mutex_enter(&block->mutex);
1255 block->page.zip.data = data;
1257 @@ -2711,7 +2869,8 @@
1259 buf_page_set_accessed(&block->page, time_ms);
1261 - buf_pool_mutex_exit();
1262 + //buf_pool_mutex_exit();
1263 + mutex_exit(&LRU_list_mutex);
1265 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1267 @@ -2761,6 +2920,7 @@
1268 enum buf_io_fix io_type;
1269 const ibool uncompressed = (buf_page_get_state(bpage)
1270 == BUF_BLOCK_FILE_PAGE);
1271 + mutex_t* block_mutex;
1273 ut_a(buf_page_in_file(bpage));
1275 @@ -2894,8 +3054,13 @@
1279 - buf_pool_mutex_enter();
1280 - mutex_enter(buf_page_get_mutex(bpage));
1281 + //buf_pool_mutex_enter();
1282 + if (io_type == BUF_IO_WRITE) {
1283 + mutex_enter(&LRU_list_mutex);
1285 + block_mutex = buf_page_get_mutex_enter(bpage);
1286 + ut_a(block_mutex);
1287 + mutex_enter(&buf_pool_mutex);
1289 #ifdef UNIV_IBUF_COUNT_DEBUG
1290 if (io_type == BUF_IO_WRITE || uncompressed) {
1291 @@ -2935,6 +3100,11 @@
1293 buf_flush_write_complete(bpage);
1295 + /* to keep consistency at buf_LRU_insert_zip_clean() */
1296 + //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
1297 + mutex_exit(&LRU_list_mutex);
1301 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1303 @@ -2957,8 +3127,9 @@
1305 #endif /* UNIV_DEBUG */
1307 - mutex_exit(buf_page_get_mutex(bpage));
1308 - buf_pool_mutex_exit();
1309 + mutex_exit(&buf_pool_mutex);
1310 + mutex_exit(block_mutex);
1311 + //buf_pool_mutex_exit();
1314 /*********************************************************************//**
1315 @@ -3005,7 +3176,8 @@
1316 freed = buf_LRU_search_and_free_block(100);
1319 - buf_pool_mutex_enter();
1320 + //buf_pool_mutex_enter();
1321 + mutex_enter(&LRU_list_mutex);
1323 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1324 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1325 @@ -3018,7 +3190,8 @@
1326 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1327 buf_refresh_io_stats();
1329 - buf_pool_mutex_exit();
1330 + //buf_pool_mutex_exit();
1331 + mutex_exit(&LRU_list_mutex);
1334 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1335 @@ -3043,7 +3216,10 @@
1339 - buf_pool_mutex_enter();
1340 + //buf_pool_mutex_enter();
1341 + mutex_enter(&LRU_list_mutex);
1342 + rw_lock_x_lock(&page_hash_latch);
1343 + /* to keep the new latch order, it cannot be validated correctly... */
1345 chunk = buf_pool->chunks;
1347 @@ -3142,7 +3318,7 @@
1348 /* Check clean compressed-only blocks. */
1350 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1351 - b = UT_LIST_GET_NEXT(list, b)) {
1352 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1353 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1354 switch (buf_page_get_io_fix(b)) {
1356 @@ -3167,8 +3343,9 @@
1358 /* Check dirty compressed-only blocks. */
1360 + mutex_enter(&flush_list_mutex);
1361 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1362 - b = UT_LIST_GET_NEXT(list, b)) {
1363 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1364 ut_ad(b->in_flush_list);
1366 switch (buf_page_get_state(b)) {
1367 @@ -3213,6 +3390,7 @@
1369 ut_a(buf_page_hash_get(b->space, b->offset) == b);
1371 + mutex_exit(&flush_list_mutex);
1373 mutex_exit(&buf_pool_zip_mutex);
1375 @@ -3224,19 +3402,27 @@
1378 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1379 + /* because of the latching order with block->mutex, we cannot acquire free_list_mutex before this point */
1381 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1382 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1383 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1388 + /* because of the latching order with block->mutex, we cannot acquire flush_list_mutex before this point */
1390 ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
1392 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1393 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1394 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1397 - buf_pool_mutex_exit();
1398 + //buf_pool_mutex_exit();
1399 + mutex_exit(&LRU_list_mutex);
1400 + rw_lock_x_unlock(&page_hash_latch);
1402 ut_a(buf_LRU_validate());
1403 ut_a(buf_flush_validate());
1404 @@ -3270,7 +3456,10 @@
1405 index_ids = mem_alloc(sizeof(dulint) * size);
1406 counts = mem_alloc(sizeof(ulint) * size);
1408 - buf_pool_mutex_enter();
1409 + //buf_pool_mutex_enter();
1410 + mutex_enter(&LRU_list_mutex);
1411 + mutex_enter(&free_list_mutex);
1412 + mutex_enter(&flush_list_mutex);
1415 "buf_pool size %lu\n"
1416 @@ -3337,7 +3526,10 @@
1420 - buf_pool_mutex_exit();
1421 + //buf_pool_mutex_exit();
1422 + mutex_exit(&LRU_list_mutex);
1423 + mutex_exit(&free_list_mutex);
1424 + mutex_exit(&flush_list_mutex);
1426 for (i = 0; i < n_found; i++) {
1427 index = dict_index_get_if_in_cache(index_ids[i]);
1428 @@ -3376,7 +3568,7 @@
1430 ulint fixed_pages_number = 0;
1432 - buf_pool_mutex_enter();
1433 + //buf_pool_mutex_enter();
1435 chunk = buf_pool->chunks;
1437 @@ -3410,7 +3602,7 @@
1438 /* Traverse the lists of clean and dirty compressed-only blocks. */
1440 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1441 - b = UT_LIST_GET_NEXT(list, b)) {
1442 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1443 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1444 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1446 @@ -3420,8 +3612,9 @@
1450 + mutex_enter(&flush_list_mutex);
1451 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1452 - b = UT_LIST_GET_NEXT(list, b)) {
1453 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1454 ut_ad(b->in_flush_list);
1456 switch (buf_page_get_state(b)) {
1457 @@ -3444,9 +3637,10 @@
1461 + mutex_exit(&flush_list_mutex);
1463 mutex_exit(&buf_pool_zip_mutex);
1464 - buf_pool_mutex_exit();
1465 + //buf_pool_mutex_exit();
1467 return(fixed_pages_number);
1469 @@ -3504,7 +3698,11 @@
1473 - buf_pool_mutex_enter();
1474 + //buf_pool_mutex_enter();
1475 + mutex_enter(&LRU_list_mutex);
1476 + mutex_enter(&free_list_mutex);
1477 + mutex_enter(&buf_pool_mutex);
1478 + mutex_enter(&flush_list_mutex);
1481 "Buffer pool size %lu\n"
1482 @@ -3607,7 +3805,11 @@
1483 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1485 buf_refresh_io_stats();
1486 - buf_pool_mutex_exit();
1487 + //buf_pool_mutex_exit();
1488 + mutex_exit(&LRU_list_mutex);
1489 + mutex_exit(&free_list_mutex);
1490 + mutex_exit(&buf_pool_mutex);
1491 + mutex_exit(&flush_list_mutex);
1494 /**********************************************************************//**
1495 @@ -3634,7 +3836,7 @@
1499 - buf_pool_mutex_enter();
1500 + //buf_pool_mutex_enter(); /* optimistic */
1502 chunk = buf_pool->chunks;
1504 @@ -3651,7 +3853,7 @@
1508 - buf_pool_mutex_exit();
1509 + //buf_pool_mutex_exit(); /* optimistic */
1513 @@ -3667,7 +3869,8 @@
1517 - buf_pool_mutex_enter();
1518 + //buf_pool_mutex_enter();
1519 + mutex_enter(&buf_pool_mutex);
1521 if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
1522 + buf_pool->n_flush[BUF_FLUSH_LIST]
1523 @@ -3677,7 +3880,8 @@
1527 - buf_pool_mutex_exit();
1528 + //buf_pool_mutex_exit();
1529 + mutex_exit(&buf_pool_mutex);
1533 @@ -3692,11 +3896,13 @@
1537 - buf_pool_mutex_enter();
1538 + //buf_pool_mutex_enter();
1539 + mutex_enter(&free_list_mutex);
1541 len = UT_LIST_GET_LEN(buf_pool->free);
1543 - buf_pool_mutex_exit();
1544 + //buf_pool_mutex_exit();
1545 + mutex_exit(&free_list_mutex);
1549 --- a/storage/innodb_plugin/buf/buf0flu.c
1550 +++ b/storage/innodb_plugin/buf/buf0flu.c
1552 const ib_rbt_node_t* c_node;
1553 const ib_rbt_node_t* p_node;
1555 - ut_ad(buf_pool_mutex_own());
1556 + //ut_ad(buf_pool_mutex_own());
1557 + ut_ad(mutex_own(&flush_list_mutex));
1559 /* Insert this buffer into the rbt. */
1560 c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
1563 #endif /* UNIV_DEBUG */
1565 - ut_ad(buf_pool_mutex_own());
1566 + //ut_ad(buf_pool_mutex_own());
1567 + ut_ad(mutex_own(&flush_list_mutex));
1570 #endif /* UNIV_DEBUG */
1571 @@ -199,12 +201,14 @@
1572 buf_flush_init_flush_rbt(void)
1573 /*==========================*/
1575 - buf_pool_mutex_enter();
1576 + //buf_pool_mutex_enter();
1577 + mutex_enter(&flush_list_mutex);
1579 /* Create red black tree for speedy insertions in flush list. */
1580 buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
1581 buf_flush_block_cmp);
1582 - buf_pool_mutex_exit();
1583 + //buf_pool_mutex_exit();
1584 + mutex_exit(&flush_list_mutex);
1587 /********************************************************************//**
1589 buf_flush_free_flush_rbt(void)
1590 /*==========================*/
1592 - buf_pool_mutex_enter();
1593 + //buf_pool_mutex_enter();
1594 + mutex_enter(&flush_list_mutex);
1596 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1597 ut_a(buf_flush_validate_low());
1599 rbt_free(buf_pool->flush_rbt);
1600 buf_pool->flush_rbt = NULL;
1602 - buf_pool_mutex_exit();
1603 + //buf_pool_mutex_exit();
1604 + mutex_exit(&flush_list_mutex);
1607 /********************************************************************//**
1609 /*=============================*/
1610 buf_block_t* block) /*!< in/out: block which is modified */
1612 - ut_ad(buf_pool_mutex_own());
1613 + //ut_ad(buf_pool_mutex_own());
1614 + ut_ad(mutex_own(&block->mutex));
1615 + ut_ad(mutex_own(&flush_list_mutex));
1616 ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
1617 || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
1618 <= block->page.oldest_modification));
1620 ut_ad(!block->page.in_zip_hash);
1621 ut_ad(!block->page.in_flush_list);
1622 ut_d(block->page.in_flush_list = TRUE);
1623 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1624 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1626 #ifdef UNIV_DEBUG_VALGRIND
1632 - ut_ad(buf_pool_mutex_own());
1633 + //ut_ad(buf_pool_mutex_own());
1634 + ut_ad(mutex_own(&block->mutex));
1635 + ut_ad(mutex_own(&flush_list_mutex));
1636 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1638 ut_ad(block->page.in_LRU_list);
1639 @@ -324,14 +334,14 @@
1640 > block->page.oldest_modification) {
1641 ut_ad(b->in_flush_list);
1643 - b = UT_LIST_GET_NEXT(list, b);
1644 + b = UT_LIST_GET_NEXT(flush_list, b);
1648 if (prev_b == NULL) {
1649 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1650 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1652 - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1653 + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1654 prev_b, &block->page);
1658 buf_page_in_file(bpage) and in the LRU list */
1660 //ut_ad(buf_pool_mutex_own());
1661 - //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1662 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1663 //ut_ad(bpage->in_LRU_list); /* optimistic use */
1665 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1666 @@ -387,12 +397,12 @@
1667 buf_page_in_file(bpage) */
1668 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1670 - ut_a(buf_page_in_file(bpage));
1671 - ut_ad(buf_pool_mutex_own());
1672 + //ut_a(buf_page_in_file(bpage));
1673 + //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
1674 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1675 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1677 - if (bpage->oldest_modification != 0
1678 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1679 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1680 ut_ad(bpage->in_flush_list);
1682 @@ -421,8 +431,11 @@
1684 buf_page_t* bpage) /*!< in: pointer to the block in question */
1686 - ut_ad(buf_pool_mutex_own());
1687 + //ut_ad(buf_pool_mutex_own());
1688 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1690 + mutex_enter(&flush_list_mutex);
1692 ut_ad(bpage->in_flush_list);
1694 switch (buf_page_get_state(bpage)) {
1695 @@ -433,17 +446,18 @@
1696 case BUF_BLOCK_READY_FOR_USE:
1697 case BUF_BLOCK_MEMORY:
1698 case BUF_BLOCK_REMOVE_HASH:
1699 + mutex_exit(&flush_list_mutex);
1702 case BUF_BLOCK_ZIP_DIRTY:
1703 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1704 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1705 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1706 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1707 buf_LRU_insert_zip_clean(bpage);
1708 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1710 case BUF_BLOCK_FILE_PAGE:
1711 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1712 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1718 bpage->oldest_modification = 0;
1720 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
1721 + ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
1722 ut_ad(ut_list_node_313->in_flush_list)));
1723 + mutex_exit(&flush_list_mutex);
1726 /********************************************************************//**
1729 buf_page_t* prev_b = NULL;
1731 - ut_ad(buf_pool_mutex_own());
1732 + //ut_ad(buf_pool_mutex_own());
1733 + ut_ad(mutex_own(&flush_list_mutex));
1735 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1737 @@ -494,18 +510,18 @@
1738 because we assert on in_flush_list in comparison function. */
1739 ut_d(bpage->in_flush_list = FALSE);
1741 - prev = UT_LIST_GET_PREV(list, bpage);
1742 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1743 + prev = UT_LIST_GET_PREV(flush_list, bpage);
1744 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1747 ut_ad(prev->in_flush_list);
1748 UT_LIST_INSERT_AFTER(
1751 buf_pool->flush_list,
1757 buf_pool->flush_list,
1761 io_fixed and oldest_modification != 0. Thus, it cannot be
1762 relocated in the buffer pool or removed from flush_list or
1764 - ut_ad(!buf_pool_mutex_own());
1765 + //ut_ad(!buf_pool_mutex_own());
1766 + ut_ad(!mutex_own(&LRU_list_mutex));
1767 + ut_ad(!mutex_own(&flush_list_mutex));
1768 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
1769 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
1770 ut_ad(bpage->oldest_modification != 0);
1771 @@ -1057,7 +1075,7 @@
1773 buf_block_t* block) /*!< in/out: buffer control block */
1775 - ut_ad(buf_pool_mutex_own());
1776 + //ut_ad(buf_pool_mutex_own());
1777 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1778 ut_ad(mutex_own(&block->mutex));
1780 @@ -1065,8 +1083,11 @@
1784 + buf_pool_mutex_enter();
1786 if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
1787 || buf_pool->init_flush[BUF_FLUSH_LRU]) {
1788 + buf_pool_mutex_exit();
1789 /* There is already a flush batch of the same type running */
1792 @@ -1139,12 +1160,19 @@
1793 ibool is_uncompressed;
1795 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
1796 - ut_ad(buf_pool_mutex_own());
1797 + //ut_ad(buf_pool_mutex_own());
1798 +#ifdef UNIV_SYNC_DEBUG
1799 + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
1800 + || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
1802 ut_ad(buf_page_in_file(bpage));
1804 block_mutex = buf_page_get_mutex(bpage);
1805 ut_ad(mutex_own(block_mutex));
1807 + mutex_enter(&buf_pool_mutex);
1808 + rw_lock_s_unlock(&page_hash_latch);
1810 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
1812 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
1813 @@ -1175,7 +1203,8 @@
1816 mutex_exit(block_mutex);
1817 - buf_pool_mutex_exit();
1818 + //buf_pool_mutex_exit();
1819 + mutex_exit(&buf_pool_mutex);
1821 /* Even though bpage is not protected by any mutex at
1822 this point, it is safe to access bpage, because it is
1823 @@ -1212,7 +1241,8 @@
1826 mutex_exit(block_mutex);
1827 - buf_pool_mutex_exit();
1828 + //buf_pool_mutex_exit();
1829 + mutex_exit(&buf_pool_mutex);
1833 @@ -1277,7 +1307,8 @@
1834 high = fil_space_get_size(space);
1837 - buf_pool_mutex_enter();
1838 + //buf_pool_mutex_enter();
1839 + rw_lock_s_lock(&page_hash_latch);
1841 for (i = low; i < high; i++) {
1843 @@ -1296,11 +1327,9 @@
1844 if (flush_type != BUF_FLUSH_LRU
1846 || buf_page_is_old(bpage)) {
1847 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
1849 - mutex_enter(block_mutex);
1850 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
1852 - if (buf_flush_ready_for_flush(bpage, flush_type)
1853 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
1854 && (i == offset || !bpage->buf_fix_count)) {
1855 /* We only try to flush those
1856 neighbors != offset where the buf fix count is
1857 @@ -1314,14 +1343,16 @@
1858 ut_ad(!mutex_own(block_mutex));
1861 - buf_pool_mutex_enter();
1863 + //buf_pool_mutex_enter();
1864 + rw_lock_s_lock(&page_hash_latch);
1865 + } else if (block_mutex) {
1866 mutex_exit(block_mutex);
1871 - buf_pool_mutex_exit();
1872 + //buf_pool_mutex_exit();
1873 + rw_lock_s_unlock(&page_hash_latch);
1877 @@ -1352,9 +1383,11 @@
1878 min_n), otherwise ignored */
1881 + buf_page_t* prev_bpage = NULL;
1882 ulint page_count = 0;
1885 + ulint remaining = 0;
1887 ut_ad((flush_type == BUF_FLUSH_LRU)
1888 || (flush_type == BUF_FLUSH_LIST));
1889 @@ -1362,20 +1395,28 @@
1890 ut_ad((flush_type != BUF_FLUSH_LIST)
1891 || sync_thread_levels_empty_gen(TRUE));
1892 #endif /* UNIV_SYNC_DEBUG */
1893 - buf_pool_mutex_enter();
1894 + //buf_pool_mutex_enter();
1895 + mutex_enter(&buf_pool_mutex);
1897 if ((buf_pool->n_flush[flush_type] > 0)
1898 || (buf_pool->init_flush[flush_type] == TRUE)) {
1900 /* There is already a flush batch of the same type running */
1902 - buf_pool_mutex_exit();
1903 + //buf_pool_mutex_exit();
1904 + mutex_exit(&buf_pool_mutex);
1906 return(ULINT_UNDEFINED);
1909 buf_pool->init_flush[flush_type] = TRUE;
1911 + mutex_exit(&buf_pool_mutex);
1913 + if (flush_type == BUF_FLUSH_LRU) {
1914 + mutex_enter(&LRU_list_mutex);
1919 /* If we have flushed enough, leave the loop */
1920 @@ -1392,7 +1433,13 @@
1922 ut_ad(flush_type == BUF_FLUSH_LIST);
1924 + mutex_enter(&flush_list_mutex);
1925 + remaining = UT_LIST_GET_LEN(buf_pool->flush_list);
1926 bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
1928 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
1930 + mutex_exit(&flush_list_mutex);
1932 || bpage->oldest_modification >= lsn_limit) {
1933 /* We have flushed enough */
1934 @@ -1409,26 +1456,35 @@
1935 function a pointer to a block in the list! */
1938 - mutex_t*block_mutex = buf_page_get_mutex(bpage);
1939 + mutex_t*block_mutex = buf_page_get_mutex_enter(bpage);
1942 - ut_a(buf_page_in_file(bpage));
1943 + //ut_a(buf_page_in_file(bpage));
1945 - mutex_enter(block_mutex);
1946 - ready = buf_flush_ready_for_flush(bpage, flush_type);
1947 - mutex_exit(block_mutex);
1948 + if (block_mutex) {
1949 + ready = buf_flush_ready_for_flush(bpage, flush_type);
1950 + mutex_exit(block_mutex);
1956 space = buf_page_get_space(bpage);
1957 offset = buf_page_get_page_no(bpage);
1959 - buf_pool_mutex_exit();
1960 + //buf_pool_mutex_exit();
1961 + if (flush_type == BUF_FLUSH_LRU) {
1962 + mutex_exit(&LRU_list_mutex);
1965 /* Try to flush also all the neighbors */
1966 page_count += buf_flush_try_neighbors(
1967 space, offset, flush_type, srv_flush_neighbor_pages);
1969 - buf_pool_mutex_enter();
1970 + //buf_pool_mutex_enter();
1971 + if (flush_type == BUF_FLUSH_LRU) {
1972 + mutex_enter(&LRU_list_mutex);
1976 } else if (flush_type == BUF_FLUSH_LRU) {
1977 @@ -1436,16 +1492,35 @@
1979 ut_ad(flush_type == BUF_FLUSH_LIST);
1981 - bpage = UT_LIST_GET_PREV(list, bpage);
1982 - ut_ad(!bpage || bpage->in_flush_list);
1983 + mutex_enter(&flush_list_mutex);
1984 + bpage = UT_LIST_GET_PREV(flush_list, bpage);
1985 + //ut_ad(!bpage || bpage->in_flush_list); /* optimistic */
1986 + if (bpage != prev_bpage) {
1987 + /* the search may warp.. retrying */
1991 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
1993 + mutex_exit(&flush_list_mutex);
1996 } while (bpage != NULL);
2001 /* If we could not find anything to flush, leave the loop */
2006 + if (flush_type == BUF_FLUSH_LRU) {
2007 + mutex_exit(&LRU_list_mutex);
2010 + mutex_enter(&buf_pool_mutex);
2012 buf_pool->init_flush[flush_type] = FALSE;
2014 if (buf_pool->n_flush[flush_type] == 0) {
2015 @@ -1455,7 +1530,8 @@
2016 os_event_set(buf_pool->no_flush[flush_type]);
2019 - buf_pool_mutex_exit();
2020 + //buf_pool_mutex_exit();
2021 + mutex_exit(&buf_pool_mutex);
2023 buf_flush_buffered_writes();
2025 @@ -1516,7 +1592,7 @@
2027 //buf_pool_mutex_enter();
2029 - buf_pool_mutex_enter();
2030 + mutex_enter(&LRU_list_mutex);
2032 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2034 @@ -1533,15 +1609,15 @@
2035 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2038 - block_mutex = buf_page_get_mutex(bpage);
2039 + block_mutex = buf_page_get_mutex_enter(bpage);
2041 - mutex_enter(block_mutex);
2043 - if (buf_flush_ready_for_replace(bpage)) {
2044 + if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2048 - mutex_exit(block_mutex);
2049 + if (block_mutex) {
2050 + mutex_exit(block_mutex);
2055 @@ -1550,7 +1626,7 @@
2057 //buf_pool_mutex_exit();
2059 - buf_pool_mutex_exit();
2060 + mutex_exit(&LRU_list_mutex);
2062 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
2064 @@ -1717,7 +1793,7 @@
2066 const ib_rbt_node_t* rnode = NULL;
2068 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2069 + UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2070 ut_ad(ut_list_node_313->in_flush_list));
2072 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2073 @@ -1732,7 +1808,7 @@
2074 while (bpage != NULL) {
2075 const ib_uint64_t om = bpage->oldest_modification;
2076 ut_ad(bpage->in_flush_list);
2077 - ut_a(buf_page_in_file(bpage));
2078 + //ut_a(buf_page_in_file(bpage)); /* optimistic */
2081 if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
2082 @@ -1744,7 +1820,7 @@
2083 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2086 - bpage = UT_LIST_GET_NEXT(list, bpage);
2087 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2089 ut_a(!bpage || om >= bpage->oldest_modification);
2091 @@ -1766,11 +1842,13 @@
2095 - buf_pool_mutex_enter();
2096 + //buf_pool_mutex_enter();
2097 + mutex_enter(&flush_list_mutex);
2099 ret = buf_flush_validate_low();
2101 - buf_pool_mutex_exit();
2102 + //buf_pool_mutex_exit();
2103 + mutex_exit(&flush_list_mutex);
2107 --- a/storage/innodb_plugin/buf/buf0lru.c
2108 +++ b/storage/innodb_plugin/buf/buf0lru.c
2111 buf_LRU_block_free_hashed_page(
2112 /*===========================*/
2113 - buf_block_t* block); /*!< in: block, must contain a file page and
2114 + buf_block_t* block, /*!< in: block, must contain a file page and
2115 be in a state where it can be freed */
2116 + ibool have_page_hash_mutex);
2118 /******************************************************************//**
2119 Determines if the unzip_LRU list should be used for evicting a victim
2120 @@ -154,16 +155,21 @@
2121 @return TRUE if should use unzip_LRU */
2124 -buf_LRU_evict_from_unzip_LRU(void)
2125 +buf_LRU_evict_from_unzip_LRU(
2126 + ibool have_LRU_mutex)
2127 /*==============================*/
2132 - ut_ad(buf_pool_mutex_own());
2133 + //ut_ad(buf_pool_mutex_own());
2135 + if (!have_LRU_mutex)
2136 + mutex_enter(&LRU_list_mutex);
2137 /* If the unzip_LRU list is empty, we can only use the LRU. */
2138 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2139 + if (!have_LRU_mutex)
2140 + mutex_exit(&LRU_list_mutex);
2144 @@ -172,14 +178,20 @@
2145 decompressed pages in the buffer pool. */
2146 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2147 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2148 + if (!have_LRU_mutex)
2149 + mutex_exit(&LRU_list_mutex);
2153 /* If eviction hasn't started yet, we assume by default
2154 that a workload is disk bound. */
2155 if (buf_pool->freed_page_clock == 0) {
2156 + if (!have_LRU_mutex)
2157 + mutex_exit(&LRU_list_mutex);
2160 + if (!have_LRU_mutex)
2161 + mutex_exit(&LRU_list_mutex);
2163 /* Calculate the average over past intervals, and add the values
2164 of the current interval. */
2165 @@ -245,18 +257,25 @@
2167 page_arr = ut_malloc(sizeof(ulint)
2168 * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2169 - buf_pool_mutex_enter();
2170 + //buf_pool_mutex_enter();
2171 + mutex_enter(&LRU_list_mutex);
2175 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2177 while (bpage != NULL) {
2178 + /* bpage->state, space, io_fix and buf_fix_count are protected by block_mutex in XtraDB */
2179 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2180 buf_page_t* prev_bpage;
2183 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2185 + if (UNIV_UNLIKELY(!block_mutex)) {
2189 ut_a(buf_page_in_file(bpage));
2191 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2192 @@ -265,23 +284,27 @@
2193 /* Compressed pages are never hashed.
2194 Skip blocks of other tablespaces.
2195 Skip I/O-fixed blocks (to be dealt with later). */
2196 + mutex_exit(block_mutex);
2202 - mutex_enter(&((buf_block_t*) bpage)->mutex);
2203 + //mutex_enter(&((buf_block_t*) bpage)->mutex);
2204 is_fixed = bpage->buf_fix_count > 0
2205 || !((buf_block_t*) bpage)->is_hashed;
2206 - mutex_exit(&((buf_block_t*) bpage)->mutex);
2207 + //mutex_exit(&((buf_block_t*) bpage)->mutex);
2210 + mutex_exit(block_mutex);
2214 /* Store the page number so that we can drop the hash
2215 index in a batch later. */
2216 page_arr[num_entries] = bpage->offset;
2217 + mutex_exit(block_mutex);
2219 ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
2222 @@ -291,10 +314,12 @@
2224 /* Array full. We release the buf_pool_mutex to
2225 obey the latching order. */
2226 - buf_pool_mutex_exit();
2227 + //buf_pool_mutex_exit();
2228 + mutex_exit(&LRU_list_mutex);
2229 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
2231 - buf_pool_mutex_enter();
2232 + //buf_pool_mutex_enter();
2233 + mutex_enter(&LRU_list_mutex);
2236 /* Note that we released the buf_pool mutex above
2237 @@ -313,13 +338,23 @@
2238 /* If, however, bpage has been removed from LRU list
2239 to the free list then we should restart the scan.
2240 bpage->state is protected by buf_pool mutex. */
2242 + /* obtain block_mutex again to avoid a race condition on bpage->state */
2243 + block_mutex = buf_page_get_mutex_enter(bpage);
2244 + if (!block_mutex) {
2249 && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2250 + mutex_exit(block_mutex);
2253 + mutex_exit(block_mutex);
2256 - buf_pool_mutex_exit();
2257 + //buf_pool_mutex_exit();
2258 + mutex_exit(&LRU_list_mutex);
2260 /* Drop any remaining batch of search hashed pages. */
2261 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2263 buf_LRU_drop_page_hash_for_tablespace(id);
2266 - buf_pool_mutex_enter();
2267 + //buf_pool_mutex_enter();
2268 + mutex_enter(&LRU_list_mutex);
2269 + rw_lock_x_lock(&page_hash_latch);
2273 @@ -377,8 +414,15 @@
2277 - block_mutex = buf_page_get_mutex(bpage);
2278 - mutex_enter(block_mutex);
2279 + block_mutex = buf_page_get_mutex_enter(bpage);
2281 + if (!block_mutex) {
2282 + /* This may be an impossible case...
2283 + Something is wrong, so it will be scan_again */
2285 + all_freed = FALSE;
2289 if (bpage->buf_fix_count > 0) {
2295 - buf_pool_mutex_exit();
2296 + //buf_pool_mutex_exit();
2297 + mutex_exit(&LRU_list_mutex);
2298 + rw_lock_x_unlock(&page_hash_latch);
2300 zip_size = buf_page_get_zip_size(bpage);
2301 page_no = buf_page_get_page_no(bpage);
2304 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2305 != BUF_BLOCK_ZIP_FREE) {
2306 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2307 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, TRUE);
2308 mutex_exit(block_mutex);
2310 /* The block_mutex should have been released
2315 - buf_pool_mutex_exit();
2316 + //buf_pool_mutex_exit();
2317 + mutex_exit(&LRU_list_mutex);
2318 + rw_lock_x_unlock(&page_hash_latch);
2321 os_thread_sleep(20000);
2326 - ut_ad(buf_pool_mutex_own());
2327 + //ut_ad(buf_pool_mutex_own());
2328 + ut_ad(mutex_own(&LRU_list_mutex));
2329 + ut_ad(mutex_own(&flush_list_mutex));
2330 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2332 /* Find the first successor of bpage in the LRU list
2333 @@ -476,17 +526,17 @@
2336 b = UT_LIST_GET_NEXT(LRU, b);
2337 - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2338 + } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2340 /* Insert bpage before b, i.e., after the predecessor of b. */
2342 - b = UT_LIST_GET_PREV(list, b);
2343 + b = UT_LIST_GET_PREV(zip_list, b);
2347 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2348 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2350 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2351 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2354 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2355 @@ -499,16 +549,17 @@
2357 buf_LRU_free_from_unzip_LRU_list(
2358 /*=============================*/
2359 - ulint n_iterations) /*!< in: how many times this has been called
2360 + ulint n_iterations, /*!< in: how many times this has been called
2361 repeatedly without result: a high value means
2362 that we should search farther; we will search
2363 n_iterations / 5 of the unzip_LRU list,
2364 or nothing if n_iterations >= 5 */
2365 + ibool have_LRU_mutex)
2370 - ut_ad(buf_pool_mutex_own());
2371 + //ut_ad(buf_pool_mutex_own()); /* optimistic */
2373 /* Theoratically it should be much easier to find a victim
2374 from unzip_LRU as we can choose even a dirty block (as we'll
2376 if we have done five iterations so far. */
2378 if (UNIV_UNLIKELY(n_iterations >= 5)
2379 - || !buf_LRU_evict_from_unzip_LRU()) {
2380 + || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
2384 @@ -526,18 +577,25 @@
2385 distance = 100 + (n_iterations
2386 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2389 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2390 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2391 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2395 + mutex_enter(&block->mutex);
2396 + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2397 + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2398 + mutex_exit(&block->mutex);
2402 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2403 ut_ad(block->in_unzip_LRU_list);
2404 ut_ad(block->page.in_LRU_list);
2406 - mutex_enter(&block->mutex);
2407 - freed = buf_LRU_free_block(&block->page, FALSE);
2408 + freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
2409 mutex_exit(&block->mutex);
2412 @@ -555,34 +613,45 @@
2414 buf_LRU_free_from_common_LRU_list(
2415 /*==============================*/
2416 - ulint n_iterations) /*!< in: how many times this has been called
2417 + ulint n_iterations, /*!< in: how many times this has been called
2418 repeatedly without result: a high value means
2419 that we should search farther; if
2420 n_iterations < 10, then we search
2421 n_iterations / 10 * buf_pool->curr_size
2422 pages from the end of the LRU list */
2423 + ibool have_LRU_mutex)
2428 - ut_ad(buf_pool_mutex_own());
2429 + //ut_ad(buf_pool_mutex_own()); /* optimistic */
2431 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2434 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2435 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2436 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2440 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2441 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2443 + if (!block_mutex) {
2447 + if (!bpage->in_LRU_list
2448 + || !buf_page_in_file(bpage)) {
2449 + mutex_exit(block_mutex);
2453 ut_ad(buf_page_in_file(bpage));
2454 ut_ad(bpage->in_LRU_list);
2456 - mutex_enter(block_mutex);
2457 accessed = buf_page_is_accessed(bpage);
2458 - freed = buf_LRU_free_block(bpage, TRUE);
2459 + freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
2460 mutex_exit(block_mutex);
2463 @@ -616,22 +685,33 @@
2464 n_iterations / 5 of the unzip_LRU list. */
2466 ibool freed = FALSE;
2467 + ibool have_LRU_mutex = FALSE;
2469 - buf_pool_mutex_enter();
2470 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2471 + have_LRU_mutex = TRUE;
2473 - freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
2474 + /* optimistic search... */
2475 + //buf_pool_mutex_enter();
2476 + if (have_LRU_mutex)
2477 + mutex_enter(&LRU_list_mutex);
2479 + freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
2482 - freed = buf_LRU_free_from_common_LRU_list(n_iterations);
2483 + freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
2486 + mutex_enter(&buf_pool_mutex);
2488 buf_pool->LRU_flush_ended = 0;
2489 } else if (buf_pool->LRU_flush_ended > 0) {
2490 buf_pool->LRU_flush_ended--;
2492 + mutex_exit(&buf_pool_mutex);
2494 - buf_pool_mutex_exit();
2495 + //buf_pool_mutex_exit();
2496 + if (have_LRU_mutex)
2497 + mutex_exit(&LRU_list_mutex);
2501 @@ -649,18 +729,22 @@
2502 buf_LRU_try_free_flushed_blocks(void)
2503 /*=================================*/
2505 - buf_pool_mutex_enter();
2506 + //buf_pool_mutex_enter();
2507 + mutex_enter(&buf_pool_mutex);
2509 while (buf_pool->LRU_flush_ended > 0) {
2511 - buf_pool_mutex_exit();
2512 + //buf_pool_mutex_exit();
2513 + mutex_exit(&buf_pool_mutex);
2515 buf_LRU_search_and_free_block(1);
2517 - buf_pool_mutex_enter();
2518 + //buf_pool_mutex_enter();
2519 + mutex_enter(&buf_pool_mutex);
2522 - buf_pool_mutex_exit();
2523 + //buf_pool_mutex_exit();
2524 + mutex_exit(&buf_pool_mutex);
2527 /******************************************************************//**
2532 - buf_pool_mutex_enter();
2533 + //buf_pool_mutex_enter();
2534 + mutex_enter(&LRU_list_mutex);
2535 + mutex_enter(&free_list_mutex);
2537 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2538 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
2543 - buf_pool_mutex_exit();
2544 + //buf_pool_mutex_exit();
2545 + mutex_exit(&LRU_list_mutex);
2546 + mutex_exit(&free_list_mutex);
2550 @@ -699,9 +787,10 @@
2554 - ut_ad(buf_pool_mutex_own());
2555 + //ut_ad(buf_pool_mutex_own());
2557 - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2558 + mutex_enter(&free_list_mutex);
2559 + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2562 ut_ad(block->page.in_free_list);
2564 ut_ad(!block->page.in_flush_list);
2565 ut_ad(!block->page.in_LRU_list);
2566 ut_a(!buf_page_in_file(&block->page));
2567 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2568 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2570 + mutex_exit(&free_list_mutex);
2572 mutex_enter(&block->mutex);
2575 UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
2577 mutex_exit(&block->mutex);
2579 + mutex_exit(&free_list_mutex);
2584 ibool mon_value_was = FALSE;
2585 ibool started_monitor = FALSE;
2587 - buf_pool_mutex_enter();
2588 + //buf_pool_mutex_enter();
2590 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2591 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2594 /* If there is a block in the free list, take it */
2595 block = buf_LRU_get_free_only();
2596 - buf_pool_mutex_exit();
2597 + //buf_pool_mutex_exit();
2600 memset(&block->page.zip, 0, sizeof block->page.zip);
2601 @@ -868,18 +961,21 @@
2603 os_aio_simulated_wake_handler_threads();
2605 - buf_pool_mutex_enter();
2606 + //buf_pool_mutex_enter();
2607 + mutex_enter(&buf_pool_mutex);
2609 if (buf_pool->LRU_flush_ended > 0) {
2610 /* We have written pages in an LRU flush. To make the insert
2611 buffer more efficient, we try to move these pages to the free
2614 - buf_pool_mutex_exit();
2615 + //buf_pool_mutex_exit();
2616 + mutex_exit(&buf_pool_mutex);
2618 buf_LRU_try_free_flushed_blocks();
2620 - buf_pool_mutex_exit();
2621 + //buf_pool_mutex_exit();
2622 + mutex_exit(&buf_pool_mutex);
2625 if (n_iterations > 10) {
2626 @@ -904,7 +1000,8 @@
2629 ut_a(buf_pool->LRU_old);
2630 - ut_ad(buf_pool_mutex_own());
2631 + //ut_ad(buf_pool_mutex_own());
2632 + ut_ad(mutex_own(&LRU_list_mutex));
2633 ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2634 ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2635 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2636 @@ -969,7 +1066,8 @@
2640 - ut_ad(buf_pool_mutex_own());
2641 + //ut_ad(buf_pool_mutex_own());
2642 + ut_ad(mutex_own(&LRU_list_mutex));
2643 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2645 /* We first initialize all blocks in the LRU list as old and then use
2646 @@ -1002,13 +1100,14 @@
2649 ut_ad(buf_page_in_file(bpage));
2650 - ut_ad(buf_pool_mutex_own());
2651 + //ut_ad(buf_pool_mutex_own());
2652 + ut_ad(mutex_own(&LRU_list_mutex));
2654 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2655 buf_block_t* block = (buf_block_t*) bpage;
2657 ut_ad(block->in_unzip_LRU_list);
2658 - ut_d(block->in_unzip_LRU_list = FALSE);
2659 + block->in_unzip_LRU_list = FALSE;
2661 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2663 @@ -1024,7 +1123,8 @@
2667 - ut_ad(buf_pool_mutex_own());
2668 + //ut_ad(buf_pool_mutex_own());
2669 + ut_ad(mutex_own(&LRU_list_mutex));
2671 ut_a(buf_page_in_file(bpage));
2673 @@ -1099,12 +1199,13 @@
2677 - ut_ad(buf_pool_mutex_own());
2678 + //ut_ad(buf_pool_mutex_own());
2679 + ut_ad(mutex_own(&LRU_list_mutex));
2681 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2683 ut_ad(!block->in_unzip_LRU_list);
2684 - ut_d(block->in_unzip_LRU_list = TRUE);
2685 + block->in_unzip_LRU_list = TRUE;
2688 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2689 @@ -1123,7 +1224,8 @@
2693 - ut_ad(buf_pool_mutex_own());
2694 + //ut_ad(buf_pool_mutex_own());
2695 + ut_ad(mutex_own(&LRU_list_mutex));
2697 ut_a(buf_page_in_file(bpage));
2699 @@ -1172,7 +1274,8 @@
2703 - ut_ad(buf_pool_mutex_own());
2704 + //ut_ad(buf_pool_mutex_own());
2705 + ut_ad(mutex_own(&LRU_list_mutex));
2707 ut_a(buf_page_in_file(bpage));
2708 ut_ad(!bpage->in_LRU_list);
2709 @@ -1249,7 +1352,8 @@
2710 /*=====================*/
2711 buf_page_t* bpage) /*!< in: control block */
2713 - ut_ad(buf_pool_mutex_own());
2714 + //ut_ad(buf_pool_mutex_own());
2715 + ut_ad(mutex_own(&LRU_list_mutex));
2718 buf_pool->stat.n_pages_made_young++;
2719 @@ -1288,16 +1392,17 @@
2722 buf_page_t* bpage, /*!< in: block to be freed */
2723 - ibool zip) /*!< in: TRUE if should remove also the
2724 + ibool zip, /*!< in: TRUE if should remove also the
2725 compressed page of an uncompressed page */
2726 + ibool have_LRU_mutex)
2728 buf_page_t* b = NULL;
2729 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2731 - ut_ad(buf_pool_mutex_own());
2732 + //ut_ad(buf_pool_mutex_own());
2733 ut_ad(mutex_own(block_mutex));
2734 ut_ad(buf_page_in_file(bpage));
2735 - ut_ad(bpage->in_LRU_list);
2736 + //ut_ad(bpage->in_LRU_list);
2737 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2738 #if UNIV_WORD_SIZE == 4
2739 /* On 32-bit systems, there is no padding in buf_page_t. On
2740 @@ -1306,7 +1411,7 @@
2741 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2744 - if (!buf_page_can_relocate(bpage)) {
2745 + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2747 /* Do not free buffer-fixed or I/O-fixed blocks. */
2749 @@ -1340,7 +1445,7 @@
2751 b = buf_page_alloc_descriptor();
2753 - memcpy(b, bpage, sizeof *b);
2754 + //memcpy(b, bpage, sizeof *b);
2758 @@ -1351,6 +1456,39 @@
2760 #endif /* UNIV_DEBUG */
2762 + /* not to break latch order, must re-enter block_mutex */
2763 + mutex_exit(block_mutex);
2765 + if (!have_LRU_mutex)
2766 + mutex_enter(&LRU_list_mutex); /* optimistic */
2767 + rw_lock_x_lock(&page_hash_latch);
2768 + mutex_enter(block_mutex);
2770 + /* recheck states of block */
2771 + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2772 + || !buf_page_can_relocate(bpage)) {
2775 + buf_buddy_free(b, sizeof *b, TRUE);
2777 + if (!have_LRU_mutex)
2778 + mutex_exit(&LRU_list_mutex);
2779 + rw_lock_x_unlock(&page_hash_latch);
2781 + } else if (zip || !bpage->zip.data) {
2782 + if (bpage->oldest_modification)
2784 + } else if (bpage->oldest_modification) {
2785 + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2786 + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2792 + memcpy(b, bpage, sizeof *b);
2795 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2796 != BUF_BLOCK_ZIP_FREE) {
2797 ut_a(bpage->buf_fix_count == 0);
2798 @@ -1362,6 +1500,10 @@
2800 ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
2802 + while (prev_b && !prev_b->in_LRU_list) {
2803 + prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2806 b->state = b->oldest_modification
2807 ? BUF_BLOCK_ZIP_DIRTY
2808 : BUF_BLOCK_ZIP_PAGE;
2809 @@ -1437,6 +1579,7 @@
2810 buf_LRU_add_block_low(b, buf_page_is_old(b));
2813 + mutex_enter(&flush_list_mutex);
2814 if (b->state == BUF_BLOCK_ZIP_PAGE) {
2815 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2816 buf_LRU_insert_zip_clean(b);
2817 @@ -1445,6 +1588,7 @@
2818 /* Relocate on buf_pool->flush_list. */
2819 buf_flush_relocate_on_flush_list(bpage, b);
2821 + mutex_exit(&flush_list_mutex);
2823 bpage->zip.data = NULL;
2824 page_zip_set_size(&bpage->zip, 0);
2825 @@ -1456,7 +1600,9 @@
2826 b->io_fix = BUF_IO_READ;
2829 - buf_pool_mutex_exit();
2830 + //buf_pool_mutex_exit();
2831 + mutex_exit(&LRU_list_mutex);
2832 + rw_lock_x_unlock(&page_hash_latch);
2833 mutex_exit(block_mutex);
2835 /* Remove possible adaptive hash index on the page.
2836 @@ -1488,7 +1634,9 @@
2837 : BUF_NO_CHECKSUM_MAGIC);
2840 - buf_pool_mutex_enter();
2841 + //buf_pool_mutex_enter();
2842 + if (have_LRU_mutex)
2843 + mutex_enter(&LRU_list_mutex);
2844 mutex_enter(block_mutex);
2847 @@ -1498,13 +1646,17 @@
2848 mutex_exit(&buf_pool_zip_mutex);
2851 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2852 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2854 /* The block_mutex should have been released by
2855 buf_LRU_block_remove_hashed_page() when it returns
2856 BUF_BLOCK_ZIP_FREE. */
2857 ut_ad(block_mutex == &buf_pool_zip_mutex);
2858 mutex_enter(block_mutex);
2860 + if (!have_LRU_mutex)
2861 + mutex_exit(&LRU_list_mutex);
2862 + rw_lock_x_unlock(&page_hash_latch);
2866 @@ -1516,12 +1668,13 @@
2868 buf_LRU_block_free_non_file_page(
2869 /*=============================*/
2870 - buf_block_t* block) /*!< in: block, must not contain a file page */
2871 + buf_block_t* block, /*!< in: block, must not contain a file page */
2872 + ibool have_page_hash_mutex)
2877 - ut_ad(buf_pool_mutex_own());
2878 + //ut_ad(buf_pool_mutex_own());
2879 ut_ad(mutex_own(&block->mutex));
2881 switch (buf_block_get_state(block)) {
2882 @@ -1555,15 +1708,17 @@
2884 block->page.zip.data = NULL;
2885 mutex_exit(&block->mutex);
2886 - buf_pool_mutex_exit_forbid();
2887 - buf_buddy_free(data, page_zip_get_size(&block->page.zip));
2888 - buf_pool_mutex_exit_allow();
2889 + //buf_pool_mutex_exit_forbid();
2890 + buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
2891 + //buf_pool_mutex_exit_allow();
2892 mutex_enter(&block->mutex);
2893 page_zip_set_size(&block->page.zip, 0);
2896 - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
2897 + mutex_enter(&free_list_mutex);
2898 + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
2899 ut_d(block->page.in_free_list = TRUE);
2900 + mutex_exit(&free_list_mutex);
2902 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
2904 @@ -1590,7 +1745,11 @@
2906 const buf_page_t* hashed_bpage;
2908 - ut_ad(buf_pool_mutex_own());
2909 + //ut_ad(buf_pool_mutex_own());
2910 + ut_ad(mutex_own(&LRU_list_mutex));
2911 +#ifdef UNIV_SYNC_DEBUG
2912 + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
2914 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2916 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
2917 @@ -1696,7 +1855,9 @@
2919 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2920 mutex_exit(buf_page_get_mutex(bpage));
2921 - buf_pool_mutex_exit();
2922 + //buf_pool_mutex_exit();
2923 + mutex_exit(&LRU_list_mutex);
2924 + rw_lock_x_unlock(&page_hash_latch);
2928 @@ -1720,14 +1881,14 @@
2929 ut_a(buf_page_get_zip_size(bpage));
2931 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2932 - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
2933 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
2934 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2936 mutex_exit(&buf_pool_zip_mutex);
2937 - buf_pool_mutex_exit_forbid();
2938 + //buf_pool_mutex_exit_forbid();
2939 buf_buddy_free(bpage->zip.data,
2940 - page_zip_get_size(&bpage->zip));
2941 - buf_pool_mutex_exit_allow();
2942 + page_zip_get_size(&bpage->zip), TRUE);
2943 + //buf_pool_mutex_exit_allow();
2944 buf_page_free_descriptor(bpage);
2945 return(BUF_BLOCK_ZIP_FREE);
2947 @@ -1749,9 +1910,9 @@
2948 ut_ad(!bpage->in_flush_list);
2949 ut_ad(!bpage->in_LRU_list);
2950 mutex_exit(&((buf_block_t*) bpage)->mutex);
2951 - buf_pool_mutex_exit_forbid();
2952 - buf_buddy_free(data, page_zip_get_size(&bpage->zip));
2953 - buf_pool_mutex_exit_allow();
2954 + //buf_pool_mutex_exit_forbid();
2955 + buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
2956 + //buf_pool_mutex_exit_allow();
2957 mutex_enter(&((buf_block_t*) bpage)->mutex);
2958 page_zip_set_size(&bpage->zip, 0);
2960 @@ -1777,15 +1938,16 @@
2962 buf_LRU_block_free_hashed_page(
2963 /*===========================*/
2964 - buf_block_t* block) /*!< in: block, must contain a file page and
2965 + buf_block_t* block, /*!< in: block, must contain a file page and
2966 be in a state where it can be freed */
2967 + ibool have_page_hash_mutex)
2969 - ut_ad(buf_pool_mutex_own());
2970 + //ut_ad(buf_pool_mutex_own());
2971 ut_ad(mutex_own(&block->mutex));
2973 buf_block_set_state(block, BUF_BLOCK_MEMORY);
2975 - buf_LRU_block_free_non_file_page(block);
2976 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
2979 /**********************************************************************//**
2980 @@ -1811,7 +1973,8 @@
2984 - buf_pool_mutex_enter();
2985 + //buf_pool_mutex_enter();
2986 + mutex_enter(&LRU_list_mutex);
2988 if (ratio != buf_LRU_old_ratio) {
2989 buf_LRU_old_ratio = ratio;
2990 @@ -1822,7 +1985,8 @@
2994 - buf_pool_mutex_exit();
2995 + //buf_pool_mutex_exit();
2996 + mutex_exit(&LRU_list_mutex);
2998 buf_LRU_old_ratio = ratio;
3000 @@ -1848,7 +2012,8 @@
3004 - buf_pool_mutex_enter();
3005 + //buf_pool_mutex_enter();
3006 + mutex_enter(&buf_pool_mutex);
3008 /* Update the index. */
3009 item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
3010 @@ -1869,7 +2034,8 @@
3011 /* Put current entry in the array. */
3012 memcpy(item, &cur_stat, sizeof *item);
3014 - buf_pool_mutex_exit();
3015 + //buf_pool_mutex_exit();
3016 + mutex_exit(&buf_pool_mutex);
3019 /* Clear the current entry. */
3020 @@ -1891,7 +2057,8 @@
3024 - buf_pool_mutex_enter();
3025 + //buf_pool_mutex_enter();
3026 + mutex_enter(&LRU_list_mutex);
3028 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3030 @@ -1951,16 +2118,22 @@
3032 ut_a(buf_pool->LRU_old_len == old_len);
3034 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3035 + mutex_exit(&LRU_list_mutex);
3036 + mutex_enter(&free_list_mutex);
3038 + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3039 ut_ad(ut_list_node_313->in_free_list));
3041 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3043 - bpage = UT_LIST_GET_NEXT(list, bpage)) {
3044 + bpage = UT_LIST_GET_NEXT(free, bpage)) {
3046 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3049 + mutex_exit(&free_list_mutex);
3050 + mutex_enter(&LRU_list_mutex);
3052 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3053 ut_ad(ut_list_node_313->in_unzip_LRU_list
3054 && ut_list_node_313->page.in_LRU_list));
3055 @@ -1974,7 +2147,8 @@
3056 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3059 - buf_pool_mutex_exit();
3060 + //buf_pool_mutex_exit();
3061 + mutex_exit(&LRU_list_mutex);
3064 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3065 @@ -1990,7 +2164,8 @@
3066 const buf_page_t* bpage;
3069 - buf_pool_mutex_enter();
3070 + //buf_pool_mutex_enter();
3071 + mutex_enter(&LRU_list_mutex);
3073 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3075 @@ -2047,6 +2222,7 @@
3076 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3079 - buf_pool_mutex_exit();
3080 + //buf_pool_mutex_exit();
3081 + mutex_exit(&LRU_list_mutex);
3083 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
3084 --- a/storage/innodb_plugin/buf/buf0rea.c
3085 +++ b/storage/innodb_plugin/buf/buf0rea.c
3086 @@ -233,18 +233,22 @@
3087 high = fil_space_get_size(space);
3090 - buf_pool_mutex_enter();
3091 + //buf_pool_mutex_enter();
3092 + mutex_enter(&buf_pool_mutex);
3094 if (buf_pool->n_pend_reads
3095 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
3096 - buf_pool_mutex_exit();
3097 + //buf_pool_mutex_exit();
3098 + mutex_exit(&buf_pool_mutex);
3102 + mutex_exit(&buf_pool_mutex);
3104 /* Count how many blocks in the area have been recently accessed,
3105 that is, reside near the start of the LRU list. */
3107 + rw_lock_s_lock(&page_hash_latch);
3108 for (i = low; i < high; i++) {
3109 const buf_page_t* bpage = buf_page_hash_get(space, i);
3111 @@ -256,13 +260,15 @@
3113 if (recent_blocks >= BUF_READ_AHEAD_RANDOM_THRESHOLD) {
3115 - buf_pool_mutex_exit();
3116 + //buf_pool_mutex_exit();
3117 + rw_lock_s_unlock(&page_hash_latch);
3123 - buf_pool_mutex_exit();
3124 + //buf_pool_mutex_exit();
3125 + rw_lock_s_unlock(&page_hash_latch);
3129 @@ -460,10 +466,12 @@
3131 tablespace_version = fil_space_get_version(space);
3133 - buf_pool_mutex_enter();
3134 + //buf_pool_mutex_enter();
3135 + mutex_enter(&buf_pool_mutex);
3137 if (high > fil_space_get_size(space)) {
3138 - buf_pool_mutex_exit();
3139 + //buf_pool_mutex_exit();
3140 + mutex_exit(&buf_pool_mutex);
3141 /* The area is not whole, return */
3144 @@ -471,10 +479,12 @@
3146 if (buf_pool->n_pend_reads
3147 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
3148 - buf_pool_mutex_exit();
3149 + //buf_pool_mutex_exit();
3150 + mutex_exit(&buf_pool_mutex);
3154 + mutex_exit(&buf_pool_mutex);
3156 /* Check that almost all pages in the area have been accessed; if
3157 offset == low, the accesses must be in a descending order, otherwise,
3162 + rw_lock_s_lock(&page_hash_latch);
3163 for (i = low; i < high; i++) {
3164 bpage = buf_page_hash_get(space, i);
3168 if (fail_count > threshold) {
3169 /* Too many failures: return */
3170 - buf_pool_mutex_exit();
3171 + //buf_pool_mutex_exit();
3172 + rw_lock_s_unlock(&page_hash_latch);
3177 bpage = buf_page_hash_get(space, offset);
3179 if (bpage == NULL) {
3180 - buf_pool_mutex_exit();
3181 + //buf_pool_mutex_exit();
3182 + rw_lock_s_unlock(&page_hash_latch);
3187 pred_offset = fil_page_get_prev(frame);
3188 succ_offset = fil_page_get_next(frame);
3190 - buf_pool_mutex_exit();
3191 + //buf_pool_mutex_exit();
3192 + rw_lock_s_unlock(&page_hash_latch);
3194 if ((offset == low) && (succ_offset == offset + 1)) {
3196 --- a/storage/innodb_plugin/handler/i_s.cc
3197 +++ b/storage/innodb_plugin/handler/i_s.cc
3198 @@ -2229,7 +2229,8 @@
3200 RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
3202 - buf_pool_mutex_enter();
3203 + //buf_pool_mutex_enter();
3204 + mutex_enter(&zip_free_mutex);
3206 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3207 buf_buddy_stat_t* buddy_stat = &buf_buddy_stat[x];
3208 @@ -2255,7 +2256,8 @@
3212 - buf_pool_mutex_exit();
3213 + //buf_pool_mutex_exit();
3214 + mutex_exit(&zip_free_mutex);
3215 DBUG_RETURN(status);
3218 --- a/storage/innodb_plugin/handler/innodb_patch_info.h
3219 +++ b/storage/innodb_plugin/handler/innodb_patch_info.h
3221 {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
3222 {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
3223 {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
3224 +{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Splitting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
3225 {NULL, NULL, NULL, NULL}
3227 --- a/storage/innodb_plugin/include/buf0buddy.h
3228 +++ b/storage/innodb_plugin/include/buf0buddy.h
3231 ulint size, /*!< in: compressed page size
3232 (between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
3233 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3234 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3235 TRUE if storage was allocated from the LRU list
3236 and buf_pool_mutex was temporarily released */
3237 + ibool have_page_hash_mutex)
3238 __attribute__((malloc, nonnull));
3239 /**********************************************************************//**
3243 void* buf, /*!< in: block to be freed, must not be
3244 pointed to by the buffer pool */
3245 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3246 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3247 + ibool have_page_hash_mutex)
3248 __attribute__((nonnull));
3250 /** Statistics of buddy blocks of a given size. */
3251 --- a/storage/innodb_plugin/include/buf0buddy.ic
3252 +++ b/storage/innodb_plugin/include/buf0buddy.ic
3254 /*================*/
3255 ulint i, /*!< in: index of buf_pool->zip_free[],
3256 or BUF_BUDDY_SIZES */
3257 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3258 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3259 TRUE if storage was allocated from the LRU list
3260 and buf_pool_mutex was temporarily released */
3261 + ibool have_page_hash_mutex)
3262 __attribute__((malloc, nonnull));
3264 /**********************************************************************//**
3267 void* buf, /*!< in: block to be freed, must not be
3268 pointed to by the buffer pool */
3269 - ulint i) /*!< in: index of buf_pool->zip_free[],
3270 + ulint i, /*!< in: index of buf_pool->zip_free[],
3271 or BUF_BUDDY_SIZES */
3272 + ibool have_page_hash_mutex)
3273 __attribute__((nonnull));
3275 /**********************************************************************//**
3278 ulint size, /*!< in: compressed page size
3279 (between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
3280 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3281 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3282 TRUE if storage was allocated from the LRU list
3283 and buf_pool_mutex was temporarily released */
3284 + ibool have_page_hash_mutex)
3286 - ut_ad(buf_pool_mutex_own());
3287 + //ut_ad(buf_pool_mutex_own());
3288 ut_ad(ut_is_2pow(size));
3289 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3290 ut_ad(size <= UNIV_PAGE_SIZE);
3292 - return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
3293 + return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3296 /**********************************************************************//**
3297 @@ -114,14 +117,27 @@
3299 void* buf, /*!< in: block to be freed, must not be
3300 pointed to by the buffer pool */
3301 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3302 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3303 + ibool have_page_hash_mutex)
3305 - ut_ad(buf_pool_mutex_own());
3306 + //ut_ad(buf_pool_mutex_own());
3307 ut_ad(ut_is_2pow(size));
3308 ut_ad(size >= PAGE_ZIP_MIN_SIZE);
3309 ut_ad(size <= UNIV_PAGE_SIZE);
3311 - buf_buddy_free_low(buf, buf_buddy_get_slot(size));
3312 + if (!have_page_hash_mutex) {
3313 + mutex_enter(&LRU_list_mutex);
3314 + rw_lock_x_lock(&page_hash_latch);
3317 + mutex_enter(&zip_free_mutex);
3318 + buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
3319 + mutex_exit(&zip_free_mutex);
3321 + if (!have_page_hash_mutex) {
3322 + mutex_exit(&LRU_list_mutex);
3323 + rw_lock_x_unlock(&page_hash_latch);
3327 #ifdef UNIV_MATERIALIZE
3328 --- a/storage/innodb_plugin/include/buf0buf.h
3329 +++ b/storage/innodb_plugin/include/buf0buf.h
3330 @@ -761,6 +761,15 @@
3331 const buf_page_t* bpage) /*!< in: pointer to control block */
3332 __attribute__((pure));
3334 +/*************************************************************************
3335 +Gets the mutex of a block and enter the mutex with consistency. */
3338 +buf_page_get_mutex_enter(
3339 +/*=========================*/
3340 + const buf_page_t* bpage) /*!< in: pointer to control block */
3341 + __attribute__((pure));
3343 /*********************************************************************//**
3344 Get the flush type of a page.
3345 @return flush type */
3346 @@ -1114,7 +1123,7 @@
3347 All these are protected by buf_pool_mutex. */
3350 - UT_LIST_NODE_T(buf_page_t) list;
3351 + /* UT_LIST_NODE_T(buf_page_t) list; */
3352 /*!< based on state, this is a
3353 list node, protected only by
3354 buf_pool_mutex, in one of the
3355 @@ -1134,6 +1143,10 @@
3356 BUF_BLOCK_REMOVE_HASH or
3357 BUF_BLOCK_READY_IN_USE. */
3359 + /* resplit for optimistic use */
3360 + UT_LIST_NODE_T(buf_page_t) free;
3361 + UT_LIST_NODE_T(buf_page_t) flush_list;
3362 + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3364 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3365 when buf_pool_mutex is free, the
3366 @@ -1214,11 +1227,11 @@
3367 a block is in the unzip_LRU list
3368 if page.state == BUF_BLOCK_FILE_PAGE
3369 and page.zip.data != NULL */
3371 +//#ifdef UNIV_DEBUG
3372 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3373 decompressed LRU list;
3374 used in debugging */
3375 -#endif /* UNIV_DEBUG */
3376 +//#endif /* UNIV_DEBUG */
3377 mutex_t mutex; /*!< mutex protecting this block:
3378 state (also protected by the buffer
3379 pool mutex), io_fix, buf_fix_count,
3380 @@ -1498,6 +1511,12 @@
3381 /** mutex protecting the buffer pool struct and control blocks, except the
3382 read-write lock in them */
3383 extern mutex_t buf_pool_mutex;
3384 +extern mutex_t LRU_list_mutex;
3385 +extern mutex_t flush_list_mutex;
3386 +extern rw_lock_t page_hash_latch;
3387 +extern mutex_t free_list_mutex;
3388 +extern mutex_t zip_free_mutex;
3389 +extern mutex_t zip_hash_mutex;
3390 /** mutex protecting the control blocks of compressed-only pages
3391 (of type buf_page_t, not buf_block_t) */
3392 extern mutex_t buf_pool_zip_mutex;
3393 @@ -1509,8 +1528,8 @@
3394 /** Test if buf_pool_mutex is owned. */
3395 #define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
3396 /** Acquire the buffer pool mutex. */
3397 +/* the buf_pool_mutex is changed the latch order */
3398 #define buf_pool_mutex_enter() do { \
3399 - ut_ad(!mutex_own(&buf_pool_zip_mutex)); \
3400 mutex_enter(&buf_pool_mutex); \
3403 --- a/storage/innodb_plugin/include/buf0buf.ic
3404 +++ b/storage/innodb_plugin/include/buf0buf.ic
3409 - buf_pool_mutex_enter();
3411 + //buf_pool_mutex_enter();
3412 + mutex_enter(&flush_list_mutex);
3414 bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
3416 @@ -146,9 +148,14 @@
3418 ut_ad(bpage->in_flush_list);
3419 lsn = bpage->oldest_modification;
3421 + mutex_exit(&flush_list_mutex);
3426 - buf_pool_mutex_exit();
3427 + //buf_pool_mutex_exit();
3428 + mutex_exit(&flush_list_mutex);
3430 /* The returned answer may be out of date: the flush_list can
3431 change after the mutex has been released. */
3433 case BUF_BLOCK_ZIP_FREE:
3434 /* This is a free page in buf_pool->zip_free[].
3435 Such pages should only be accessed by the buddy allocator. */
3437 + /* ut_error; */ /* optimistic */
3439 case BUF_BLOCK_ZIP_PAGE:
3440 case BUF_BLOCK_ZIP_DIRTY:
3443 switch (buf_page_get_state(bpage)) {
3444 case BUF_BLOCK_ZIP_FREE:
3446 + /* ut_error; */ /* optimistic */
3448 case BUF_BLOCK_ZIP_PAGE:
3449 case BUF_BLOCK_ZIP_DIRTY:
3450 @@ -321,6 +328,28 @@
3454 +/*************************************************************************
3455 +Gets the mutex of a block and enter the mutex with consistency. */
3458 +buf_page_get_mutex_enter(
3459 +/*=========================*/
3460 + const buf_page_t* bpage) /*!< in: pointer to control block */
3462 + mutex_t* block_mutex;
3465 + block_mutex = buf_page_get_mutex(bpage);
3467 + return block_mutex;
3469 + mutex_enter(block_mutex);
3470 + if (block_mutex == buf_page_get_mutex(bpage))
3471 + return block_mutex;
3472 + mutex_exit(block_mutex);
3476 /*********************************************************************//**
3477 Get the flush type of a page.
3478 @return flush type */
3480 buf_page_t* bpage, /*!< in/out: control block */
3481 enum buf_io_fix io_fix) /*!< in: io_fix state */
3483 - ut_ad(buf_pool_mutex_own());
3484 + //ut_ad(buf_pool_mutex_own());
3485 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3487 bpage->io_fix = io_fix;
3488 @@ -444,12 +473,13 @@
3489 /*==================*/
3490 const buf_page_t* bpage) /*!< control block being relocated */
3492 - ut_ad(buf_pool_mutex_own());
3493 + //ut_ad(buf_pool_mutex_own());
3494 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3495 ut_ad(buf_page_in_file(bpage));
3496 - ut_ad(bpage->in_LRU_list);
3498 + //ut_ad(bpage->in_LRU_list);
3500 - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3501 + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3502 && bpage->buf_fix_count == 0);
3506 const buf_page_t* bpage) /*!< in: control block */
3508 ut_ad(buf_page_in_file(bpage));
3509 - ut_ad(buf_pool_mutex_own());
3510 + //ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
3515 ibool old) /*!< in: old */
3517 ut_a(buf_page_in_file(bpage));
3518 - ut_ad(buf_pool_mutex_own());
3519 + //ut_ad(buf_pool_mutex_own());
3520 + ut_ad(mutex_own(&LRU_list_mutex));
3521 ut_ad(bpage->in_LRU_list);
3523 #ifdef UNIV_LRU_DEBUG
3525 ulint time_ms) /*!< in: ut_time_ms() */
3527 ut_a(buf_page_in_file(bpage));
3528 - ut_ad(buf_pool_mutex_own());
3529 + //ut_ad(buf_pool_mutex_own());
3530 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3532 if (!bpage->access_time) {
3533 /* Make this the time of the first access. */
3534 @@ -784,17 +816,17 @@
3536 buf_block_t* block) /*!< in, own: block to be freed */
3538 - buf_pool_mutex_enter();
3539 + //buf_pool_mutex_enter();
3541 mutex_enter(&block->mutex);
3543 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3545 - buf_LRU_block_free_non_file_page(block);
3546 + buf_LRU_block_free_non_file_page(block, FALSE);
3548 mutex_exit(&block->mutex);
3550 - buf_pool_mutex_exit();
3551 + //buf_pool_mutex_exit();
3553 #endif /* !UNIV_HOTBACKUP */
3555 @@ -842,17 +874,17 @@
3559 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
3560 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3562 - mutex_enter(block_mutex);
3564 - if (buf_page_in_file(bpage)) {
3565 + if (block_mutex && buf_page_in_file(bpage)) {
3566 lsn = bpage->newest_modification;
3571 - mutex_exit(block_mutex);
3572 + if (block_mutex) {
3573 + mutex_exit(block_mutex);
3579 buf_block_t* block) /*!< in: block */
3581 #ifdef UNIV_SYNC_DEBUG
3582 - ut_ad((buf_pool_mutex_own()
3583 + ut_ad((mutex_own(&LRU_list_mutex)
3584 && (block->page.buf_fix_count == 0))
3585 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3586 #endif /* UNIV_SYNC_DEBUG */
3587 @@ -947,7 +979,11 @@
3591 - ut_ad(buf_pool_mutex_own());
3592 + //ut_ad(buf_pool_mutex_own());
3593 +#ifdef UNIV_SYNC_DEBUG
3594 + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
3595 + || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
3598 /* Look for the page in the hash table */
3600 @@ -1002,11 +1038,13 @@
3602 const buf_page_t* bpage;
3604 - buf_pool_mutex_enter();
3605 + //buf_pool_mutex_enter();
3606 + rw_lock_s_lock(&page_hash_latch);
3608 bpage = buf_page_hash_get(space, offset);
3610 - buf_pool_mutex_exit();
3611 + //buf_pool_mutex_exit();
3612 + rw_lock_s_unlock(&page_hash_latch);
3614 return(bpage != NULL);
3616 @@ -1061,18 +1099,21 @@
3617 buf_block_t* block, /*!< in: buffer block */
3618 ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
3620 - mtr_t* mtr) /*!< in: mtr */
3621 + mtr_t* mtr __attribute__((unused))) /*!< in: mtr */
3625 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3626 ut_a(block->page.buf_fix_count > 0);
3628 + /* buf_flush_note_modification() should be called before this function. */
3630 if (rw_latch == RW_X_LATCH && mtr->modifications) {
3631 buf_pool_mutex_enter();
3632 buf_flush_note_modification(block, mtr);
3633 buf_pool_mutex_exit();
3637 mutex_enter(&block->mutex);
3639 --- a/storage/innodb_plugin/include/buf0flu.ic
3640 +++ b/storage/innodb_plugin/include/buf0flu.ic
3642 buf_block_t* block, /*!< in: block which is modified */
3643 mtr_t* mtr) /*!< in: mtr */
3645 + ibool use_LRU_mutex = FALSE;
3647 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
3648 + use_LRU_mutex = TRUE;
3650 + if (use_LRU_mutex)
3651 + mutex_enter(&LRU_list_mutex);
3653 + mutex_enter(&block->mutex);
3656 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3657 ut_ad(block->page.buf_fix_count > 0);
3658 #ifdef UNIV_SYNC_DEBUG
3659 ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
3660 #endif /* UNIV_SYNC_DEBUG */
3661 - ut_ad(buf_pool_mutex_own());
3662 + //ut_ad(buf_pool_mutex_own());
3664 ut_ad(mtr->start_lsn != 0);
3665 ut_ad(mtr->modifications);
3667 block->page.newest_modification = mtr->end_lsn;
3669 if (!block->page.oldest_modification) {
3670 + mutex_enter(&flush_list_mutex);
3672 block->page.oldest_modification = mtr->start_lsn;
3673 ut_ad(block->page.oldest_modification != 0);
3675 buf_flush_insert_into_flush_list(block);
3676 + mutex_exit(&flush_list_mutex);
3678 ut_ad(block->page.oldest_modification <= mtr->start_lsn);
3681 + mutex_exit(&block->mutex);
3683 ++srv_buf_pool_write_requests;
3685 + if (use_LRU_mutex)
3686 + mutex_exit(&LRU_list_mutex);
3689 /********************************************************************//**
3691 ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
3694 + ibool use_LRU_mutex = FALSE;
3696 + if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
3697 + use_LRU_mutex = TRUE;
3699 + if (use_LRU_mutex)
3700 + mutex_enter(&LRU_list_mutex);
3702 + mutex_enter(&(block->mutex));
3705 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3706 ut_ad(block->page.buf_fix_count > 0);
3707 @@ -101,23 +128,28 @@
3708 ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
3709 #endif /* UNIV_SYNC_DEBUG */
3711 - buf_pool_mutex_enter();
3712 + //buf_pool_mutex_enter();
3714 ut_ad(block->page.newest_modification <= end_lsn);
3716 block->page.newest_modification = end_lsn;
3718 if (!block->page.oldest_modification) {
3719 + mutex_enter(&flush_list_mutex);
3721 block->page.oldest_modification = start_lsn;
3723 ut_ad(block->page.oldest_modification != 0);
3725 buf_flush_insert_sorted_into_flush_list(block);
3726 + mutex_exit(&flush_list_mutex);
3728 ut_ad(block->page.oldest_modification <= start_lsn);
3731 - buf_pool_mutex_exit();
3732 + //buf_pool_mutex_exit();
3733 + if (use_LRU_mutex)
3734 + mutex_exit(&LRU_list_mutex);
3735 + mutex_exit(&(block->mutex));
3737 #endif /* !UNIV_HOTBACKUP */
3738 --- a/storage/innodb_plugin/include/buf0lru.h
3739 +++ b/storage/innodb_plugin/include/buf0lru.h
3743 buf_page_t* bpage, /*!< in: block to be freed */
3744 - ibool zip) /*!< in: TRUE if should remove also the
3745 + ibool zip, /*!< in: TRUE if should remove also the
3746 compressed page of an uncompressed page */
3747 + ibool have_LRU_mutex)
3748 __attribute__((nonnull));
3749 /******************************************************************//**
3750 Try to free a replaceable block.
3753 buf_LRU_block_free_non_file_page(
3754 /*=============================*/
3755 - buf_block_t* block); /*!< in: block, must not contain a file page */
3756 + buf_block_t* block, /*!< in: block, must not contain a file page */
3757 + ibool have_page_hash_mutex);
3758 /******************************************************************//**
3759 Adds a block to the LRU list. */
3761 --- a/storage/innodb_plugin/include/sync0sync.h
3762 +++ b/storage/innodb_plugin/include/sync0sync.h
3763 @@ -489,8 +489,14 @@
3764 SYNC_SEARCH_SYS, as memory allocation
3765 can call routines there! Otherwise
3766 the level is SYNC_MEM_HASH. */
3767 +#define SYNC_BUF_LRU_LIST 157
3768 +#define SYNC_BUF_PAGE_HASH 156
3769 +#define SYNC_BUF_BLOCK 155
3770 +#define SYNC_BUF_FREE_LIST 153
3771 +#define SYNC_BUF_ZIP_FREE 152
3772 +#define SYNC_BUF_ZIP_HASH 151
3773 #define SYNC_BUF_POOL 150
3774 -#define SYNC_BUF_BLOCK 149
3775 +#define SYNC_BUF_FLUSH_LIST 149
3776 #define SYNC_DOUBLEWRITE 140
3777 #define SYNC_ANY_LATCH 135
3778 #define SYNC_THR_LOCAL 133
3780 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3781 when atomic operations are not enabled */
3783 - ulint waiters; /*!< This ulint is set to 1 if there are (or
3784 + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3785 may be) threads waiting in the global wait
3786 array for this mutex to be released.
3787 Otherwise, this is 0. */
3788 --- a/storage/innodb_plugin/mtr/mtr0mtr.c
3789 +++ b/storage/innodb_plugin/mtr/mtr0mtr.c
3791 #include "page0types.h"
3792 #include "mtr0log.h"
3793 #include "log0log.h"
3794 +#include "buf0flu.h"
3796 #ifndef UNIV_HOTBACKUP
3797 # include "log0recv.h"
3798 @@ -105,6 +106,38 @@
3804 +mtr_memo_note_modification_all(
3805 +/*===========================*/
3806 + mtr_t* mtr) /* in: mtr */
3808 + mtr_memo_slot_t* slot;
3809 + dyn_array_t* memo;
3813 + ut_ad(mtr->magic_n == MTR_MAGIC_N);
3814 + ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
3816 + ut_ad(mtr->modifications);
3818 + memo = &(mtr->memo);
3820 + offset = dyn_array_get_data_size(memo);
3822 + while (offset > 0) {
3823 + offset -= sizeof(mtr_memo_slot_t);
3824 + slot = dyn_array_get_element(memo, offset);
3826 + if (UNIV_LIKELY(slot->object != NULL) &&
3827 + slot->type == MTR_MEMO_PAGE_X_FIX) {
3828 + buf_flush_note_modification(
3829 + (buf_block_t*)slot->object, mtr);
3834 /************************************************************//**
3835 Writes the contents of a mini-transaction log, if any, to the database log. */
3840 mtr_log_reserve_and_write(mtr);
3842 + mtr_memo_note_modification_all(mtr);
3845 /* We first update the modification info to buffer pages, and only
3846 @@ -198,11 +233,13 @@
3847 required when we insert modified buffer pages in to the flush list
3848 which must be sorted on oldest_modification. */
3850 - mtr_memo_pop_all(mtr);
3856 + /* All unlocking has been moved here, after log_sys mutex release. */
3857 + mtr_memo_pop_all(mtr);
3859 #endif /* !UNIV_HOTBACKUP */
3861 ut_d(mtr->state = MTR_COMMITTED);
3862 @@ -239,6 +276,12 @@
3863 slot = dyn_array_get_element(memo, offset);
3865 if ((object == slot->object) && (type == slot->type)) {
3866 + if (mtr->modifications &&
3867 + UNIV_LIKELY(slot->object != NULL) &&
3868 + slot->type == MTR_MEMO_PAGE_X_FIX) {
3869 + buf_flush_note_modification(
3870 + (buf_block_t*)slot->object, mtr);
3873 mtr_memo_slot_release(mtr, slot);
3875 --- a/storage/innodb_plugin/srv/srv0srv.c
3876 +++ b/storage/innodb_plugin/srv/srv0srv.c
3877 @@ -2873,7 +2873,7 @@
3879 mutex_exit(&(log_sys->mutex));
3881 - buf_pool_mutex_enter();
3882 + mutex_enter(&flush_list_mutex);
3885 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3886 @@ -2895,7 +2895,7 @@
3890 - buf_pool_mutex_exit();
3891 + mutex_exit(&flush_list_mutex);
3893 if (!srv_use_doublewrite_buf) {
3894 /* flush is faster than when doublewrite */
3895 --- a/storage/innodb_plugin/sync/sync0sync.c
3896 +++ b/storage/innodb_plugin/sync/sync0sync.c
3898 mutex->lock_word = 0;
3900 mutex->event = os_event_create(NULL);
3901 - mutex_set_waiters(mutex, 0);
3902 + mutex->waiters = 0;
3904 mutex->magic_n = MUTEX_MAGIC_N;
3905 #endif /* UNIV_DEBUG */
3906 @@ -432,6 +432,15 @@
3907 mutex_t* mutex, /*!< in: mutex */
3908 ulint n) /*!< in: value to set */
3910 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3914 + os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3916 + os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3919 volatile ulint* ptr; /* declared volatile to ensure that
3920 the value is stored to memory */
3924 *ptr = n; /* Here we assume that the write of a single
3925 word in memory is atomic */
3929 /******************************************************************//**
3930 @@ -1158,6 +1168,12 @@
3931 case SYNC_TRX_SYS_HEADER:
3932 case SYNC_FILE_FORMAT_TAG:
3933 case SYNC_DOUBLEWRITE:
3934 + case SYNC_BUF_LRU_LIST:
3935 + case SYNC_BUF_FLUSH_LIST:
3936 + case SYNC_BUF_PAGE_HASH:
3937 + case SYNC_BUF_FREE_LIST:
3938 + case SYNC_BUF_ZIP_FREE:
3939 + case SYNC_BUF_ZIP_HASH:
3941 case SYNC_SEARCH_SYS:
3942 case SYNC_SEARCH_SYS_CONF:
3943 @@ -1186,7 +1202,7 @@
3944 buffer block (block->mutex or buf_pool_zip_mutex). */
3945 if (!sync_thread_levels_g(array, level, FALSE)) {
3946 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3947 - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3948 + ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));