1 # name : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
15 - buf_pool_mutex_enter(buf_pool);
16 + //buf_pool_mutex_enter(buf_pool);
17 + mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
20 /* Only free the block if it is still allocated to
21 @@ -4053,17 +4054,22 @@
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
25 - if (buf_LRU_free_block(&block->page, all, NULL)
26 + if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
28 - && all && block->page.zip.data) {
29 + && all && block->page.zip.data
30 + /* Now, buf_LRU_free_block() may release mutex temporarily */
31 + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 + && buf_block_get_space(block) == space
33 + && buf_block_get_page_no(block) == page_no) {
34 /* Attempt to deallocate the uncompressed page
35 if the whole block cannot be deallocted. */
37 - buf_LRU_free_block(&block->page, FALSE, NULL);
38 + buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
42 - buf_pool_mutex_exit(buf_pool);
43 + //buf_pool_mutex_exit(buf_pool);
44 + mutex_exit(&buf_pool->LRU_list_mutex);
45 mutex_exit(&block->mutex);
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
54 rw_lock_x_lock(&btr_search_latch);
55 - buf_pool_mutex_enter_all();
56 + //buf_pool_mutex_enter_all();
58 table = btr_search_sys->hash_index;
62 buf_pool = buf_pool_from_array(j);
64 + mutex_enter(&buf_pool->LRU_list_mutex);
66 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
68 while (bpage != NULL) {
69 @@ -1301,9 +1303,11 @@
71 bpage = UT_LIST_GET_PREV(LRU, bpage);
74 + mutex_exit(&buf_pool->LRU_list_mutex);
77 - buf_pool_mutex_exit_all();
78 + //buf_pool_mutex_exit_all();
79 rw_lock_x_unlock(&btr_search_latch);
81 if (UNIV_LIKELY_NULL(heap)) {
83 rec_offs_init(offsets_);
85 rw_lock_x_lock(&btr_search_latch);
86 - buf_pool_mutex_enter_all();
87 + buf_pool_page_hash_x_lock_all();
89 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
91 @@ -1904,11 +1908,11 @@
92 /* We release btr_search_latch every once in a while to
93 give other queries a chance to run. */
94 if ((i != 0) && ((i % chunk_size) == 0)) {
95 - buf_pool_mutex_exit_all();
96 + buf_pool_page_hash_x_unlock_all();
97 rw_lock_x_unlock(&btr_search_latch);
99 rw_lock_x_lock(&btr_search_latch);
100 - buf_pool_mutex_enter_all();
101 + buf_pool_page_hash_x_lock_all();
104 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
105 @@ -2019,11 +2023,11 @@
106 /* We release btr_search_latch every once in a while to
107 give other queries a chance to run. */
109 - buf_pool_mutex_exit_all();
110 + buf_pool_page_hash_x_unlock_all();
111 rw_lock_x_unlock(&btr_search_latch);
113 rw_lock_x_lock(&btr_search_latch);
114 - buf_pool_mutex_enter_all();
115 + buf_pool_page_hash_x_lock_all();
118 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
119 @@ -2031,7 +2035,7 @@
123 - buf_pool_mutex_exit_all();
124 + buf_pool_page_hash_x_unlock_all();
125 rw_lock_x_unlock(&btr_search_latch);
126 if (UNIV_LIKELY_NULL(heap)) {
128 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
129 --- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
130 +++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
132 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
133 #endif /* UNIV_DEBUG_VALGRIND */
135 - ut_ad(buf_pool_mutex_own(buf_pool));
136 + //ut_ad(buf_pool_mutex_own(buf_pool));
137 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
138 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
139 ut_ad(buf_pool->zip_free[i].start != bpage);
140 - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
141 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
143 #ifdef UNIV_DEBUG_VALGRIND
144 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
146 buf_pool->zip_free[] */
148 #ifdef UNIV_DEBUG_VALGRIND
149 - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
150 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
151 + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
152 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
154 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
155 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
157 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
158 #endif /* UNIV_DEBUG_VALGRIND */
160 - ut_ad(buf_pool_mutex_own(buf_pool));
161 + //ut_ad(buf_pool_mutex_own(buf_pool));
162 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
163 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
164 - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
165 + UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
167 #ifdef UNIV_DEBUG_VALGRIND
168 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
169 @@ -128,12 +130,13 @@
173 - ut_ad(buf_pool_mutex_own(buf_pool));
174 + //ut_ad(buf_pool_mutex_own(buf_pool));
175 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
176 ut_a(i < BUF_BUDDY_SIZES);
178 #ifndef UNIV_DEBUG_VALGRIND
179 /* Valgrind would complain about accessing free memory. */
180 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
181 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
182 ut_ad(buf_page_get_state(ut_list_node_313)
183 == BUF_BLOCK_ZIP_FREE)));
184 #endif /* !UNIV_DEBUG_VALGRIND */
185 @@ -177,16 +180,19 @@
186 buf_buddy_block_free(
187 /*=================*/
188 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
189 - void* buf) /*!< in: buffer frame to deallocate */
190 + void* buf, /*!< in: buffer frame to deallocate */
191 + ibool have_page_hash_mutex)
193 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
197 - ut_ad(buf_pool_mutex_own(buf_pool));
198 + //ut_ad(buf_pool_mutex_own(buf_pool));
199 ut_ad(!mutex_own(&buf_pool->zip_mutex));
200 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
202 + mutex_enter(&buf_pool->zip_hash_mutex);
204 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
205 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
206 && bpage->in_zip_hash && !bpage->in_page_hash),
207 @@ -198,12 +204,14 @@
208 ut_d(bpage->in_zip_hash = FALSE);
209 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
211 + mutex_exit(&buf_pool->zip_hash_mutex);
213 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
214 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
216 block = (buf_block_t*) bpage;
217 mutex_enter(&block->mutex);
218 - buf_LRU_block_free_non_file_page(block);
219 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220 mutex_exit(&block->mutex);
222 ut_ad(buf_pool->buddy_n_frames > 0);
225 buf_pool_t* buf_pool = buf_pool_from_block(block);
226 const ulint fold = BUF_POOL_ZIP_FOLD(block);
227 - ut_ad(buf_pool_mutex_own(buf_pool));
228 + //ut_ad(buf_pool_mutex_own(buf_pool));
229 ut_ad(!mutex_own(&buf_pool->zip_mutex));
230 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
233 ut_ad(!block->page.in_page_hash);
234 ut_ad(!block->page.in_zip_hash);
235 ut_d(block->page.in_zip_hash = TRUE);
237 + mutex_enter(&buf_pool->zip_hash_mutex);
238 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
239 + mutex_exit(&buf_pool->zip_hash_mutex);
241 ut_d(buf_pool->buddy_n_frames++);
244 bpage->state = BUF_BLOCK_ZIP_FREE;
245 #ifndef UNIV_DEBUG_VALGRIND
246 /* Valgrind would complain about accessing free memory. */
247 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
248 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
249 ut_ad(buf_page_get_state(
251 == BUF_BLOCK_ZIP_FREE)));
252 @@ -291,25 +302,29 @@
253 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
254 ulint i, /*!< in: index of buf_pool->zip_free[],
255 or BUF_BUDDY_SIZES */
256 - ibool* lru) /*!< in: pointer to a variable that
257 + ibool* lru, /*!< in: pointer to a variable that
258 will be assigned TRUE if storage was
259 allocated from the LRU list and
260 buf_pool->mutex was temporarily
261 released, or NULL if the LRU list
262 should not be used */
263 + ibool have_page_hash_mutex)
267 - ut_ad(buf_pool_mutex_own(buf_pool));
268 + //ut_ad(buf_pool_mutex_own(buf_pool));
269 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
270 ut_ad(!mutex_own(&buf_pool->zip_mutex));
272 if (i < BUF_BUDDY_SIZES) {
273 /* Try to allocate from the buddy system. */
274 + mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_zip(buf_pool, i);
280 + mutex_exit(&buf_pool->zip_free_mutex);
283 /* Try allocating from the buf_pool->free list. */
284 @@ -326,19 +341,30 @@
287 /* Try replacing an uncompressed page in the buffer pool. */
288 - buf_pool_mutex_exit(buf_pool);
289 + //buf_pool_mutex_exit(buf_pool);
290 + mutex_exit(&buf_pool->LRU_list_mutex);
291 + if (have_page_hash_mutex) {
292 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
294 block = buf_LRU_get_free_block(buf_pool, 0);
296 - buf_pool_mutex_enter(buf_pool);
297 + //buf_pool_mutex_enter(buf_pool);
298 + mutex_enter(&buf_pool->LRU_list_mutex);
299 + if (have_page_hash_mutex) {
300 + rw_lock_x_lock(&buf_pool->page_hash_latch);
304 buf_buddy_block_register(block);
306 + mutex_enter(&buf_pool->zip_free_mutex);
307 block = buf_buddy_alloc_from(
308 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
311 buf_pool->buddy_stat[i].used++;
312 + mutex_exit(&buf_pool->zip_free_mutex);
319 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
321 - ut_ad(buf_pool_mutex_own(buf_pool));
322 + //ut_ad(buf_pool_mutex_own(buf_pool));
323 +#ifdef UNIV_SYNC_DEBUG
324 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
327 switch (buf_page_get_state(bpage)) {
328 case BUF_BLOCK_ZIP_FREE:
330 case BUF_BLOCK_FILE_PAGE:
331 case BUF_BLOCK_MEMORY:
332 case BUF_BLOCK_REMOVE_HASH:
334 + /* ut_error; */ /* optimistic */
335 case BUF_BLOCK_ZIP_DIRTY:
336 /* Cannot relocate dirty pages. */
341 mutex_enter(&buf_pool->zip_mutex);
342 + mutex_enter(&buf_pool->zip_free_mutex);
344 if (!buf_page_can_relocate(bpage)) {
345 mutex_exit(&buf_pool->zip_mutex);
346 + mutex_exit(&buf_pool->zip_free_mutex);
350 + if (bpage != buf_page_hash_get(buf_pool,
351 + bpage->space, bpage->offset)) {
352 + mutex_exit(&buf_pool->zip_mutex);
353 + mutex_exit(&buf_pool->zip_free_mutex);
357 @@ -384,18 +422,19 @@
358 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
360 /* relocate buf_pool->zip_clean */
361 - b = UT_LIST_GET_PREV(list, dpage);
362 - UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363 + b = UT_LIST_GET_PREV(zip_list, dpage);
364 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
367 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
368 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
370 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
371 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
374 UNIV_MEM_INVALID(bpage, sizeof *bpage);
376 mutex_exit(&buf_pool->zip_mutex);
377 + mutex_exit(&buf_pool->zip_free_mutex);
381 @@ -409,14 +448,16 @@
382 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
383 void* src, /*!< in: block to relocate */
384 void* dst, /*!< in: free block to relocate to */
385 - ulint i) /*!< in: index of
386 + ulint i, /*!< in: index of
387 buf_pool->zip_free[] */
388 + ibool have_page_hash_mutex)
391 const ulint size = BUF_BUDDY_LOW << i;
392 ullint usec = ut_time_us(NULL);
394 - ut_ad(buf_pool_mutex_own(buf_pool));
395 + //ut_ad(buf_pool_mutex_own(buf_pool));
396 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
397 ut_ad(!mutex_own(&buf_pool->zip_mutex));
398 ut_ad(!ut_align_offset(src, size));
399 ut_ad(!ut_align_offset(dst, size));
401 /* This is a compressed page. */
404 + if (!have_page_hash_mutex) {
405 + mutex_exit(&buf_pool->zip_free_mutex);
406 + mutex_enter(&buf_pool->LRU_list_mutex);
407 + rw_lock_x_lock(&buf_pool->page_hash_latch);
410 /* The src block may be split into smaller blocks,
411 some of which may be free. Thus, the
412 mach_read_from_4() calls below may attempt to read
414 added to buf_pool->page_hash yet. Obviously,
415 it cannot be relocated. */
417 + if (!have_page_hash_mutex) {
418 + mutex_enter(&buf_pool->zip_free_mutex);
419 + mutex_exit(&buf_pool->LRU_list_mutex);
420 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
425 @@ -473,18 +525,27 @@
426 For the sake of simplicity, give up. */
427 ut_ad(page_zip_get_size(&bpage->zip) < size);
429 + if (!have_page_hash_mutex) {
430 + mutex_enter(&buf_pool->zip_free_mutex);
431 + mutex_exit(&buf_pool->LRU_list_mutex);
432 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
437 + /* To keep latch order */
438 + if (have_page_hash_mutex)
439 + mutex_exit(&buf_pool->zip_free_mutex);
441 /* The block must have been allocated, but it may
442 contain uninitialized data. */
443 UNIV_MEM_ASSERT_W(src, size);
445 - mutex = buf_page_get_mutex(bpage);
446 + mutex = buf_page_get_mutex_enter(bpage);
448 - mutex_enter(mutex);
449 + mutex_enter(&buf_pool->zip_free_mutex);
451 - if (buf_page_can_relocate(bpage)) {
452 + if (mutex && buf_page_can_relocate(bpage)) {
453 /* Relocate the compressed page. */
454 ut_a(bpage->zip.data == src);
455 memcpy(dst, src, size);
456 @@ -499,10 +560,22 @@
457 buddy_stat->relocated_usec
458 += ut_time_us(NULL) - usec;
461 + if (!have_page_hash_mutex) {
462 + mutex_exit(&buf_pool->LRU_list_mutex);
463 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
469 + if (!have_page_hash_mutex) {
470 + mutex_exit(&buf_pool->LRU_list_mutex);
471 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
477 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
478 /* This must be a buf_page_t object. */
479 #if UNIV_WORD_SIZE == 4
480 @@ -511,10 +584,31 @@
481 about uninitialized pad bytes. */
482 UNIV_MEM_ASSERT_RW(src, size);
485 + mutex_exit(&buf_pool->zip_free_mutex);
487 + if (!have_page_hash_mutex) {
488 + mutex_enter(&buf_pool->LRU_list_mutex);
489 + rw_lock_x_lock(&buf_pool->page_hash_latch);
492 if (buf_buddy_relocate_block(src, dst)) {
493 + mutex_enter(&buf_pool->zip_free_mutex);
495 + if (!have_page_hash_mutex) {
496 + mutex_exit(&buf_pool->LRU_list_mutex);
497 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
503 + mutex_enter(&buf_pool->zip_free_mutex);
505 + if (!have_page_hash_mutex) {
506 + mutex_exit(&buf_pool->LRU_list_mutex);
507 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
512 @@ -529,13 +623,15 @@
513 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
514 void* buf, /*!< in: block to be freed, must not be
515 pointed to by the buffer pool */
516 - ulint i) /*!< in: index of buf_pool->zip_free[],
517 + ulint i, /*!< in: index of buf_pool->zip_free[],
518 or BUF_BUDDY_SIZES */
519 + ibool have_page_hash_mutex)
524 - ut_ad(buf_pool_mutex_own(buf_pool));
525 + //ut_ad(buf_pool_mutex_own(buf_pool));
526 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
527 ut_ad(!mutex_own(&buf_pool->zip_mutex));
528 ut_ad(i <= BUF_BUDDY_SIZES);
529 ut_ad(buf_pool->buddy_stat[i].used > 0);
531 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
533 if (i == BUF_BUDDY_SIZES) {
534 - buf_buddy_block_free(buf_pool, buf);
535 + mutex_exit(&buf_pool->zip_free_mutex);
536 + buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
537 + mutex_enter(&buf_pool->zip_free_mutex);
545 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
546 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
547 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
550 @@ -600,13 +698,13 @@
551 #ifndef UNIV_DEBUG_VALGRIND
553 /* Valgrind would complain about accessing free memory. */
554 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
555 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
556 ut_ad(buf_page_get_state(ut_list_node_313)
557 == BUF_BLOCK_ZIP_FREE)));
558 #endif /* UNIV_DEBUG_VALGRIND */
560 /* The buddy is not free. Is there a free block of this size? */
561 - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
562 + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
565 /* Remove the block from the free list, because a successful
567 buf_buddy_remove_from_free(buf_pool, bpage, i);
569 /* Try to relocate the buddy of buf to the free block. */
570 - if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
571 + if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
573 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
575 @@ -636,14 +734,14 @@
577 (Parts of the buddy can be free in
578 buf_pool->zip_free[j] with j < i.) */
579 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
580 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
581 ut_ad(buf_page_get_state(
583 == BUF_BLOCK_ZIP_FREE
584 && ut_list_node_313 != buddy)));
585 #endif /* !UNIV_DEBUG_VALGRIND */
587 - if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
588 + if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
591 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
592 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
593 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
594 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
596 #ifdef UNIV_PFS_RWLOCK
597 /* Keys to register buffer block related rwlocks and mutexes with
598 performance schema */
599 +UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
600 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
601 # ifdef UNIV_SYNC_DEBUG
602 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
604 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
605 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
606 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
607 +UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
608 +UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
609 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
610 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
611 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
612 #endif /* UNIV_PFS_MUTEX */
615 block->page.in_zip_hash = FALSE;
616 block->page.in_flush_list = FALSE;
617 block->page.in_free_list = FALSE;
618 - block->in_unzip_LRU_list = FALSE;
619 #endif /* UNIV_DEBUG */
620 block->page.in_LRU_list = FALSE;
621 + block->in_unzip_LRU_list = FALSE;
622 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
623 block->n_pointers = 0;
624 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
626 memset(block->frame, '\0', UNIV_PAGE_SIZE);
628 /* Add the block to the free list */
629 - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
630 + mutex_enter(&buf_pool->free_list_mutex);
631 + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
633 ut_d(block->page.in_free_list = TRUE);
634 + mutex_exit(&buf_pool->free_list_mutex);
635 ut_ad(buf_pool_from_block(block) == buf_pool);
638 @@ -1038,7 +1045,8 @@
639 buf_chunk_t* chunk = buf_pool->chunks;
642 - ut_ad(buf_pool_mutex_own(buf_pool));
643 + //ut_ad(buf_pool_mutex_own(buf_pool));
644 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
645 for (n = buf_pool->n_chunks; n--; chunk++) {
647 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
648 @@ -1138,7 +1146,7 @@
650 const buf_block_t* block_end;
652 - ut_ad(buf_pool_mutex_own(buf_pool));
653 + //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
655 block_end = chunk->blocks + chunk->size;
657 @@ -1150,8 +1158,10 @@
658 ut_ad(!block->in_unzip_LRU_list);
659 ut_ad(!block->page.in_flush_list);
660 /* Remove the block from the free list. */
661 + mutex_enter(&buf_pool->free_list_mutex);
662 ut_ad(block->page.in_free_list);
663 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
664 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
665 + mutex_exit(&buf_pool->free_list_mutex);
667 /* Free the latches. */
668 mutex_free(&block->mutex);
669 @@ -1208,9 +1218,21 @@
670 ------------------------------- */
671 mutex_create(buf_pool_mutex_key,
672 &buf_pool->mutex, SYNC_BUF_POOL);
673 + mutex_create(buf_pool_LRU_list_mutex_key,
674 + &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
675 + rw_lock_create(buf_pool_page_hash_key,
676 + &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
677 + mutex_create(buf_pool_free_list_mutex_key,
678 + &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
679 + mutex_create(buf_pool_zip_free_mutex_key,
680 + &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
681 + mutex_create(buf_pool_zip_hash_mutex_key,
682 + &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
683 mutex_create(buf_pool_zip_mutex_key,
684 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
686 + mutex_enter(&buf_pool->LRU_list_mutex);
687 + rw_lock_x_lock(&buf_pool->page_hash_latch);
688 buf_pool_mutex_enter(buf_pool);
690 if (buf_pool_size > 0) {
691 @@ -1223,6 +1245,8 @@
695 + mutex_exit(&buf_pool->LRU_list_mutex);
696 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
697 buf_pool_mutex_exit(buf_pool);
700 @@ -1253,6 +1277,8 @@
702 /* All fields are initialized by mem_zalloc(). */
704 + mutex_exit(&buf_pool->LRU_list_mutex);
705 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
706 buf_pool_mutex_exit(buf_pool);
709 @@ -1469,7 +1495,11 @@
711 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
713 - ut_ad(buf_pool_mutex_own(buf_pool));
714 + //ut_ad(buf_pool_mutex_own(buf_pool));
715 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
716 +#ifdef UNIV_SYNC_DEBUG
717 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
719 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
720 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
721 ut_a(bpage->buf_fix_count == 0);
722 @@ -1556,7 +1586,8 @@
725 btr_search_disable(); /* Empty the adaptive hash index again */
726 - buf_pool_mutex_enter(buf_pool);
727 + //buf_pool_mutex_enter(buf_pool);
728 + mutex_enter(&buf_pool->LRU_list_mutex);
731 if (buf_pool->n_chunks <= 1) {
732 @@ -1627,7 +1658,7 @@
734 buf_LRU_make_block_old(&block->page);
736 - } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
737 + } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
741 @@ -1635,7 +1666,8 @@
742 mutex_exit(&block->mutex);
745 - buf_pool_mutex_exit(buf_pool);
746 + //buf_pool_mutex_exit(buf_pool);
747 + mutex_exit(&buf_pool->LRU_list_mutex);
749 /* Request for a flush of the chunk if it helps.
750 Do not flush if there are non-free blocks, since
751 @@ -1685,7 +1717,8 @@
753 buf_pool->old_pool_size = buf_pool->curr_pool_size;
755 - buf_pool_mutex_exit(buf_pool);
756 + //buf_pool_mutex_exit(buf_pool);
757 + mutex_exit(&buf_pool->LRU_list_mutex);
761 @@ -1726,7 +1759,9 @@
762 hash_table_t* zip_hash;
763 hash_table_t* page_hash;
765 - buf_pool_mutex_enter(buf_pool);
766 + //buf_pool_mutex_enter(buf_pool);
767 + mutex_enter(&buf_pool->LRU_list_mutex);
768 + rw_lock_x_lock(&buf_pool->page_hash_latch);
770 /* Free, create, and populate the hash table. */
771 hash_table_free(buf_pool->page_hash);
772 @@ -1767,8 +1802,9 @@
773 All such blocks are either in buf_pool->zip_clean or
774 in buf_pool->flush_list. */
776 + mutex_enter(&buf_pool->zip_mutex);
777 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
778 - b = UT_LIST_GET_NEXT(list, b)) {
779 + b = UT_LIST_GET_NEXT(zip_list, b)) {
780 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
781 ut_ad(!b->in_flush_list);
782 ut_ad(b->in_LRU_list);
783 @@ -1778,10 +1814,11 @@
784 HASH_INSERT(buf_page_t, hash, page_hash,
785 buf_page_address_fold(b->space, b->offset), b);
787 + mutex_exit(&buf_pool->zip_mutex);
789 buf_flush_list_mutex_enter(buf_pool);
790 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
791 - b = UT_LIST_GET_NEXT(list, b)) {
792 + b = UT_LIST_GET_NEXT(flush_list, b)) {
793 ut_ad(b->in_flush_list);
794 ut_ad(b->in_LRU_list);
795 ut_ad(b->in_page_hash);
796 @@ -1808,7 +1845,9 @@
799 buf_flush_list_mutex_exit(buf_pool);
800 - buf_pool_mutex_exit(buf_pool);
801 + //buf_pool_mutex_exit(buf_pool);
802 + mutex_exit(&buf_pool->LRU_list_mutex);
803 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
806 /********************************************************************
807 @@ -1855,21 +1894,32 @@
810 buf_pool_t* buf_pool = buf_pool_get(space, offset);
811 + mutex_t* block_mutex;
813 - ut_ad(buf_pool_mutex_own(buf_pool));
814 + //ut_ad(buf_pool_mutex_own(buf_pool));
816 + rw_lock_x_lock(&buf_pool->page_hash_latch);
817 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
819 + block_mutex = buf_page_get_mutex_enter(bpage);
823 if (UNIV_LIKELY_NULL(bpage)) {
824 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
825 /* The page was loaded meanwhile. */
826 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
829 /* Add to an existing watch. */
830 bpage->buf_fix_count++;
831 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 + mutex_exit(block_mutex);
836 + /* buf_pool->watch is protected by zip_mutex for now */
837 + mutex_enter(&buf_pool->zip_mutex);
838 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
839 bpage = &buf_pool->watch[i];
841 @@ -1897,6 +1947,8 @@
842 ut_d(bpage->in_page_hash = TRUE);
843 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
845 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
846 + mutex_exit(&buf_pool->zip_mutex);
848 case BUF_BLOCK_ZIP_PAGE:
849 ut_ad(bpage->in_page_hash);
850 @@ -1914,6 +1966,8 @@
853 /* Fix compiler warning */
854 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
855 + mutex_exit(&buf_pool->zip_mutex);
859 @@ -1943,6 +1997,8 @@
863 + mutex_enter(&buf_pool->LRU_list_mutex);
864 + rw_lock_x_lock(&buf_pool->page_hash_latch);
865 buf_pool_mutex_enter(buf_pool);
866 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
868 @@ -1961,6 +2017,8 @@
869 buf_pool->n_chunks++;
872 + mutex_exit(&buf_pool->LRU_list_mutex);
873 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
874 buf_pool_mutex_exit(buf_pool);
877 @@ -2048,7 +2106,11 @@
879 buf_page_t* watch) /*!< in/out: sentinel for watch */
881 - ut_ad(buf_pool_mutex_own(buf_pool));
882 + //ut_ad(buf_pool_mutex_own(buf_pool));
883 +#ifdef UNIV_SYNC_DEBUG
884 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
886 + ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
888 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
889 ut_d(watch->in_page_hash = FALSE);
890 @@ -2070,28 +2132,31 @@
891 buf_pool_t* buf_pool = buf_pool_get(space, offset);
892 ulint fold = buf_page_address_fold(space, offset);
894 - buf_pool_mutex_enter(buf_pool);
895 + //buf_pool_mutex_enter(buf_pool);
896 + rw_lock_x_lock(&buf_pool->page_hash_latch);
897 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
898 /* The page must exist because buf_pool_watch_set()
899 increments buf_fix_count. */
902 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
903 - mutex_t* mutex = buf_page_get_mutex(bpage);
904 + mutex_t* mutex = buf_page_get_mutex_enter(bpage);
906 - mutex_enter(mutex);
907 ut_a(bpage->buf_fix_count > 0);
908 bpage->buf_fix_count--;
911 + mutex_enter(&buf_pool->zip_mutex);
912 ut_a(bpage->buf_fix_count > 0);
914 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
915 buf_pool_watch_remove(buf_pool, fold, bpage);
917 + mutex_exit(&buf_pool->zip_mutex);
920 - buf_pool_mutex_exit(buf_pool);
921 + //buf_pool_mutex_exit(buf_pool);
922 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
925 /****************************************************************//**
926 @@ -2111,14 +2176,16 @@
927 buf_pool_t* buf_pool = buf_pool_get(space, offset);
928 ulint fold = buf_page_address_fold(space, offset);
930 - buf_pool_mutex_enter(buf_pool);
931 + //buf_pool_mutex_enter(buf_pool);
932 + rw_lock_s_lock(&buf_pool->page_hash_latch);
934 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
935 /* The page must exist because buf_pool_watch_set()
936 increments buf_fix_count. */
938 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
939 - buf_pool_mutex_exit(buf_pool);
940 + //buf_pool_mutex_exit(buf_pool);
941 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
945 @@ -2135,13 +2202,15 @@
947 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
949 - buf_pool_mutex_enter(buf_pool);
950 + //buf_pool_mutex_enter(buf_pool);
951 + mutex_enter(&buf_pool->LRU_list_mutex);
953 ut_a(buf_page_in_file(bpage));
955 buf_LRU_make_block_young(bpage);
957 - buf_pool_mutex_exit(buf_pool);
958 + //buf_pool_mutex_exit(buf_pool);
959 + mutex_exit(&buf_pool->LRU_list_mutex);
962 /********************************************************************//**
963 @@ -2165,14 +2234,20 @@
964 ut_a(buf_page_in_file(bpage));
966 if (buf_page_peek_if_too_old(bpage)) {
967 - buf_pool_mutex_enter(buf_pool);
968 + //buf_pool_mutex_enter(buf_pool);
969 + mutex_enter(&buf_pool->LRU_list_mutex);
970 buf_LRU_make_block_young(bpage);
971 - buf_pool_mutex_exit(buf_pool);
972 + //buf_pool_mutex_exit(buf_pool);
973 + mutex_exit(&buf_pool->LRU_list_mutex);
974 } else if (!access_time) {
975 ulint time_ms = ut_time_ms();
976 - buf_pool_mutex_enter(buf_pool);
977 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
978 + //buf_pool_mutex_enter(buf_pool);
980 buf_page_set_accessed(bpage, time_ms);
981 - buf_pool_mutex_exit(buf_pool);
982 + mutex_exit(block_mutex);
984 + //buf_pool_mutex_exit(buf_pool);
988 @@ -2189,7 +2264,8 @@
990 buf_pool_t* buf_pool = buf_pool_get(space, offset);
992 - buf_pool_mutex_enter(buf_pool);
993 + //buf_pool_mutex_enter(buf_pool);
994 + rw_lock_s_lock(&buf_pool->page_hash_latch);
996 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
998 @@ -2198,7 +2274,8 @@
999 block->check_index_page_at_flush = FALSE;
1002 - buf_pool_mutex_exit(buf_pool);
1003 + //buf_pool_mutex_exit(buf_pool);
1004 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1007 /********************************************************************//**
1008 @@ -2217,7 +2294,8 @@
1010 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1012 - buf_pool_mutex_enter(buf_pool);
1013 + //buf_pool_mutex_enter(buf_pool);
1014 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1016 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1018 @@ -2228,7 +2306,8 @@
1019 is_hashed = block->is_hashed;
1022 - buf_pool_mutex_exit(buf_pool);
1023 + //buf_pool_mutex_exit(buf_pool);
1024 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1028 @@ -2250,7 +2329,8 @@
1030 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1032 - buf_pool_mutex_enter(buf_pool);
1033 + //buf_pool_mutex_enter(buf_pool);
1034 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1036 bpage = buf_page_hash_get(buf_pool, space, offset);
1038 @@ -2259,7 +2339,8 @@
1039 bpage->file_page_was_freed = TRUE;
1042 - buf_pool_mutex_exit(buf_pool);
1043 + //buf_pool_mutex_exit(buf_pool);
1044 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1048 @@ -2280,7 +2361,8 @@
1050 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1052 - buf_pool_mutex_enter(buf_pool);
1053 + //buf_pool_mutex_enter(buf_pool);
1054 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1056 bpage = buf_page_hash_get(buf_pool, space, offset);
1058 @@ -2289,7 +2371,8 @@
1059 bpage->file_page_was_freed = FALSE;
1062 - buf_pool_mutex_exit(buf_pool);
1063 + //buf_pool_mutex_exit(buf_pool);
1064 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1068 @@ -2324,8 +2407,9 @@
1069 buf_pool->stat.n_page_gets++;
1072 - buf_pool_mutex_enter(buf_pool);
1073 + //buf_pool_mutex_enter(buf_pool);
1075 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1076 bpage = buf_page_hash_get(buf_pool, space, offset);
1078 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1079 @@ -2334,7 +2418,8 @@
1081 /* Page not in buf_pool: needs to be read from file */
1083 - buf_pool_mutex_exit(buf_pool);
1084 + //buf_pool_mutex_exit(buf_pool);
1085 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1087 buf_read_page(space, zip_size, offset);
1089 @@ -2346,10 +2431,15 @@
1090 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1091 /* There is no compressed page. */
1093 - buf_pool_mutex_exit(buf_pool);
1094 + //buf_pool_mutex_exit(buf_pool);
1095 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1099 + block_mutex = buf_page_get_mutex_enter(bpage);
1101 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1103 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1105 switch (buf_page_get_state(bpage)) {
1106 @@ -2358,19 +2448,19 @@
1107 case BUF_BLOCK_MEMORY:
1108 case BUF_BLOCK_REMOVE_HASH:
1109 case BUF_BLOCK_ZIP_FREE:
1111 + mutex_exit(block_mutex);
1113 case BUF_BLOCK_ZIP_PAGE:
1114 case BUF_BLOCK_ZIP_DIRTY:
1115 - block_mutex = &buf_pool->zip_mutex;
1116 - mutex_enter(block_mutex);
1117 + ut_a(block_mutex == &buf_pool->zip_mutex);
1118 bpage->buf_fix_count++;
1120 case BUF_BLOCK_FILE_PAGE:
1121 - block_mutex = &((buf_block_t*) bpage)->mutex;
1122 - mutex_enter(block_mutex);
1123 + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1125 /* Discard the uncompressed page frame if possible. */
1126 - if (buf_LRU_free_block(bpage, FALSE, NULL)
1127 + if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1130 mutex_exit(block_mutex);
1131 @@ -2389,7 +2479,7 @@
1132 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1133 access_time = buf_page_is_accessed(bpage);
1135 - buf_pool_mutex_exit(buf_pool);
1136 + //buf_pool_mutex_exit(buf_pool);
1138 mutex_exit(block_mutex);
1140 @@ -2698,7 +2788,7 @@
1141 const buf_block_t* block) /*!< in: pointer to block,
1144 - ut_ad(buf_pool_mutex_own(buf_pool));
1145 + //ut_ad(buf_pool_mutex_own(buf_pool));
1147 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1148 /* The pointer should be aligned. */
1149 @@ -2734,6 +2824,7 @@
1153 + mutex_t* block_mutex = NULL;
1154 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1157 @@ -2755,9 +2846,11 @@
1158 fold = buf_page_address_fold(space, offset);
1161 - buf_pool_mutex_enter(buf_pool);
1162 + //buf_pool_mutex_enter(buf_pool);
1165 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1167 /* If the guess is a compressed page descriptor that
1168 has been allocated by buf_buddy_alloc(), it may have
1169 been invalidated by buf_buddy_relocate(). In that
1170 @@ -2766,11 +2859,15 @@
1171 the guess may be pointing to a buffer pool chunk that
1172 has been released when resizing the buffer pool. */
1174 - if (!buf_block_is_uncompressed(buf_pool, block)
1175 + if (!block_mutex) {
1176 + block = guess = NULL;
1177 + } else if (!buf_block_is_uncompressed(buf_pool, block)
1178 || offset != block->page.offset
1179 || space != block->page.space
1180 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1182 + mutex_exit(block_mutex);
1184 block = guess = NULL;
1186 ut_ad(!block->page.in_zip_hash);
1187 @@ -2779,12 +2876,19 @@
1190 if (block == NULL) {
1191 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1192 block = (buf_block_t*) buf_page_hash_get_low(
1193 buf_pool, space, offset, fold);
1195 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1196 + ut_a(block_mutex);
1198 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1202 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1203 + mutex_exit(block_mutex);
1207 @@ -2796,12 +2900,14 @@
1208 space, offset, fold);
1210 if (UNIV_LIKELY_NULL(block)) {
1212 + block_mutex = buf_page_get_mutex((buf_page_t*)block);
1213 + ut_a(block_mutex);
1214 + ut_ad(mutex_own(block_mutex));
1219 - buf_pool_mutex_exit(buf_pool);
1220 + //buf_pool_mutex_exit(buf_pool);
1222 if (mode == BUF_GET_IF_IN_POOL
1223 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1224 @@ -2849,7 +2955,8 @@
1225 /* The page is being read to buffer pool,
1226 but we cannot wait around for the read to
1228 - buf_pool_mutex_exit(buf_pool);
1229 + //buf_pool_mutex_exit(buf_pool);
1230 + mutex_exit(block_mutex);
1234 @@ -2859,38 +2966,49 @@
1237 case BUF_BLOCK_FILE_PAGE:
1238 + if (block_mutex == &buf_pool->zip_mutex) {
1239 + /* it is wrong mutex... */
1240 + mutex_exit(block_mutex);
1245 case BUF_BLOCK_ZIP_PAGE:
1246 case BUF_BLOCK_ZIP_DIRTY:
1247 + ut_ad(block_mutex == &buf_pool->zip_mutex);
1248 bpage = &block->page;
1249 /* Protect bpage->buf_fix_count. */
1250 - mutex_enter(&buf_pool->zip_mutex);
1251 + //mutex_enter(&buf_pool->zip_mutex);
1253 if (bpage->buf_fix_count
1254 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1255 /* This condition often occurs when the buffer
1256 is not buffer-fixed, but I/O-fixed by
1257 buf_page_init_for_read(). */
1258 - mutex_exit(&buf_pool->zip_mutex);
1259 + //mutex_exit(&buf_pool->zip_mutex);
1261 /* The block is buffer-fixed or I/O-fixed.
1263 - buf_pool_mutex_exit(buf_pool);
1264 + //buf_pool_mutex_exit(buf_pool);
1265 + mutex_exit(block_mutex);
1266 os_thread_sleep(WAIT_FOR_READ);
1271 /* Allocate an uncompressed page. */
1272 - buf_pool_mutex_exit(buf_pool);
1273 - mutex_exit(&buf_pool->zip_mutex);
1274 + //buf_pool_mutex_exit(buf_pool);
1275 + //mutex_exit(&buf_pool->zip_mutex);
1276 + mutex_exit(block_mutex);
1278 block = buf_LRU_get_free_block(buf_pool, 0);
1280 + block_mutex = &block->mutex;
1282 - buf_pool_mutex_enter(buf_pool);
1283 - mutex_enter(&block->mutex);
1284 + //buf_pool_mutex_enter(buf_pool);
1285 + mutex_enter(&buf_pool->LRU_list_mutex);
1286 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1287 + mutex_enter(block_mutex);
1290 buf_page_t* hash_bpage;
1291 @@ -2903,35 +3021,47 @@
1292 while buf_pool->mutex was released.
1293 Free the block that was allocated. */
1295 - buf_LRU_block_free_non_file_page(block);
1296 - mutex_exit(&block->mutex);
1297 + buf_LRU_block_free_non_file_page(block, TRUE);
1298 + mutex_exit(block_mutex);
1300 block = (buf_block_t*) hash_bpage;
1302 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1303 + ut_a(block_mutex);
1305 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1306 + mutex_exit(&buf_pool->LRU_list_mutex);
1311 + mutex_enter(&buf_pool->zip_mutex);
1314 (bpage->buf_fix_count
1315 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1317 + mutex_exit(&buf_pool->zip_mutex);
1318 /* The block was buffer-fixed or I/O-fixed
1319 while buf_pool->mutex was not held by this thread.
1320 Free the block that was allocated and try again.
1321 This should be extremely unlikely. */
1323 - buf_LRU_block_free_non_file_page(block);
1324 - mutex_exit(&block->mutex);
1325 + buf_LRU_block_free_non_file_page(block, TRUE);
1326 + //mutex_exit(&block->mutex);
1328 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1329 + mutex_exit(&buf_pool->LRU_list_mutex);
1330 goto wait_until_unfixed;
1333 /* Move the compressed page from bpage to block,
1334 and uncompress it. */
1336 - mutex_enter(&buf_pool->zip_mutex);
1338 buf_relocate(bpage, &block->page);
1340 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1342 buf_block_init_low(block);
1343 block->lock_hash_val = lock_rec_hash(space, offset);
1345 @@ -2940,7 +3070,7 @@
1347 if (buf_page_get_state(&block->page)
1348 == BUF_BLOCK_ZIP_PAGE) {
1349 - UT_LIST_REMOVE(list, buf_pool->zip_clean,
1350 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1352 ut_ad(!block->page.in_flush_list);
1354 @@ -2957,19 +3087,24 @@
1355 /* Insert at the front of unzip_LRU list */
1356 buf_unzip_LRU_add_block(block, FALSE);
1358 + mutex_exit(&buf_pool->LRU_list_mutex);
1360 block->page.buf_fix_count = 1;
1361 buf_block_set_io_fix(block, BUF_IO_READ);
1362 rw_lock_x_lock_func(&block->lock, 0, file, line);
1364 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1366 - mutex_exit(&block->mutex);
1367 + mutex_exit(block_mutex);
1368 mutex_exit(&buf_pool->zip_mutex);
1370 + buf_pool_mutex_enter(buf_pool);
1371 buf_pool->n_pend_unzip++;
1372 + buf_pool_mutex_exit(buf_pool);
1374 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1375 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1377 - buf_pool_mutex_exit(buf_pool);
1378 + //buf_pool_mutex_exit(buf_pool);
1380 /* Decompress the page and apply buffered operations
1381 while not holding buf_pool->mutex or block->mutex. */
1382 @@ -2982,12 +3117,15 @@
1385 /* Unfix and unlatch the block. */
1386 - buf_pool_mutex_enter(buf_pool);
1387 - mutex_enter(&block->mutex);
1388 + //buf_pool_mutex_enter(buf_pool);
1389 + block_mutex = &block->mutex;
1390 + mutex_enter(block_mutex);
1391 block->page.buf_fix_count--;
1392 buf_block_set_io_fix(block, BUF_IO_NONE);
1393 - mutex_exit(&block->mutex);
1395 + buf_pool_mutex_enter(buf_pool);
1396 buf_pool->n_pend_unzip--;
1397 + buf_pool_mutex_exit(buf_pool);
1398 rw_lock_x_unlock(&block->lock);
1401 @@ -3003,7 +3141,7 @@
1403 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1405 - mutex_enter(&block->mutex);
1406 + //mutex_enter(&block->mutex);
1407 #if UNIV_WORD_SIZE == 4
1408 /* On 32-bit systems, there is no padding in buf_page_t. On
1409 other systems, Valgrind could complain about uninitialized pad
1410 @@ -3013,13 +3151,14 @@
1412 buf_block_buf_fix_inc(block, file, line);
1414 - mutex_exit(&block->mutex);
1415 + //mutex_exit(&block->mutex);
1417 /* Check if this is the first access to the page */
1419 access_time = buf_page_is_accessed(&block->page);
1421 - buf_pool_mutex_exit(buf_pool);
1422 + //buf_pool_mutex_exit(buf_pool);
1423 + mutex_exit(block_mutex);
1425 buf_page_set_accessed_make_young(&block->page, access_time);
1427 @@ -3252,9 +3391,11 @@
1428 buf_pool = buf_pool_from_block(block);
1430 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1431 - buf_pool_mutex_enter(buf_pool);
1432 + //buf_pool_mutex_enter(buf_pool);
1433 + mutex_enter(&buf_pool->LRU_list_mutex);
1434 buf_LRU_make_block_young(&block->page);
1435 - buf_pool_mutex_exit(buf_pool);
1436 + //buf_pool_mutex_exit(buf_pool);
1437 + mutex_exit(&buf_pool->LRU_list_mutex);
1438 } else if (!buf_page_is_accessed(&block->page)) {
1439 /* Above, we do a dirty read on purpose, to avoid
1440 mutex contention. The field buf_page_t::access_time
1441 @@ -3262,9 +3403,11 @@
1442 field must be protected by mutex, however. */
1443 ulint time_ms = ut_time_ms();
1445 - buf_pool_mutex_enter(buf_pool);
1446 + //buf_pool_mutex_enter(buf_pool);
1447 + mutex_enter(&block->mutex);
1448 buf_page_set_accessed(&block->page, time_ms);
1449 - buf_pool_mutex_exit(buf_pool);
1450 + //buf_pool_mutex_exit(buf_pool);
1451 + mutex_exit(&block->mutex);
1454 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1455 @@ -3331,18 +3474,21 @@
1457 ut_ad(mtr->state == MTR_ACTIVE);
1459 - buf_pool_mutex_enter(buf_pool);
1460 + //buf_pool_mutex_enter(buf_pool);
1461 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1462 block = buf_block_hash_get(buf_pool, space_id, page_no);
1464 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1465 - buf_pool_mutex_exit(buf_pool);
1466 + //buf_pool_mutex_exit(buf_pool);
1467 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1471 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1473 mutex_enter(&block->mutex);
1474 - buf_pool_mutex_exit(buf_pool);
1475 + //buf_pool_mutex_exit(buf_pool);
1476 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1478 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1479 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1480 @@ -3431,7 +3577,10 @@
1481 buf_page_t* hash_page;
1482 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1484 - ut_ad(buf_pool_mutex_own(buf_pool));
1485 + //ut_ad(buf_pool_mutex_own(buf_pool));
1486 +#ifdef UNIV_SYNC_DEBUG
1487 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1489 ut_ad(mutex_own(&(block->mutex)));
1490 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1492 @@ -3460,11 +3609,14 @@
1493 if (UNIV_LIKELY(!hash_page)) {
1494 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1495 /* Preserve the reference count. */
1496 - ulint buf_fix_count = hash_page->buf_fix_count;
1497 + ulint buf_fix_count;
1499 + mutex_enter(&buf_pool->zip_mutex);
1500 + buf_fix_count = hash_page->buf_fix_count;
1501 ut_a(buf_fix_count > 0);
1502 block->page.buf_fix_count += buf_fix_count;
1503 buf_pool_watch_remove(buf_pool, fold, hash_page);
1504 + mutex_exit(&buf_pool->zip_mutex);
1507 "InnoDB: Error: page %lu %lu already found"
1508 @@ -3474,7 +3626,8 @@
1509 (const void*) hash_page, (const void*) block);
1510 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1511 mutex_exit(&block->mutex);
1512 - buf_pool_mutex_exit(buf_pool);
1513 + //buf_pool_mutex_exit(buf_pool);
1514 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1518 @@ -3558,7 +3711,9 @@
1520 fold = buf_page_address_fold(space, offset);
1522 - buf_pool_mutex_enter(buf_pool);
1523 + //buf_pool_mutex_enter(buf_pool);
1524 + mutex_enter(&buf_pool->LRU_list_mutex);
1525 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1527 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1528 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1529 @@ -3567,9 +3722,15 @@
1532 mutex_enter(&block->mutex);
1533 - buf_LRU_block_free_non_file_page(block);
1534 + mutex_exit(&buf_pool->LRU_list_mutex);
1535 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1536 + buf_LRU_block_free_non_file_page(block, FALSE);
1537 mutex_exit(&block->mutex);
1540 + mutex_exit(&buf_pool->LRU_list_mutex);
1541 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1546 @@ -3592,6 +3753,8 @@
1548 buf_page_init(space, offset, fold, block);
1550 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1552 /* The block must be put to the LRU list, to the old blocks */
1553 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1555 @@ -3619,7 +3782,7 @@
1556 been added to buf_pool->LRU and
1557 buf_pool->page_hash. */
1558 mutex_exit(&block->mutex);
1559 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1560 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1561 mutex_enter(&block->mutex);
1562 block->page.zip.data = data;
1564 @@ -3632,6 +3795,7 @@
1565 buf_unzip_LRU_add_block(block, TRUE);
1568 + mutex_exit(&buf_pool->LRU_list_mutex);
1569 mutex_exit(&block->mutex);
1571 /* Defer buf_buddy_alloc() until after the block has
1572 @@ -3643,8 +3807,8 @@
1573 control block (bpage), in order to avoid the
1574 invocation of buf_buddy_relocate_block() on
1575 uninitialized data. */
1576 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1577 - bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1578 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1579 + bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1581 /* Initialize the buf_pool pointer. */
1582 bpage->buf_pool = buf_pool;
1583 @@ -3663,8 +3827,11 @@
1585 /* The block was added by some other thread. */
1587 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1588 - buf_buddy_free(buf_pool, data, zip_size);
1589 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1590 + buf_buddy_free(buf_pool, data, zip_size, TRUE);
1592 + mutex_exit(&buf_pool->LRU_list_mutex);
1593 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1597 @@ -3708,18 +3875,24 @@
1598 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1601 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1603 /* The block must be put to the LRU list, to the old blocks */
1604 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1605 buf_LRU_insert_zip_clean(bpage);
1607 + mutex_exit(&buf_pool->LRU_list_mutex);
1609 buf_page_set_io_fix(bpage, BUF_IO_READ);
1611 mutex_exit(&buf_pool->zip_mutex);
1614 + buf_pool_mutex_enter(buf_pool);
1615 buf_pool->n_pend_reads++;
1617 buf_pool_mutex_exit(buf_pool);
1619 + //buf_pool_mutex_exit(buf_pool);
1621 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1623 @@ -3761,7 +3934,9 @@
1625 fold = buf_page_address_fold(space, offset);
1627 - buf_pool_mutex_enter(buf_pool);
1628 + //buf_pool_mutex_enter(buf_pool);
1629 + mutex_enter(&buf_pool->LRU_list_mutex);
1630 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1632 block = (buf_block_t*) buf_page_hash_get_low(
1633 buf_pool, space, offset, fold);
1634 @@ -3777,7 +3952,9 @@
1635 #endif /* UNIV_DEBUG_FILE_ACCESSES */
1637 /* Page can be found in buf_pool */
1638 - buf_pool_mutex_exit(buf_pool);
1639 + //buf_pool_mutex_exit(buf_pool);
1640 + mutex_exit(&buf_pool->LRU_list_mutex);
1641 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1643 buf_block_free(free_block);
1645 @@ -3799,6 +3976,7 @@
1646 mutex_enter(&block->mutex);
1648 buf_page_init(space, offset, fold, block);
1649 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1651 /* The block must be put to the LRU list */
1652 buf_LRU_add_block(&block->page, FALSE);
1653 @@ -3825,7 +4003,7 @@
1654 the reacquisition of buf_pool->mutex. We also must
1655 defer this operation until after the block descriptor
1656 has been added to buf_pool->LRU and buf_pool->page_hash. */
1657 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1658 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1659 mutex_enter(&block->mutex);
1660 block->page.zip.data = data;
1662 @@ -3843,7 +4021,8 @@
1664 buf_page_set_accessed(&block->page, time_ms);
1666 - buf_pool_mutex_exit(buf_pool);
1667 + //buf_pool_mutex_exit(buf_pool);
1668 + mutex_exit(&buf_pool->LRU_list_mutex);
1670 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1672 @@ -3894,6 +4073,8 @@
1673 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1674 const ibool uncompressed = (buf_page_get_state(bpage)
1675 == BUF_BLOCK_FILE_PAGE);
1676 + ibool have_LRU_mutex = FALSE;
1677 + mutex_t* block_mutex;
1679 ut_a(buf_page_in_file(bpage));
1681 @@ -4027,8 +4208,26 @@
1685 + if (io_type == BUF_IO_WRITE
1686 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1687 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1688 + /* to keep consistency at buf_LRU_insert_zip_clean() */
1689 + have_LRU_mutex = TRUE; /* optimistic */
1692 + if (have_LRU_mutex)
1693 + mutex_enter(&buf_pool->LRU_list_mutex);
1694 + block_mutex = buf_page_get_mutex_enter(bpage);
1695 + ut_a(block_mutex);
1696 + if (io_type == BUF_IO_WRITE
1697 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1698 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1699 + && !have_LRU_mutex) {
1700 + mutex_exit(block_mutex);
1701 + have_LRU_mutex = TRUE;
1704 buf_pool_mutex_enter(buf_pool);
1705 - mutex_enter(buf_page_get_mutex(bpage));
1707 #ifdef UNIV_IBUF_COUNT_DEBUG
1708 if (io_type == BUF_IO_WRITE || uncompressed) {
1709 @@ -4051,6 +4250,7 @@
1710 the x-latch to this OS thread: do not let this confuse you in
1713 + ut_a(!have_LRU_mutex);
1714 ut_ad(buf_pool->n_pend_reads > 0);
1715 buf_pool->n_pend_reads--;
1716 buf_pool->stat.n_pages_read++;
1717 @@ -4068,6 +4268,9 @@
1719 buf_flush_write_complete(bpage);
1721 + if (have_LRU_mutex)
1722 + mutex_exit(&buf_pool->LRU_list_mutex);
1725 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1727 @@ -4090,8 +4293,8 @@
1729 #endif /* UNIV_DEBUG */
1731 - mutex_exit(buf_page_get_mutex(bpage));
1732 buf_pool_mutex_exit(buf_pool);
1733 + mutex_exit(block_mutex);
1736 /*********************************************************************//**
1737 @@ -4108,7 +4311,9 @@
1741 - buf_pool_mutex_enter(buf_pool);
1742 + //buf_pool_mutex_enter(buf_pool);
1743 + mutex_enter(&buf_pool->LRU_list_mutex);
1744 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1746 chunk = buf_pool->chunks;
1748 @@ -4125,7 +4330,9 @@
1752 - buf_pool_mutex_exit(buf_pool);
1753 + //buf_pool_mutex_exit(buf_pool);
1754 + mutex_exit(&buf_pool->LRU_list_mutex);
1755 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1759 @@ -4173,7 +4380,8 @@
1760 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1763 - buf_pool_mutex_enter(buf_pool);
1764 + //buf_pool_mutex_enter(buf_pool);
1765 + mutex_enter(&buf_pool->LRU_list_mutex);
1767 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1768 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1769 @@ -4186,7 +4394,8 @@
1770 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1771 buf_refresh_io_stats(buf_pool);
1773 - buf_pool_mutex_exit(buf_pool);
1774 + //buf_pool_mutex_exit(buf_pool);
1775 + mutex_exit(&buf_pool->LRU_list_mutex);
1778 /*********************************************************************//**
1779 @@ -4228,7 +4437,10 @@
1783 - buf_pool_mutex_enter(buf_pool);
1784 + //buf_pool_mutex_enter(buf_pool);
1785 + mutex_enter(&buf_pool->LRU_list_mutex);
1786 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1787 + /* for keep the new latch order, it cannot validate correctly... */
1789 chunk = buf_pool->chunks;
1791 @@ -4323,7 +4535,7 @@
1792 /* Check clean compressed-only blocks. */
1794 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1795 - b = UT_LIST_GET_NEXT(list, b)) {
1796 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1797 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1798 switch (buf_page_get_io_fix(b)) {
1800 @@ -4354,7 +4566,7 @@
1802 buf_flush_list_mutex_enter(buf_pool);
1803 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1804 - b = UT_LIST_GET_NEXT(list, b)) {
1805 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1806 ut_ad(b->in_flush_list);
1807 ut_a(b->oldest_modification);
1809 @@ -4413,6 +4625,8 @@
1812 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1813 + /* because of latching order with block->mutex, we cannot get needed mutexes before that */
1815 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1816 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1817 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1818 @@ -4423,8 +4637,11 @@
1819 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1820 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1821 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1824 - buf_pool_mutex_exit(buf_pool);
1825 + //buf_pool_mutex_exit(buf_pool);
1826 + mutex_exit(&buf_pool->LRU_list_mutex);
1827 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1829 ut_a(buf_LRU_validate());
1830 ut_a(buf_flush_validate(buf_pool));
1831 @@ -4480,7 +4697,9 @@
1832 index_ids = mem_alloc(size * sizeof *index_ids);
1833 counts = mem_alloc(sizeof(ulint) * size);
1835 - buf_pool_mutex_enter(buf_pool);
1836 + //buf_pool_mutex_enter(buf_pool);
1837 + mutex_enter(&buf_pool->LRU_list_mutex);
1838 + mutex_enter(&buf_pool->free_list_mutex);
1839 buf_flush_list_mutex_enter(buf_pool);
1842 @@ -4549,7 +4768,9 @@
1846 - buf_pool_mutex_exit(buf_pool);
1847 + //buf_pool_mutex_exit(buf_pool);
1848 + mutex_exit(&buf_pool->LRU_list_mutex);
1849 + mutex_exit(&buf_pool->free_list_mutex);
1851 for (i = 0; i < n_found; i++) {
1852 index = dict_index_get_if_in_cache(index_ids[i]);
1853 @@ -4606,7 +4827,7 @@
1855 ulint fixed_pages_number = 0;
1857 - buf_pool_mutex_enter(buf_pool);
1858 + //buf_pool_mutex_enter(buf_pool);
1860 chunk = buf_pool->chunks;
1862 @@ -4640,7 +4861,7 @@
1863 /* Traverse the lists of clean and dirty compressed-only blocks. */
1865 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1866 - b = UT_LIST_GET_NEXT(list, b)) {
1867 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1868 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1869 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1871 @@ -4652,7 +4873,7 @@
1873 buf_flush_list_mutex_enter(buf_pool);
1874 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1875 - b = UT_LIST_GET_NEXT(list, b)) {
1876 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1877 ut_ad(b->in_flush_list);
1879 switch (buf_page_get_state(b)) {
1880 @@ -4678,7 +4899,7 @@
1882 buf_flush_list_mutex_exit(buf_pool);
1883 mutex_exit(&buf_pool->zip_mutex);
1884 - buf_pool_mutex_exit(buf_pool);
1885 + //buf_pool_mutex_exit(buf_pool);
1887 return(fixed_pages_number);
1889 @@ -4772,6 +4993,8 @@
1893 + mutex_enter(&buf_pool->LRU_list_mutex);
1894 + mutex_enter(&buf_pool->free_list_mutex);
1895 buf_pool_mutex_enter(buf_pool);
1896 buf_flush_list_mutex_enter(buf_pool);
1898 @@ -4875,6 +5098,8 @@
1899 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1901 buf_refresh_io_stats(buf_pool);
1902 + mutex_exit(&buf_pool->LRU_list_mutex);
1903 + mutex_exit(&buf_pool->free_list_mutex);
1904 buf_pool_mutex_exit(buf_pool);
1907 @@ -4994,11 +5219,13 @@
1911 - buf_pool_mutex_enter(buf_pool);
1912 + //buf_pool_mutex_enter(buf_pool);
1913 + mutex_enter(&buf_pool->free_list_mutex);
1915 len = UT_LIST_GET_LEN(buf_pool->free);
1917 - buf_pool_mutex_exit(buf_pool);
1918 + //buf_pool_mutex_exit(buf_pool);
1919 + mutex_exit(&buf_pool->free_list_mutex);
1923 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1924 --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1925 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
1928 ut_d(block->page.in_flush_list = TRUE);
1929 block->page.oldest_modification = lsn;
1930 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1931 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1933 #ifdef UNIV_DEBUG_VALGRIND
1935 @@ -373,14 +373,14 @@
1936 > block->page.oldest_modification) {
1937 ut_ad(b->in_flush_list);
1939 - b = UT_LIST_GET_NEXT(list, b);
1940 + b = UT_LIST_GET_NEXT(flush_list, b);
1944 if (prev_b == NULL) {
1945 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1946 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1948 - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1949 + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1950 prev_b, &block->page);
1954 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1955 //ut_ad(buf_pool_mutex_own(buf_pool));
1957 - //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1958 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1959 //ut_ad(bpage->in_LRU_list);
1961 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1962 @@ -442,14 +442,14 @@
1963 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1966 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1967 - ut_ad(buf_pool_mutex_own(buf_pool));
1968 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1969 + //ut_ad(buf_pool_mutex_own(buf_pool));
1971 - ut_a(buf_page_in_file(bpage));
1972 + //ut_a(buf_page_in_file(bpage));
1973 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1974 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1976 - if (bpage->oldest_modification != 0
1977 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1978 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1979 ut_ad(bpage->in_flush_list);
1983 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1985 - ut_ad(buf_pool_mutex_own(buf_pool));
1986 + //ut_ad(buf_pool_mutex_own(buf_pool));
1987 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1988 ut_ad(bpage->in_flush_list);
1990 @@ -498,11 +498,11 @@
1992 case BUF_BLOCK_ZIP_DIRTY:
1993 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1994 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1995 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1996 buf_LRU_insert_zip_clean(bpage);
1998 case BUF_BLOCK_FILE_PAGE:
1999 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2000 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2005 buf_page_t* prev_b = NULL;
2006 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2008 - ut_ad(buf_pool_mutex_own(buf_pool));
2009 + //ut_ad(buf_pool_mutex_own(buf_pool));
2010 /* Must reside in the same buffer pool. */
2011 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2013 @@ -575,18 +575,18 @@
2014 because we assert on in_flush_list in comparison function. */
2015 ut_d(bpage->in_flush_list = FALSE);
2017 - prev = UT_LIST_GET_PREV(list, bpage);
2018 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2019 + prev = UT_LIST_GET_PREV(flush_list, bpage);
2020 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2023 ut_ad(prev->in_flush_list);
2024 UT_LIST_INSERT_AFTER(
2027 buf_pool->flush_list,
2033 buf_pool->flush_list,
2036 @@ -1055,7 +1055,7 @@
2039 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2040 - ut_ad(!buf_pool_mutex_own(buf_pool));
2041 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2044 #ifdef UNIV_LOG_DEBUG
2045 @@ -1069,7 +1069,8 @@
2046 io_fixed and oldest_modification != 0. Thus, it cannot be
2047 relocated in the buffer pool or removed from flush_list or
2049 - ut_ad(!buf_pool_mutex_own(buf_pool));
2050 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2051 + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2052 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2053 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2054 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2055 @@ -1155,12 +1156,18 @@
2056 ibool is_uncompressed;
2058 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2059 - ut_ad(buf_pool_mutex_own(buf_pool));
2060 + //ut_ad(buf_pool_mutex_own(buf_pool));
2061 +#ifdef UNIV_SYNC_DEBUG
2062 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2064 ut_ad(buf_page_in_file(bpage));
2066 block_mutex = buf_page_get_mutex(bpage);
2067 ut_ad(mutex_own(block_mutex));
2069 + buf_pool_mutex_enter(buf_pool);
2070 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2072 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2074 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2075 @@ -1322,14 +1329,16 @@
2077 buf_pool = buf_pool_get(space, i);
2079 - buf_pool_mutex_enter(buf_pool);
2080 + //buf_pool_mutex_enter(buf_pool);
2081 + rw_lock_s_lock(&buf_pool->page_hash_latch);
2083 /* We only want to flush pages from this buffer pool. */
2084 bpage = buf_page_hash_get(buf_pool, space, i);
2088 - buf_pool_mutex_exit(buf_pool);
2089 + //buf_pool_mutex_exit(buf_pool);
2090 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2094 @@ -1341,11 +1350,9 @@
2095 if (flush_type != BUF_FLUSH_LRU
2097 || buf_page_is_old(bpage)) {
2098 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2099 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2101 - mutex_enter(block_mutex);
2103 - if (buf_flush_ready_for_flush(bpage, flush_type)
2104 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2105 && (i == offset || !bpage->buf_fix_count)) {
2106 /* We only try to flush those
2107 neighbors != offset where the buf fix
2108 @@ -1361,11 +1368,12 @@
2109 ut_ad(!buf_pool_mutex_own(buf_pool));
2113 + } else if (block_mutex) {
2114 mutex_exit(block_mutex);
2117 - buf_pool_mutex_exit(buf_pool);
2118 + //buf_pool_mutex_exit(buf_pool);
2119 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2123 @@ -1398,21 +1406,25 @@
2124 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2125 #endif /* UNIV_DEBUG */
2127 - ut_ad(buf_pool_mutex_own(buf_pool));
2128 + //ut_ad(buf_pool_mutex_own(buf_pool));
2129 + ut_ad(flush_type != BUF_FLUSH_LRU
2130 + || mutex_own(&buf_pool->LRU_list_mutex));
2132 - block_mutex = buf_page_get_mutex(bpage);
2133 - mutex_enter(block_mutex);
2134 + block_mutex = buf_page_get_mutex_enter(bpage);
2136 - ut_a(buf_page_in_file(bpage));
2137 + //ut_a(buf_page_in_file(bpage));
2139 - if (buf_flush_ready_for_flush(bpage, flush_type)) {
2140 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2143 buf_pool_t* buf_pool;
2145 buf_pool = buf_pool_from_bpage(bpage);
2147 - buf_pool_mutex_exit(buf_pool);
2148 + //buf_pool_mutex_exit(buf_pool);
2149 + if (flush_type == BUF_FLUSH_LRU) {
2150 + mutex_exit(&buf_pool->LRU_list_mutex);
2153 /* These fields are protected by both the
2154 buffer pool mutex and block mutex. */
2155 @@ -1428,13 +1440,18 @@
2159 - buf_pool_mutex_enter(buf_pool);
2160 + //buf_pool_mutex_enter(buf_pool);
2161 + if (flush_type == BUF_FLUSH_LRU) {
2162 + mutex_enter(&buf_pool->LRU_list_mutex);
2166 + } else if (block_mutex) {
2167 mutex_exit(block_mutex);
2170 - ut_ad(buf_pool_mutex_own(buf_pool));
2171 + //ut_ad(buf_pool_mutex_own(buf_pool));
2172 + ut_ad(flush_type != BUF_FLUSH_LRU
2173 + || mutex_own(&buf_pool->LRU_list_mutex));
2177 @@ -1455,7 +1472,8 @@
2181 - ut_ad(buf_pool_mutex_own(buf_pool));
2182 + //ut_ad(buf_pool_mutex_own(buf_pool));
2183 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2186 /* Start from the end of the list looking for a
2187 @@ -1477,7 +1495,8 @@
2188 should be flushed, we factor in this value. */
2189 buf_lru_flush_page_count += count;
2191 - ut_ad(buf_pool_mutex_own(buf_pool));
2192 + //ut_ad(buf_pool_mutex_own(buf_pool));
2193 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2197 @@ -1505,9 +1524,10 @@
2201 + buf_page_t* prev_bpage = NULL;
2204 - ut_ad(buf_pool_mutex_own(buf_pool));
2205 + //ut_ad(buf_pool_mutex_own(buf_pool));
2207 /* If we have flushed enough, leave the loop */
2209 @@ -1526,6 +1546,7 @@
2212 ut_a(bpage->oldest_modification > 0);
2213 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2216 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2217 @@ -1567,9 +1588,17 @@
2221 - bpage = UT_LIST_GET_PREV(list, bpage);
2222 + bpage = UT_LIST_GET_PREV(flush_list, bpage);
2224 - ut_ad(!bpage || bpage->in_flush_list);
2225 + //ut_ad(!bpage || bpage->in_flush_list);
2226 + if (bpage != prev_bpage) {
2227 + /* the search might warp.. retrying */
2228 + buf_flush_list_mutex_exit(buf_pool);
2232 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2235 buf_flush_list_mutex_exit(buf_pool);
2237 @@ -1578,7 +1607,7 @@
2239 } while (count < min_n && bpage != NULL && len > 0);
2241 - ut_ad(buf_pool_mutex_own(buf_pool));
2242 + //ut_ad(buf_pool_mutex_own(buf_pool));
2246 @@ -1617,13 +1646,15 @@
2247 || sync_thread_levels_empty_gen(TRUE));
2248 #endif /* UNIV_SYNC_DEBUG */
2250 - buf_pool_mutex_enter(buf_pool);
2251 + //buf_pool_mutex_enter(buf_pool);
2253 /* Note: The buffer pool mutex is released and reacquired within
2254 the flush functions. */
2255 switch(flush_type) {
2257 + mutex_enter(&buf_pool->LRU_list_mutex);
2258 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2259 + mutex_exit(&buf_pool->LRU_list_mutex);
2261 case BUF_FLUSH_LIST:
2262 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2263 @@ -1632,7 +1663,7 @@
2267 - buf_pool_mutex_exit(buf_pool);
2268 + //buf_pool_mutex_exit(buf_pool);
2270 buf_flush_buffered_writes();
2272 @@ -1888,7 +1919,7 @@
2274 //buf_pool_mutex_enter(buf_pool);
2276 - buf_pool_mutex_enter(buf_pool);
2277 + mutex_enter(&buf_pool->LRU_list_mutex);
2279 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2281 @@ -1905,15 +1936,15 @@
2282 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2285 - block_mutex = buf_page_get_mutex(bpage);
2287 - mutex_enter(block_mutex);
2288 + block_mutex = buf_page_get_mutex_enter(bpage);
2290 - if (buf_flush_ready_for_replace(bpage)) {
2291 + if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2295 - mutex_exit(block_mutex);
2296 + if (block_mutex) {
2297 + mutex_exit(block_mutex);
2302 @@ -1922,7 +1953,7 @@
2304 //buf_pool_mutex_exit(buf_pool);
2306 - buf_pool_mutex_exit(buf_pool);
2307 + mutex_exit(&buf_pool->LRU_list_mutex);
2309 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2311 @@ -2121,7 +2152,7 @@
2313 ut_ad(buf_flush_list_mutex_own(buf_pool));
2315 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2316 + UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2317 ut_ad(ut_list_node_313->in_flush_list));
2319 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2320 @@ -2161,7 +2192,7 @@
2321 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2324 - bpage = UT_LIST_GET_NEXT(list, bpage);
2325 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2327 ut_a(!bpage || om >= bpage->oldest_modification);
2329 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2330 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2331 +++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2334 buf_LRU_block_free_hashed_page(
2335 /*===========================*/
2336 - buf_block_t* block); /*!< in: block, must contain a file page and
2337 + buf_block_t* block, /*!< in: block, must contain a file page and
2338 be in a state where it can be freed */
2339 + ibool have_page_hash_mutex);
2341 /******************************************************************//**
2342 Determines if the unzip_LRU list should be used for evicting a victim
2343 @@ -154,15 +155,20 @@
2345 buf_LRU_evict_from_unzip_LRU(
2346 /*=========================*/
2347 - buf_pool_t* buf_pool)
2348 + buf_pool_t* buf_pool,
2349 + ibool have_LRU_mutex)
2354 - ut_ad(buf_pool_mutex_own(buf_pool));
2355 + //ut_ad(buf_pool_mutex_own(buf_pool));
2357 + if (!have_LRU_mutex)
2358 + mutex_enter(&buf_pool->LRU_list_mutex);
2359 /* If the unzip_LRU list is empty, we can only use the LRU. */
2360 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2361 + if (!have_LRU_mutex)
2362 + mutex_exit(&buf_pool->LRU_list_mutex);
2366 @@ -171,14 +177,20 @@
2367 decompressed pages in the buffer pool. */
2368 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2369 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2370 + if (!have_LRU_mutex)
2371 + mutex_exit(&buf_pool->LRU_list_mutex);
2375 /* If eviction hasn't started yet, we assume by default
2376 that a workload is disk bound. */
2377 if (buf_pool->freed_page_clock == 0) {
2378 + if (!have_LRU_mutex)
2379 + mutex_exit(&buf_pool->LRU_list_mutex);
2382 + if (!have_LRU_mutex)
2383 + mutex_exit(&buf_pool->LRU_list_mutex);
2385 /* Calculate the average over past intervals, and add the values
2386 of the current interval. */
2387 @@ -246,19 +258,23 @@
2388 page_arr = ut_malloc(
2389 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2391 - buf_pool_mutex_enter(buf_pool);
2392 + //buf_pool_mutex_enter(buf_pool);
2393 + mutex_enter(&buf_pool->LRU_list_mutex);
2397 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2399 while (bpage != NULL) {
2400 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2401 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2402 buf_page_t* prev_bpage;
2404 - mutex_enter(block_mutex);
2405 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2407 + if (!block_mutex) {
2411 ut_a(buf_page_in_file(bpage));
2413 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2414 @@ -287,14 +303,16 @@
2416 /* Array full. We release the buf_pool->mutex to
2417 obey the latching order. */
2418 - buf_pool_mutex_exit(buf_pool);
2419 + //buf_pool_mutex_exit(buf_pool);
2420 + mutex_exit(&buf_pool->LRU_list_mutex);
2422 buf_LRU_drop_page_hash_batch(
2423 id, zip_size, page_arr, num_entries);
2427 - buf_pool_mutex_enter(buf_pool);
2428 + //buf_pool_mutex_enter(buf_pool);
2429 + mutex_enter(&buf_pool->LRU_list_mutex);
2431 mutex_exit(block_mutex);
2437 - buf_pool_mutex_exit(buf_pool);
2438 + //buf_pool_mutex_exit(buf_pool);
2439 + mutex_exit(&buf_pool->LRU_list_mutex);
2441 /* Drop any remaining batch of search hashed pages. */
2442 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2447 - buf_pool_mutex_enter(buf_pool);
2448 + //buf_pool_mutex_enter(buf_pool);
2449 + mutex_enter(&buf_pool->LRU_list_mutex);
2450 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2454 @@ -369,8 +390,16 @@
2458 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2459 - mutex_enter(block_mutex);
2460 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2462 + if (!block_mutex) {
2463 + /* It may be impossible case...
2464 + Something wrong, so will be scan_again */
2466 + all_freed = FALSE;
2468 + goto next_page_no_mutex;
2471 if (bpage->buf_fix_count > 0) {
2477 - buf_pool_mutex_exit(buf_pool);
2478 + //buf_pool_mutex_exit(buf_pool);
2479 + mutex_exit(&buf_pool->LRU_list_mutex);
2480 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2482 zip_size = buf_page_get_zip_size(bpage);
2483 page_no = buf_page_get_page_no(bpage);
2485 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2486 != BUF_BLOCK_ZIP_FREE) {
2487 buf_LRU_block_free_hashed_page((buf_block_t*)
2491 /* The block_mutex should have been
2492 released by buf_LRU_block_remove_hashed_page()
2497 - buf_pool_mutex_exit(buf_pool);
2498 + //buf_pool_mutex_exit(buf_pool);
2499 + mutex_exit(&buf_pool->LRU_list_mutex);
2500 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2503 os_thread_sleep(20000);
2506 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2508 - ut_ad(buf_pool_mutex_own(buf_pool));
2509 + //ut_ad(buf_pool_mutex_own(buf_pool));
2510 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2511 + ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2512 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2514 /* Find the first successor of bpage in the LRU list
2515 @@ -540,17 +575,17 @@
2518 b = UT_LIST_GET_NEXT(LRU, b);
2519 - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2520 + } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2522 /* Insert bpage before b, i.e., after the predecessor of b. */
2524 - b = UT_LIST_GET_PREV(list, b);
2525 + b = UT_LIST_GET_PREV(zip_list, b);
2529 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2530 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2532 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2533 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2537 @@ -563,18 +598,19 @@
2538 buf_LRU_free_from_unzip_LRU_list(
2539 /*=============================*/
2540 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2541 - ulint n_iterations) /*!< in: how many times this has
2542 + ulint n_iterations, /*!< in: how many times this has
2543 been called repeatedly without
2544 result: a high value means that
2545 we should search farther; we will
2546 search n_iterations / 5 of the
2547 unzip_LRU list, or nothing if
2548 n_iterations >= 5 */
2549 + ibool have_LRU_mutex)
2554 - ut_ad(buf_pool_mutex_own(buf_pool));
2555 + //ut_ad(buf_pool_mutex_own(buf_pool));
2557 /* Theoratically it should be much easier to find a victim
2558 from unzip_LRU as we can choose even a dirty block (as we'll
2560 if we have done five iterations so far. */
2562 if (UNIV_UNLIKELY(n_iterations >= 5)
2563 - || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2564 + || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2568 @@ -592,18 +628,25 @@
2569 distance = 100 + (n_iterations
2570 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2573 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2574 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2575 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2577 enum buf_lru_free_block_status freed;
2579 + mutex_enter(&block->mutex);
2580 + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2581 + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2582 + mutex_exit(&block->mutex);
2586 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2587 ut_ad(block->in_unzip_LRU_list);
2588 ut_ad(block->page.in_LRU_list);
2590 - mutex_enter(&block->mutex);
2591 - freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2592 + freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2593 mutex_exit(&block->mutex);
2596 @@ -637,21 +680,23 @@
2597 buf_LRU_free_from_common_LRU_list(
2598 /*==============================*/
2599 buf_pool_t* buf_pool,
2600 - ulint n_iterations)
2601 + ulint n_iterations,
2602 /*!< in: how many times this has been called
2603 repeatedly without result: a high value means
2604 that we should search farther; if
2605 n_iterations < 10, then we search
2606 n_iterations / 10 * buf_pool->curr_size
2607 pages from the end of the LRU list */
2608 + ibool have_LRU_mutex)
2613 - ut_ad(buf_pool_mutex_own(buf_pool));
2614 + //ut_ad(buf_pool_mutex_own(buf_pool));
2616 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2619 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2620 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2621 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2622 @@ -659,14 +704,23 @@
2623 enum buf_lru_free_block_status freed;
2625 mutex_t* block_mutex
2626 - = buf_page_get_mutex(bpage);
2627 + = buf_page_get_mutex_enter(bpage);
2629 + if (!block_mutex) {
2633 + if (!bpage->in_LRU_list
2634 + || !buf_page_in_file(bpage)) {
2635 + mutex_exit(block_mutex);
2639 ut_ad(buf_page_in_file(bpage));
2640 ut_ad(bpage->in_LRU_list);
2642 - mutex_enter(block_mutex);
2643 accessed = buf_page_is_accessed(bpage);
2644 - freed = buf_LRU_free_block(bpage, TRUE, NULL);
2645 + freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2646 mutex_exit(block_mutex);
2649 @@ -718,16 +772,23 @@
2650 n_iterations / 5 of the unzip_LRU list. */
2652 ibool freed = FALSE;
2653 + ibool have_LRU_mutex = FALSE;
2655 - buf_pool_mutex_enter(buf_pool);
2656 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2657 + have_LRU_mutex = TRUE;
2659 + //buf_pool_mutex_enter(buf_pool);
2660 + if (have_LRU_mutex)
2661 + mutex_enter(&buf_pool->LRU_list_mutex);
2663 - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2664 + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2667 freed = buf_LRU_free_from_common_LRU_list(
2668 - buf_pool, n_iterations);
2669 + buf_pool, n_iterations, have_LRU_mutex);
2672 + buf_pool_mutex_enter(buf_pool);
2674 buf_pool->LRU_flush_ended = 0;
2675 } else if (buf_pool->LRU_flush_ended > 0) {
2679 buf_pool_mutex_exit(buf_pool);
2680 + if (have_LRU_mutex)
2681 + mutex_exit(&buf_pool->LRU_list_mutex);
2687 buf_pool = buf_pool_from_array(i);
2689 - buf_pool_mutex_enter(buf_pool);
2690 + //buf_pool_mutex_enter(buf_pool);
2691 + mutex_enter(&buf_pool->LRU_list_mutex);
2692 + mutex_enter(&buf_pool->free_list_mutex);
2694 if (!recv_recovery_on
2695 && UT_LIST_GET_LEN(buf_pool->free)
2700 - buf_pool_mutex_exit(buf_pool);
2701 + //buf_pool_mutex_exit(buf_pool);
2702 + mutex_exit(&buf_pool->LRU_list_mutex);
2703 + mutex_exit(&buf_pool->free_list_mutex);
2707 @@ -823,9 +890,10 @@
2711 - ut_ad(buf_pool_mutex_own(buf_pool));
2712 + //ut_ad(buf_pool_mutex_own(buf_pool));
2714 - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2715 + mutex_enter(&buf_pool->free_list_mutex);
2716 + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2721 ut_ad(!block->page.in_flush_list);
2722 ut_ad(!block->page.in_LRU_list);
2723 ut_a(!buf_page_in_file(&block->page));
2724 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2725 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2727 + mutex_exit(&buf_pool->free_list_mutex);
2729 mutex_enter(&block->mutex);
2732 ut_ad(buf_pool_from_block(block) == buf_pool);
2734 mutex_exit(&block->mutex);
2736 + mutex_exit(&buf_pool->free_list_mutex);
2741 ibool mon_value_was = FALSE;
2742 ibool started_monitor = FALSE;
2744 - buf_pool_mutex_enter(buf_pool);
2745 + //buf_pool_mutex_enter(buf_pool);
2747 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2748 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2749 @@ -951,8 +1023,10 @@
2751 page_zip_set_size(&block->page.zip, zip_size);
2753 + mutex_enter(&buf_pool->LRU_list_mutex);
2754 block->page.zip.data = buf_buddy_alloc(
2755 - buf_pool, zip_size, &lru);
2756 + buf_pool, zip_size, &lru, FALSE);
2757 + mutex_exit(&buf_pool->LRU_list_mutex);
2759 UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2761 @@ -960,7 +1034,7 @@
2762 block->page.zip.data = NULL;
2765 - buf_pool_mutex_exit(buf_pool);
2766 + //buf_pool_mutex_exit(buf_pool);
2768 if (started_monitor) {
2769 srv_print_innodb_monitor = mon_value_was;
2770 @@ -972,7 +1046,7 @@
2771 /* If no block was in the free list, search from the end of the LRU
2772 list and try to free a block there */
2774 - buf_pool_mutex_exit(buf_pool);
2775 + //buf_pool_mutex_exit(buf_pool);
2777 freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2779 @@ -1058,7 +1132,8 @@
2782 ut_a(buf_pool->LRU_old);
2783 - ut_ad(buf_pool_mutex_own(buf_pool));
2784 + //ut_ad(buf_pool_mutex_own(buf_pool));
2785 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2786 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2787 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2788 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2789 @@ -1124,7 +1199,8 @@
2793 - ut_ad(buf_pool_mutex_own(buf_pool));
2794 + //ut_ad(buf_pool_mutex_own(buf_pool));
2795 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2796 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2798 /* We first initialize all blocks in the LRU list as old and then use
2799 @@ -1159,13 +1235,14 @@
2802 ut_ad(buf_page_in_file(bpage));
2803 - ut_ad(buf_pool_mutex_own(buf_pool));
2804 + //ut_ad(buf_pool_mutex_own(buf_pool));
2805 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2807 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2808 buf_block_t* block = (buf_block_t*) bpage;
2810 ut_ad(block->in_unzip_LRU_list);
2811 - ut_d(block->in_unzip_LRU_list = FALSE);
2812 + block->in_unzip_LRU_list = FALSE;
2814 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2816 @@ -1183,7 +1260,8 @@
2820 - ut_ad(buf_pool_mutex_own(buf_pool));
2821 + //ut_ad(buf_pool_mutex_own(buf_pool));
2822 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2824 ut_a(buf_page_in_file(bpage));
2826 @@ -1260,12 +1338,13 @@
2830 - ut_ad(buf_pool_mutex_own(buf_pool));
2831 + //ut_ad(buf_pool_mutex_own(buf_pool));
2832 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2834 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2836 ut_ad(!block->in_unzip_LRU_list);
2837 - ut_d(block->in_unzip_LRU_list = TRUE);
2838 + block->in_unzip_LRU_list = TRUE;
2841 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2842 @@ -1286,7 +1365,8 @@
2846 - ut_ad(buf_pool_mutex_own(buf_pool));
2847 + //ut_ad(buf_pool_mutex_own(buf_pool));
2848 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2850 ut_a(buf_page_in_file(bpage));
2852 @@ -1337,7 +1417,8 @@
2856 - ut_ad(buf_pool_mutex_own(buf_pool));
2857 + //ut_ad(buf_pool_mutex_own(buf_pool));
2858 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2860 ut_a(buf_page_in_file(bpage));
2861 ut_ad(!bpage->in_LRU_list);
2862 @@ -1416,7 +1497,8 @@
2864 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2866 - ut_ad(buf_pool_mutex_own(buf_pool));
2867 + //ut_ad(buf_pool_mutex_own(buf_pool));
2868 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2871 buf_pool->stat.n_pages_made_young++;
2872 @@ -1458,19 +1540,20 @@
2873 buf_page_t* bpage, /*!< in: block to be freed */
2874 ibool zip, /*!< in: TRUE if should remove also the
2875 compressed page of an uncompressed page */
2876 - ibool* buf_pool_mutex_released)
2877 + ibool* buf_pool_mutex_released,
2878 /*!< in: pointer to a variable that will
2879 be assigned TRUE if buf_pool_mutex
2880 was temporarily released, or NULL */
2881 + ibool have_LRU_mutex)
2883 buf_page_t* b = NULL;
2884 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2885 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2887 - ut_ad(buf_pool_mutex_own(buf_pool));
2888 + //ut_ad(buf_pool_mutex_own(buf_pool));
2889 ut_ad(mutex_own(block_mutex));
2890 ut_ad(buf_page_in_file(bpage));
2891 - ut_ad(bpage->in_LRU_list);
2892 + //ut_ad(bpage->in_LRU_list);
2893 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2894 #if UNIV_WORD_SIZE == 4
2895 /* On 32-bit systems, there is no padding in buf_page_t. On
2896 @@ -1479,7 +1562,7 @@
2897 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2900 - if (!buf_page_can_relocate(bpage)) {
2901 + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2903 /* Do not free buffer-fixed or I/O-fixed blocks. */
2904 return(BUF_LRU_NOT_FREED);
2905 @@ -1511,15 +1594,15 @@
2906 If it cannot be allocated (without freeing a block
2907 from the LRU list), refuse to free bpage. */
2909 - buf_pool_mutex_exit_forbid(buf_pool);
2910 - b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2911 - buf_pool_mutex_exit_allow(buf_pool);
2912 + //buf_pool_mutex_exit_forbid(buf_pool);
2913 + b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2914 + //buf_pool_mutex_exit_allow(buf_pool);
2916 if (UNIV_UNLIKELY(!b)) {
2917 return(BUF_LRU_CANNOT_RELOCATE);
2920 - memcpy(b, bpage, sizeof *b);
2921 + //memcpy(b, bpage, sizeof *b);
2925 @@ -1530,6 +1613,39 @@
2927 #endif /* UNIV_DEBUG */
2929 + /* not to break latch order, must re-enter block_mutex */
2930 + mutex_exit(block_mutex);
2932 + if (!have_LRU_mutex)
2933 + mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2934 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2935 + mutex_enter(block_mutex);
2937 + /* recheck states of block */
2938 + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2939 + || !buf_page_can_relocate(bpage)) {
2942 + buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2944 + if (!have_LRU_mutex)
2945 + mutex_exit(&buf_pool->LRU_list_mutex);
2946 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2947 + return(BUF_LRU_NOT_FREED);
2948 + } else if (zip || !bpage->zip.data) {
2949 + if (bpage->oldest_modification)
2951 + } else if (bpage->oldest_modification) {
2952 + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2953 + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2959 + memcpy(b, bpage, sizeof *b);
2962 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2963 != BUF_BLOCK_ZIP_FREE) {
2964 ut_a(bpage->buf_fix_count == 0);
2965 @@ -1546,6 +1662,10 @@
2969 + while (prev_b && !prev_b->in_LRU_list) {
2970 + prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2973 b->state = b->oldest_modification
2974 ? BUF_BLOCK_ZIP_DIRTY
2975 : BUF_BLOCK_ZIP_PAGE;
2976 @@ -1642,7 +1762,9 @@
2977 *buf_pool_mutex_released = TRUE;
2980 - buf_pool_mutex_exit(buf_pool);
2981 + //buf_pool_mutex_exit(buf_pool);
2982 + mutex_exit(&buf_pool->LRU_list_mutex);
2983 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2984 mutex_exit(block_mutex);
2986 /* Remove possible adaptive hash index on the page.
2987 @@ -1674,7 +1796,9 @@
2988 : BUF_NO_CHECKSUM_MAGIC);
2991 - buf_pool_mutex_enter(buf_pool);
2992 + //buf_pool_mutex_enter(buf_pool);
2993 + if (have_LRU_mutex)
2994 + mutex_enter(&buf_pool->LRU_list_mutex);
2995 mutex_enter(block_mutex);
2998 @@ -1684,13 +1808,17 @@
2999 mutex_exit(&buf_pool->zip_mutex);
3002 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3003 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3005 /* The block_mutex should have been released by
3006 buf_LRU_block_remove_hashed_page() when it returns
3007 BUF_BLOCK_ZIP_FREE. */
3008 ut_ad(block_mutex == &buf_pool->zip_mutex);
3009 mutex_enter(block_mutex);
3011 + if (!have_LRU_mutex)
3012 + mutex_exit(&buf_pool->LRU_list_mutex);
3013 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3016 return(BUF_LRU_FREED);
3017 @@ -1702,13 +1830,14 @@
3019 buf_LRU_block_free_non_file_page(
3020 /*=============================*/
3021 - buf_block_t* block) /*!< in: block, must not contain a file page */
3022 + buf_block_t* block, /*!< in: block, must not contain a file page */
3023 + ibool have_page_hash_mutex)
3026 buf_pool_t* buf_pool = buf_pool_from_block(block);
3029 - ut_ad(buf_pool_mutex_own(buf_pool));
3030 + //ut_ad(buf_pool_mutex_own(buf_pool));
3031 ut_ad(mutex_own(&block->mutex));
3033 switch (buf_block_get_state(block)) {
3034 @@ -1742,18 +1871,21 @@
3036 block->page.zip.data = NULL;
3037 mutex_exit(&block->mutex);
3038 - buf_pool_mutex_exit_forbid(buf_pool);
3039 + //buf_pool_mutex_exit_forbid(buf_pool);
3042 - buf_pool, data, page_zip_get_size(&block->page.zip));
3043 + buf_pool, data, page_zip_get_size(&block->page.zip),
3044 + have_page_hash_mutex);
3046 - buf_pool_mutex_exit_allow(buf_pool);
3047 + //buf_pool_mutex_exit_allow(buf_pool);
3048 mutex_enter(&block->mutex);
3049 page_zip_set_size(&block->page.zip, 0);
3052 - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3053 + mutex_enter(&buf_pool->free_list_mutex);
3054 + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3055 ut_d(block->page.in_free_list = TRUE);
3056 + mutex_exit(&buf_pool->free_list_mutex);
3058 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3060 @@ -1783,7 +1915,11 @@
3061 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3064 - ut_ad(buf_pool_mutex_own(buf_pool));
3065 + //ut_ad(buf_pool_mutex_own(buf_pool));
3066 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3067 +#ifdef UNIV_SYNC_DEBUG
3068 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3070 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3072 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3073 @@ -1891,7 +2027,9 @@
3075 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3076 mutex_exit(buf_page_get_mutex(bpage));
3077 - buf_pool_mutex_exit(buf_pool);
3078 + //buf_pool_mutex_exit(buf_pool);
3079 + mutex_exit(&buf_pool->LRU_list_mutex);
3080 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3084 @@ -1912,17 +2050,17 @@
3085 ut_a(bpage->zip.data);
3086 ut_a(buf_page_get_zip_size(bpage));
3088 - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3089 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3091 mutex_exit(&buf_pool->zip_mutex);
3092 - buf_pool_mutex_exit_forbid(buf_pool);
3093 + //buf_pool_mutex_exit_forbid(buf_pool);
3096 buf_pool, bpage->zip.data,
3097 - page_zip_get_size(&bpage->zip));
3098 + page_zip_get_size(&bpage->zip), TRUE);
3100 - buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3101 - buf_pool_mutex_exit_allow(buf_pool);
3102 + buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3103 + //buf_pool_mutex_exit_allow(buf_pool);
3105 UNIV_MEM_UNDESC(bpage);
3106 return(BUF_BLOCK_ZIP_FREE);
3107 @@ -1945,13 +2083,13 @@
3108 ut_ad(!bpage->in_flush_list);
3109 ut_ad(!bpage->in_LRU_list);
3110 mutex_exit(&((buf_block_t*) bpage)->mutex);
3111 - buf_pool_mutex_exit_forbid(buf_pool);
3112 + //buf_pool_mutex_exit_forbid(buf_pool);
3116 - page_zip_get_size(&bpage->zip));
3117 + page_zip_get_size(&bpage->zip), TRUE);
3119 - buf_pool_mutex_exit_allow(buf_pool);
3120 + //buf_pool_mutex_exit_allow(buf_pool);
3121 mutex_enter(&((buf_block_t*) bpage)->mutex);
3122 page_zip_set_size(&bpage->zip, 0);
3124 @@ -1977,18 +2115,19 @@
3126 buf_LRU_block_free_hashed_page(
3127 /*===========================*/
3128 - buf_block_t* block) /*!< in: block, must contain a file page and
3129 + buf_block_t* block, /*!< in: block, must contain a file page and
3130 be in a state where it can be freed */
3131 + ibool have_page_hash_mutex)
3134 - buf_pool_t* buf_pool = buf_pool_from_block(block);
3135 - ut_ad(buf_pool_mutex_own(buf_pool));
3136 + //buf_pool_t* buf_pool = buf_pool_from_block(block);
3137 + //ut_ad(buf_pool_mutex_own(buf_pool));
3139 ut_ad(mutex_own(&block->mutex));
3141 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3143 - buf_LRU_block_free_non_file_page(block);
3144 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3147 /**********************************************************************//**
3148 @@ -2015,7 +2154,8 @@
3152 - buf_pool_mutex_enter(buf_pool);
3153 + //buf_pool_mutex_enter(buf_pool);
3154 + mutex_enter(&buf_pool->LRU_list_mutex);
3156 if (ratio != buf_pool->LRU_old_ratio) {
3157 buf_pool->LRU_old_ratio = ratio;
3158 @@ -2027,7 +2167,8 @@
3162 - buf_pool_mutex_exit(buf_pool);
3163 + //buf_pool_mutex_exit(buf_pool);
3164 + mutex_exit(&buf_pool->LRU_list_mutex);
3166 buf_pool->LRU_old_ratio = ratio;
3168 @@ -2124,7 +2265,8 @@
3172 - buf_pool_mutex_enter(buf_pool);
3173 + //buf_pool_mutex_enter(buf_pool);
3174 + mutex_enter(&buf_pool->LRU_list_mutex);
3176 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3178 @@ -2185,16 +2327,22 @@
3180 ut_a(buf_pool->LRU_old_len == old_len);
3182 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3183 + mutex_exit(&buf_pool->LRU_list_mutex);
3184 + mutex_enter(&buf_pool->free_list_mutex);
3186 + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3187 ut_ad(ut_list_node_313->in_free_list));
3189 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3191 - bpage = UT_LIST_GET_NEXT(list, bpage)) {
3192 + bpage = UT_LIST_GET_NEXT(free, bpage)) {
3194 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3197 + mutex_exit(&buf_pool->free_list_mutex);
3198 + mutex_enter(&buf_pool->LRU_list_mutex);
3200 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3201 ut_ad(ut_list_node_313->in_unzip_LRU_list
3202 && ut_list_node_313->page.in_LRU_list));
3203 @@ -2208,7 +2356,8 @@
3204 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3207 - buf_pool_mutex_exit(buf_pool);
3208 + //buf_pool_mutex_exit(buf_pool);
3209 + mutex_exit(&buf_pool->LRU_list_mutex);
3212 /**********************************************************************//**
3213 @@ -2244,7 +2393,8 @@
3214 const buf_page_t* bpage;
3217 - buf_pool_mutex_enter(buf_pool);
3218 + //buf_pool_mutex_enter(buf_pool);
3219 + mutex_enter(&buf_pool->LRU_list_mutex);
3221 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3223 @@ -2301,7 +2451,8 @@
3224 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3227 - buf_pool_mutex_exit(buf_pool);
3228 + //buf_pool_mutex_exit(buf_pool);
3229 + mutex_exit(&buf_pool->LRU_list_mutex);
3232 /**********************************************************************//**
3233 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3234 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3235 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3240 + buf_pool_mutex_exit(buf_pool);
3242 /* Check that almost all pages in the area have been accessed; if
3243 offset == low, the accesses must be in a descending order, otherwise,
3248 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3249 for (i = low; i < high; i++) {
3250 bpage = buf_page_hash_get(buf_pool, space, i);
3254 if (fail_count > threshold) {
3255 /* Too many failures: return */
3256 - buf_pool_mutex_exit(buf_pool);
3257 + //buf_pool_mutex_exit(buf_pool);
3258 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3263 bpage = buf_page_hash_get(buf_pool, space, offset);
3265 if (bpage == NULL) {
3266 - buf_pool_mutex_exit(buf_pool);
3267 + //buf_pool_mutex_exit(buf_pool);
3268 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3273 pred_offset = fil_page_get_prev(frame);
3274 succ_offset = fil_page_get_next(frame);
3276 - buf_pool_mutex_exit(buf_pool);
3277 + //buf_pool_mutex_exit(buf_pool);
3278 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3280 if ((offset == low) && (succ_offset == offset + 1)) {
3282 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3283 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3284 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3285 @@ -250,6 +250,10 @@
3286 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3287 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3288 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3289 + {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3290 + {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3291 + {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3292 + {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3293 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3294 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3295 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3297 {&archive_lock_key, "archive_lock", 0},
3298 # endif /* UNIV_LOG_ARCHIVE */
3299 {&btr_search_latch_key, "btr_search_latch", 0},
3300 + {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3301 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3302 {&buf_block_lock_key, "buf_block_lock", 0},
3303 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3304 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3305 --- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3306 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
3307 @@ -1725,7 +1725,8 @@
3309 buf_pool = buf_pool_from_array(i);
3311 - buf_pool_mutex_enter(buf_pool);
3312 + //buf_pool_mutex_enter(buf_pool);
3313 + mutex_enter(&buf_pool->zip_free_mutex);
3315 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3316 buf_buddy_stat_t* buddy_stat;
3317 @@ -1755,7 +1756,8 @@
3321 - buf_pool_mutex_exit(buf_pool);
3322 + //buf_pool_mutex_exit(buf_pool);
3323 + mutex_exit(&buf_pool->zip_free_mutex);
3327 diff -ruN a/storage/innobase/handler/innodb_patch_info.h b/storage/innobase/handler/innodb_patch_info.h
3328 --- a/storage/innobase/handler/innodb_patch_info.h 2010-12-03 15:48:03.064995674 +0900
3329 +++ b/storage/innobase/handler/innodb_patch_info.h 2010-12-03 15:48:29.331955850 +0900
3331 {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
3332 {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
3333 {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
3334 +{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
3335 {NULL, NULL, NULL, NULL}
3337 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3338 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3339 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3340 @@ -3700,9 +3700,11 @@
3341 ulint fold = buf_page_address_fold(space, page_no);
3342 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3344 - buf_pool_mutex_enter(buf_pool);
3345 + //buf_pool_mutex_enter(buf_pool);
3346 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3347 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3348 - buf_pool_mutex_exit(buf_pool);
3349 + //buf_pool_mutex_exit(buf_pool);
3350 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3352 if (UNIV_LIKELY_NULL(bpage)) {
3353 /* A buffer pool watch has been set or the
3354 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3355 --- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3356 +++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3358 buf_pool_t* buf_pool,
3359 /*!< buffer pool in which the block resides */
3360 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3361 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3362 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3363 TRUE if storage was allocated from the LRU list
3364 and buf_pool->mutex was temporarily released,
3365 or NULL if the LRU list should not be used */
3366 + ibool have_page_hash_mutex)
3367 __attribute__((malloc));
3369 /**********************************************************************//**
3371 /*!< buffer pool in which the block resides */
3372 void* buf, /*!< in: block to be freed, must not be
3373 pointed to by the buffer pool */
3374 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3375 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3376 + ibool have_page_hash_mutex)
3377 __attribute__((nonnull));
3380 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3381 --- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3382 +++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3384 /*!< in: buffer pool in which the page resides */
3385 ulint i, /*!< in: index of buf_pool->zip_free[],
3386 or BUF_BUDDY_SIZES */
3387 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3388 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3389 TRUE if storage was allocated from the LRU list
3390 and buf_pool->mutex was temporarily released,
3391 or NULL if the LRU list should not be used */
3392 + ibool have_page_hash_mutex)
3393 __attribute__((malloc));
3395 /**********************************************************************//**
3397 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3398 void* buf, /*!< in: block to be freed, must not be
3399 pointed to by the buffer pool */
3400 - ulint i) /*!< in: index of buf_pool->zip_free[],
3401 + ulint i, /*!< in: index of buf_pool->zip_free[],
3402 or BUF_BUDDY_SIZES */
3403 + ibool have_page_hash_mutex)
3404 __attribute__((nonnull));
3406 /**********************************************************************//**
3407 @@ -102,16 +104,17 @@
3409 ulint size, /*!< in: block size, up to
3411 - ibool* lru) /*!< in: pointer to a variable
3412 + ibool* lru, /*!< in: pointer to a variable
3413 that will be assigned TRUE if
3414 storage was allocated from the
3415 LRU list and buf_pool->mutex was
3416 temporarily released, or NULL if
3417 the LRU list should not be used */
3418 + ibool have_page_hash_mutex)
3420 - ut_ad(buf_pool_mutex_own(buf_pool));
3421 + //ut_ad(buf_pool_mutex_own(buf_pool));
3423 - return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3424 + return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3427 /**********************************************************************//**
3428 @@ -123,12 +126,25 @@
3429 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3430 void* buf, /*!< in: block to be freed, must not be
3431 pointed to by the buffer pool */
3432 - ulint size) /*!< in: block size, up to
3433 + ulint size, /*!< in: block size, up to
3435 + ibool have_page_hash_mutex)
3437 - ut_ad(buf_pool_mutex_own(buf_pool));
3438 + //ut_ad(buf_pool_mutex_own(buf_pool));
3440 + if (!have_page_hash_mutex) {
3441 + mutex_enter(&buf_pool->LRU_list_mutex);
3442 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3445 - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3446 + mutex_enter(&buf_pool->zip_free_mutex);
3447 + buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3448 + mutex_exit(&buf_pool->zip_free_mutex);
3450 + if (!have_page_hash_mutex) {
3451 + mutex_exit(&buf_pool->LRU_list_mutex);
3452 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3456 #ifdef UNIV_MATERIALIZE
3457 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3458 --- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3459 +++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
3460 @@ -132,6 +132,20 @@
3461 /*==========================*/
3463 /********************************************************************//**
3467 +buf_pool_page_hash_x_lock_all(void);
3468 +/*================================*/
3470 +/********************************************************************//**
3474 +buf_pool_page_hash_x_unlock_all(void);
3475 +/*==================================*/
3477 +/********************************************************************//**
3478 Creates the buffer pool.
3479 @return own: buf_pool object, NULL if not enough memory or error */
3481 @@ -761,6 +775,15 @@
3482 const buf_page_t* bpage) /*!< in: pointer to control block */
3483 __attribute__((pure));
3485 +/*************************************************************************
3486 +Gets the mutex of a block and enter the mutex with consistency. */
3489 +buf_page_get_mutex_enter(
3490 +/*=========================*/
3491 + const buf_page_t* bpage) /*!< in: pointer to control block */
3492 + __attribute__((pure));
3494 /*********************************************************************//**
3495 Get the flush type of a page.
3496 @return flush type */
3497 @@ -1227,7 +1250,7 @@
3498 All these are protected by buf_pool_mutex. */
3501 - UT_LIST_NODE_T(buf_page_t) list;
3502 + /* UT_LIST_NODE_T(buf_page_t) list; */
3503 /*!< based on state, this is a
3504 list node, protected either by
3505 buf_pool_mutex or by
3506 @@ -1254,6 +1277,10 @@
3507 BUF_BLOCK_REMOVE_HASH or
3508 BUF_BLOCK_READY_IN_USE. */
3510 + /* resplit for optimistic use */
3511 + UT_LIST_NODE_T(buf_page_t) free;
3512 + UT_LIST_NODE_T(buf_page_t) flush_list;
3513 + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3515 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3516 when flush_list_mutex is free, the
3517 @@ -1347,11 +1374,11 @@
3518 a block is in the unzip_LRU list
3519 if page.state == BUF_BLOCK_FILE_PAGE
3520 and page.zip.data != NULL */
3522 +//#ifdef UNIV_DEBUG
3523 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3524 decompressed LRU list;
3525 used in debugging */
3526 -#endif /* UNIV_DEBUG */
3527 +//#endif /* UNIV_DEBUG */
3528 mutex_t mutex; /*!< mutex protecting this block:
3529 state (also protected by the buffer
3530 pool mutex), io_fix, buf_fix_count,
3531 @@ -1517,6 +1544,11 @@
3532 pool instance, protects compressed
3533 only pages (of type buf_page_t, not
3535 + mutex_t LRU_list_mutex;
3536 + rw_lock_t page_hash_latch;
3537 + mutex_t free_list_mutex;
3538 + mutex_t zip_free_mutex;
3539 + mutex_t zip_hash_mutex;
3540 ulint instance_no; /*!< Array index of this buffer
3542 ulint old_pool_size; /*!< Old pool size in bytes */
3543 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3544 --- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3545 +++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3547 case BUF_BLOCK_ZIP_FREE:
3548 /* This is a free page in buf_pool->zip_free[].
3549 Such pages should only be accessed by the buddy allocator. */
3551 + /* ut_error; */ /* optimistic */
3553 case BUF_BLOCK_ZIP_PAGE:
3554 case BUF_BLOCK_ZIP_DIRTY:
3555 @@ -275,9 +275,14 @@
3557 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3559 + if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3560 + /* TODO: this code is the interim. should be confirmed later. */
3561 + return(&buf_pool->zip_mutex);
3564 switch (buf_page_get_state(bpage)) {
3565 case BUF_BLOCK_ZIP_FREE:
3567 + /* ut_error; */ /* optimistic */
3569 case BUF_BLOCK_ZIP_PAGE:
3570 case BUF_BLOCK_ZIP_DIRTY:
3571 @@ -287,6 +292,28 @@
3575 +/*************************************************************************
3576 +Gets the mutex of a block and enter the mutex with consistency. */
3579 +buf_page_get_mutex_enter(
3580 +/*=========================*/
3581 + const buf_page_t* bpage) /*!< in: pointer to control block */
3583 + mutex_t* block_mutex;
3586 + block_mutex = buf_page_get_mutex(bpage);
3588 + return block_mutex;
3590 + mutex_enter(block_mutex);
3591 + if (block_mutex == buf_page_get_mutex(bpage))
3592 + return block_mutex;
3593 + mutex_exit(block_mutex);
3597 /*********************************************************************//**
3598 Get the flush type of a page.
3599 @return flush type */
3601 enum buf_io_fix io_fix) /*!< in: io_fix state */
3604 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3605 - ut_ad(buf_pool_mutex_own(buf_pool));
3606 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3607 + //ut_ad(buf_pool_mutex_own(buf_pool));
3609 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3611 @@ -414,14 +441,14 @@
3612 const buf_page_t* bpage) /*!< control block being relocated */
3615 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3616 - ut_ad(buf_pool_mutex_own(buf_pool));
3617 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3618 + //ut_ad(buf_pool_mutex_own(buf_pool));
3620 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3621 ut_ad(buf_page_in_file(bpage));
3622 - ut_ad(bpage->in_LRU_list);
3623 + //ut_ad(bpage->in_LRU_list);
3625 - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3626 + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3627 && bpage->buf_fix_count == 0);
3631 const buf_page_t* bpage) /*!< in: control block */
3634 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3635 - ut_ad(buf_pool_mutex_own(buf_pool));
3636 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3637 + //ut_ad(buf_pool_mutex_own(buf_pool));
3639 ut_ad(buf_page_in_file(bpage));
3642 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3643 #endif /* UNIV_DEBUG */
3644 ut_a(buf_page_in_file(bpage));
3645 - ut_ad(buf_pool_mutex_own(buf_pool));
3646 + //ut_ad(buf_pool_mutex_own(buf_pool));
3647 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3648 ut_ad(bpage->in_LRU_list);
3650 #ifdef UNIV_LRU_DEBUG
3651 @@ -503,9 +531,10 @@
3652 ulint time_ms) /*!< in: ut_time_ms() */
3655 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3656 - ut_ad(buf_pool_mutex_own(buf_pool));
3657 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3658 + //ut_ad(buf_pool_mutex_own(buf_pool));
3660 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3661 ut_a(buf_page_in_file(bpage));
3663 if (!bpage->access_time) {
3664 @@ -719,19 +748,19 @@
3666 buf_block_t* block) /*!< in, own: block to be freed */
3668 - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3669 + //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3671 - buf_pool_mutex_enter(buf_pool);
3672 + //buf_pool_mutex_enter(buf_pool);
3674 mutex_enter(&block->mutex);
3676 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3678 - buf_LRU_block_free_non_file_page(block);
3679 + buf_LRU_block_free_non_file_page(block, FALSE);
3681 mutex_exit(&block->mutex);
3683 - buf_pool_mutex_exit(buf_pool);
3684 + //buf_pool_mutex_exit(buf_pool);
3686 #endif /* !UNIV_HOTBACKUP */
3688 @@ -779,17 +808,17 @@
3692 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
3694 - mutex_enter(block_mutex);
3695 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3697 - if (buf_page_in_file(bpage)) {
3698 + if (block_mutex && buf_page_in_file(bpage)) {
3699 lsn = bpage->newest_modification;
3704 - mutex_exit(block_mutex);
3705 + if (block_mutex) {
3706 + mutex_exit(block_mutex);
3712 #ifdef UNIV_SYNC_DEBUG
3713 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3715 - ut_ad((buf_pool_mutex_own(buf_pool)
3716 + ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3717 && (block->page.buf_fix_count == 0))
3718 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3719 #endif /* UNIV_SYNC_DEBUG */
3720 @@ -962,7 +991,11 @@
3724 - ut_ad(buf_pool_mutex_own(buf_pool));
3725 + //ut_ad(buf_pool_mutex_own(buf_pool));
3726 +#ifdef UNIV_SYNC_DEBUG
3727 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3728 + || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3730 ut_ad(fold == buf_page_address_fold(space, offset));
3732 /* Look for the page in the hash table */
3733 @@ -1047,11 +1080,13 @@
3734 const buf_page_t* bpage;
3735 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3737 - buf_pool_mutex_enter(buf_pool);
3738 + //buf_pool_mutex_enter(buf_pool);
3739 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3741 bpage = buf_page_hash_get(buf_pool, space, offset);
3743 - buf_pool_mutex_exit(buf_pool);
3744 + //buf_pool_mutex_exit(buf_pool);
3745 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3747 return(bpage != NULL);
3749 @@ -1179,4 +1214,38 @@
3750 buf_pool_mutex_exit(buf_pool);
3754 +/********************************************************************//**
3758 +buf_pool_page_hash_x_lock_all(void)
3759 +/*===============================*/
3763 + for (i = 0; i < srv_buf_pool_instances; i++) {
3764 + buf_pool_t* buf_pool;
3766 + buf_pool = buf_pool_from_array(i);
3767 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3771 +/********************************************************************//**
3775 +buf_pool_page_hash_x_unlock_all(void)
3776 +/*=================================*/
3780 + for (i = 0; i < srv_buf_pool_instances; i++) {
3781 + buf_pool_t* buf_pool;
3783 + buf_pool = buf_pool_from_array(i);
3784 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3787 #endif /* !UNIV_HOTBACKUP */
3788 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3789 --- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3790 +++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3791 @@ -113,10 +113,11 @@
3792 buf_page_t* bpage, /*!< in: block to be freed */
3793 ibool zip, /*!< in: TRUE if should remove also the
3794 compressed page of an uncompressed page */
3795 - ibool* buf_pool_mutex_released);
3796 + ibool* buf_pool_mutex_released,
3797 /*!< in: pointer to a variable that will
3798 be assigned TRUE if buf_pool->mutex
3799 was temporarily released, or NULL */
3800 + ibool have_LRU_mutex);
3801 /******************************************************************//**
3802 Try to free a replaceable block.
3803 @return TRUE if found and freed */
3806 buf_LRU_block_free_non_file_page(
3807 /*=============================*/
3808 - buf_block_t* block); /*!< in: block, must not contain a file page */
3809 + buf_block_t* block, /*!< in: block, must not contain a file page */
3810 + ibool have_page_hash_mutex);
3811 /******************************************************************//**
3812 Adds a block to the LRU list. */
3814 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3815 --- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3816 +++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3818 extern mysql_pfs_key_t archive_lock_key;
3819 # endif /* UNIV_LOG_ARCHIVE */
3820 extern mysql_pfs_key_t btr_search_latch_key;
3821 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3822 extern mysql_pfs_key_t buf_block_lock_key;
3823 # ifdef UNIV_SYNC_DEBUG
3824 extern mysql_pfs_key_t buf_block_debug_latch_key;
3825 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3826 --- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3827 +++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3829 extern mysql_pfs_key_t buffer_block_mutex_key;
3830 extern mysql_pfs_key_t buf_pool_mutex_key;
3831 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3832 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3833 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3834 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3835 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3836 extern mysql_pfs_key_t cache_last_read_mutex_key;
3837 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3838 extern mysql_pfs_key_t dict_sys_mutex_key;
3840 #define SYNC_TRX_LOCK_HEAP 298
3841 #define SYNC_TRX_SYS_HEADER 290
3842 #define SYNC_LOG 170
3843 -#define SYNC_LOG_FLUSH_ORDER 147
3844 +#define SYNC_LOG_FLUSH_ORDER 156
3845 #define SYNC_RECV 168
3846 #define SYNC_WORK_QUEUE 162
3847 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3848 @@ -671,8 +675,13 @@
3849 SYNC_SEARCH_SYS, as memory allocation
3850 can call routines there! Otherwise
3851 the level is SYNC_MEM_HASH. */
3852 +#define SYNC_BUF_LRU_LIST 158
3853 +#define SYNC_BUF_PAGE_HASH 157
3854 +#define SYNC_BUF_BLOCK 155 /* Block mutex */
3855 +#define SYNC_BUF_FREE_LIST 153
3856 +#define SYNC_BUF_ZIP_FREE 152
3857 +#define SYNC_BUF_ZIP_HASH 151
3858 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3859 -#define SYNC_BUF_BLOCK 146 /* Block mutex */
3860 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3861 #define SYNC_DOUBLEWRITE 140
3862 #define SYNC_ANY_LATCH 135
3864 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3865 when atomic operations are not enabled */
3867 - ulint waiters; /*!< This ulint is set to 1 if there are (or
3868 + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3869 may be) threads waiting in the global wait
3870 array for this mutex to be released.
3871 Otherwise, this is 0. */
3872 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3873 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3874 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
3875 @@ -3065,7 +3065,7 @@
3876 level += log_sys->max_checkpoint_age
3877 - (lsn - oldest_modification);
3879 - bpage = UT_LIST_GET_NEXT(list, bpage);
3880 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3884 @@ -3150,7 +3150,7 @@
3888 - bpage = UT_LIST_GET_NEXT(list, bpage);
3889 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3893 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3894 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3895 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3897 mutex->lock_word = 0;
3899 mutex->event = os_event_create(NULL);
3900 - mutex_set_waiters(mutex, 0);
3901 + mutex->waiters = 0;
3903 mutex->magic_n = MUTEX_MAGIC_N;
3904 #endif /* UNIV_DEBUG */
3905 @@ -444,6 +444,15 @@
3906 mutex_t* mutex, /*!< in: mutex */
3907 ulint n) /*!< in: value to set */
3909 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3913 + os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3915 + os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3918 volatile ulint* ptr; /* declared volatile to ensure that
3919 the value is stored to memory */
3923 *ptr = n; /* Here we assume that the write of a single
3924 word in memory is atomic */
3928 /******************************************************************//**
3929 @@ -1193,7 +1203,12 @@
3933 + case SYNC_BUF_LRU_LIST:
3934 case SYNC_BUF_FLUSH_LIST:
3935 + case SYNC_BUF_PAGE_HASH:
3936 + case SYNC_BUF_FREE_LIST:
3937 + case SYNC_BUF_ZIP_FREE:
3938 + case SYNC_BUF_ZIP_HASH:
3940 /* We can have multiple mutexes of this type therefore we
3941 can only check whether the greater than condition holds. */
3942 @@ -1211,7 +1226,8 @@
3943 buffer block (block->mutex or buf_pool_zip_mutex). */
3944 if (!sync_thread_levels_g(array, level, FALSE)) {
3945 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3946 - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3947 + /* the exact rule is not fixed yet, for now */
3948 + //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));