1 # name : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
15 - buf_pool_mutex_enter(buf_pool);
16 + //buf_pool_mutex_enter(buf_pool);
17 + mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
20 /* Only free the block if it is still allocated to
21 @@ -4050,17 +4051,22 @@
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
25 - if (buf_LRU_free_block(&block->page, all, NULL)
26 + if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
28 - && all && block->page.zip.data) {
29 + && all && block->page.zip.data
30 + /* Now, buf_LRU_free_block() may release mutex temporarily */
31 + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 + && buf_block_get_space(block) == space
33 + && buf_block_get_page_no(block) == page_no) {
34 /* Attempt to deallocate the uncompressed page
35 if the whole block cannot be deallocted. */
37 - buf_LRU_free_block(&block->page, FALSE, NULL);
38 + buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
42 - buf_pool_mutex_exit(buf_pool);
43 + //buf_pool_mutex_exit(buf_pool);
44 + mutex_exit(&buf_pool->LRU_list_mutex);
45 mutex_exit(&block->mutex);
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
54 rw_lock_x_lock(&btr_search_latch);
55 - buf_pool_mutex_enter_all();
56 + //buf_pool_mutex_enter_all();
58 table = btr_search_sys->hash_index;
62 buf_pool = buf_pool_from_array(j);
64 + mutex_enter(&buf_pool->LRU_list_mutex);
66 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
68 while (bpage != NULL) {
69 @@ -1301,9 +1303,11 @@
71 bpage = UT_LIST_GET_PREV(LRU, bpage);
74 + mutex_exit(&buf_pool->LRU_list_mutex);
77 - buf_pool_mutex_exit_all();
78 + //buf_pool_mutex_exit_all();
79 rw_lock_x_unlock(&btr_search_latch);
81 if (UNIV_LIKELY_NULL(heap)) {
83 rec_offs_init(offsets_);
85 rw_lock_x_lock(&btr_search_latch);
86 - buf_pool_mutex_enter_all();
87 + buf_pool_page_hash_x_lock_all();
89 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
91 @@ -1904,11 +1908,11 @@
92 /* We release btr_search_latch every once in a while to
93 give other queries a chance to run. */
94 if ((i != 0) && ((i % chunk_size) == 0)) {
95 - buf_pool_mutex_exit_all();
96 + buf_pool_page_hash_x_unlock_all();
97 rw_lock_x_unlock(&btr_search_latch);
99 rw_lock_x_lock(&btr_search_latch);
100 - buf_pool_mutex_enter_all();
101 + buf_pool_page_hash_x_lock_all();
104 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
105 @@ -2019,11 +2023,11 @@
106 /* We release btr_search_latch every once in a while to
107 give other queries a chance to run. */
109 - buf_pool_mutex_exit_all();
110 + buf_pool_page_hash_x_unlock_all();
111 rw_lock_x_unlock(&btr_search_latch);
113 rw_lock_x_lock(&btr_search_latch);
114 - buf_pool_mutex_enter_all();
115 + buf_pool_page_hash_x_lock_all();
118 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
119 @@ -2031,7 +2035,7 @@
123 - buf_pool_mutex_exit_all();
124 + buf_pool_page_hash_x_unlock_all();
125 rw_lock_x_unlock(&btr_search_latch);
126 if (UNIV_LIKELY_NULL(heap)) {
128 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
129 --- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
130 +++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
132 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
133 #endif /* UNIV_DEBUG_VALGRIND */
135 - ut_ad(buf_pool_mutex_own(buf_pool));
136 + //ut_ad(buf_pool_mutex_own(buf_pool));
137 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
138 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
139 ut_ad(buf_pool->zip_free[i].start != bpage);
140 - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
141 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
143 #ifdef UNIV_DEBUG_VALGRIND
144 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
146 buf_pool->zip_free[] */
148 #ifdef UNIV_DEBUG_VALGRIND
149 - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
150 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
151 + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
152 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
154 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
155 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
157 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
158 #endif /* UNIV_DEBUG_VALGRIND */
160 - ut_ad(buf_pool_mutex_own(buf_pool));
161 + //ut_ad(buf_pool_mutex_own(buf_pool));
162 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
163 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
164 - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
165 + UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
167 #ifdef UNIV_DEBUG_VALGRIND
168 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
169 @@ -128,12 +130,13 @@
173 - ut_ad(buf_pool_mutex_own(buf_pool));
174 + //ut_ad(buf_pool_mutex_own(buf_pool));
175 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
176 ut_a(i < BUF_BUDDY_SIZES);
178 #ifndef UNIV_DEBUG_VALGRIND
179 /* Valgrind would complain about accessing free memory. */
180 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
181 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
182 ut_ad(buf_page_get_state(ut_list_node_313)
183 == BUF_BLOCK_ZIP_FREE)));
184 #endif /* !UNIV_DEBUG_VALGRIND */
185 @@ -177,16 +180,19 @@
186 buf_buddy_block_free(
187 /*=================*/
188 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
189 - void* buf) /*!< in: buffer frame to deallocate */
190 + void* buf, /*!< in: buffer frame to deallocate */
191 + ibool have_page_hash_mutex)
193 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
197 - ut_ad(buf_pool_mutex_own(buf_pool));
198 + //ut_ad(buf_pool_mutex_own(buf_pool));
199 ut_ad(!mutex_own(&buf_pool->zip_mutex));
200 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
202 + mutex_enter(&buf_pool->zip_hash_mutex);
204 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
205 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
206 && bpage->in_zip_hash && !bpage->in_page_hash),
207 @@ -198,12 +204,14 @@
208 ut_d(bpage->in_zip_hash = FALSE);
209 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
211 + mutex_exit(&buf_pool->zip_hash_mutex);
213 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
214 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
216 block = (buf_block_t*) bpage;
217 mutex_enter(&block->mutex);
218 - buf_LRU_block_free_non_file_page(block);
219 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
220 mutex_exit(&block->mutex);
222 ut_ad(buf_pool->buddy_n_frames > 0);
225 buf_pool_t* buf_pool = buf_pool_from_block(block);
226 const ulint fold = BUF_POOL_ZIP_FOLD(block);
227 - ut_ad(buf_pool_mutex_own(buf_pool));
228 + //ut_ad(buf_pool_mutex_own(buf_pool));
229 ut_ad(!mutex_own(&buf_pool->zip_mutex));
230 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
233 ut_ad(!block->page.in_page_hash);
234 ut_ad(!block->page.in_zip_hash);
235 ut_d(block->page.in_zip_hash = TRUE);
237 + mutex_enter(&buf_pool->zip_hash_mutex);
238 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
239 + mutex_exit(&buf_pool->zip_hash_mutex);
241 ut_d(buf_pool->buddy_n_frames++);
244 bpage->state = BUF_BLOCK_ZIP_FREE;
245 #ifndef UNIV_DEBUG_VALGRIND
246 /* Valgrind would complain about accessing free memory. */
247 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
248 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
249 ut_ad(buf_page_get_state(
251 == BUF_BLOCK_ZIP_FREE)));
252 @@ -291,25 +302,29 @@
253 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
254 ulint i, /*!< in: index of buf_pool->zip_free[],
255 or BUF_BUDDY_SIZES */
256 - ibool* lru) /*!< in: pointer to a variable that
257 + ibool* lru, /*!< in: pointer to a variable that
258 will be assigned TRUE if storage was
259 allocated from the LRU list and
260 buf_pool->mutex was temporarily
261 released, or NULL if the LRU list
262 should not be used */
263 + ibool have_page_hash_mutex)
267 - ut_ad(buf_pool_mutex_own(buf_pool));
268 + //ut_ad(buf_pool_mutex_own(buf_pool));
269 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
270 ut_ad(!mutex_own(&buf_pool->zip_mutex));
272 if (i < BUF_BUDDY_SIZES) {
273 /* Try to allocate from the buddy system. */
274 + mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_zip(buf_pool, i);
280 + mutex_exit(&buf_pool->zip_free_mutex);
283 /* Try allocating from the buf_pool->free list. */
284 @@ -326,19 +341,30 @@
287 /* Try replacing an uncompressed page in the buffer pool. */
288 - buf_pool_mutex_exit(buf_pool);
289 + //buf_pool_mutex_exit(buf_pool);
290 + mutex_exit(&buf_pool->LRU_list_mutex);
291 + if (have_page_hash_mutex) {
292 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
294 block = buf_LRU_get_free_block(buf_pool, 0);
296 - buf_pool_mutex_enter(buf_pool);
297 + //buf_pool_mutex_enter(buf_pool);
298 + mutex_enter(&buf_pool->LRU_list_mutex);
299 + if (have_page_hash_mutex) {
300 + rw_lock_x_lock(&buf_pool->page_hash_latch);
304 buf_buddy_block_register(block);
306 + mutex_enter(&buf_pool->zip_free_mutex);
307 block = buf_buddy_alloc_from(
308 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
311 buf_pool->buddy_stat[i].used++;
312 + mutex_exit(&buf_pool->zip_free_mutex);
319 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
321 - ut_ad(buf_pool_mutex_own(buf_pool));
322 + //ut_ad(buf_pool_mutex_own(buf_pool));
323 +#ifdef UNIV_SYNC_DEBUG
324 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
327 switch (buf_page_get_state(bpage)) {
328 case BUF_BLOCK_ZIP_FREE:
330 case BUF_BLOCK_FILE_PAGE:
331 case BUF_BLOCK_MEMORY:
332 case BUF_BLOCK_REMOVE_HASH:
334 + /* ut_error; */ /* optimistic */
335 case BUF_BLOCK_ZIP_DIRTY:
336 /* Cannot relocate dirty pages. */
341 mutex_enter(&buf_pool->zip_mutex);
342 + mutex_enter(&buf_pool->zip_free_mutex);
344 if (!buf_page_can_relocate(bpage)) {
345 mutex_exit(&buf_pool->zip_mutex);
346 + mutex_exit(&buf_pool->zip_free_mutex);
350 + if (bpage != buf_page_hash_get(buf_pool,
351 + bpage->space, bpage->offset)) {
352 + mutex_exit(&buf_pool->zip_mutex);
353 + mutex_exit(&buf_pool->zip_free_mutex);
357 @@ -384,18 +422,19 @@
358 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
360 /* relocate buf_pool->zip_clean */
361 - b = UT_LIST_GET_PREV(list, dpage);
362 - UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
363 + b = UT_LIST_GET_PREV(zip_list, dpage);
364 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
367 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
368 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
370 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
371 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
374 UNIV_MEM_INVALID(bpage, sizeof *bpage);
376 mutex_exit(&buf_pool->zip_mutex);
377 + mutex_exit(&buf_pool->zip_free_mutex);
381 @@ -409,14 +448,16 @@
382 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
383 void* src, /*!< in: block to relocate */
384 void* dst, /*!< in: free block to relocate to */
385 - ulint i) /*!< in: index of
386 + ulint i, /*!< in: index of
387 buf_pool->zip_free[] */
388 + ibool have_page_hash_mutex)
391 const ulint size = BUF_BUDDY_LOW << i;
392 ullint usec = ut_time_us(NULL);
394 - ut_ad(buf_pool_mutex_own(buf_pool));
395 + //ut_ad(buf_pool_mutex_own(buf_pool));
396 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
397 ut_ad(!mutex_own(&buf_pool->zip_mutex));
398 ut_ad(!ut_align_offset(src, size));
399 ut_ad(!ut_align_offset(dst, size));
401 /* This is a compressed page. */
404 + if (!have_page_hash_mutex) {
405 + mutex_exit(&buf_pool->zip_free_mutex);
406 + mutex_enter(&buf_pool->LRU_list_mutex);
407 + rw_lock_x_lock(&buf_pool->page_hash_latch);
410 /* The src block may be split into smaller blocks,
411 some of which may be free. Thus, the
412 mach_read_from_4() calls below may attempt to read
414 added to buf_pool->page_hash yet. Obviously,
415 it cannot be relocated. */
417 + if (!have_page_hash_mutex) {
418 + mutex_enter(&buf_pool->zip_free_mutex);
419 + mutex_exit(&buf_pool->LRU_list_mutex);
420 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
425 @@ -473,18 +525,27 @@
426 For the sake of simplicity, give up. */
427 ut_ad(page_zip_get_size(&bpage->zip) < size);
429 + if (!have_page_hash_mutex) {
430 + mutex_enter(&buf_pool->zip_free_mutex);
431 + mutex_exit(&buf_pool->LRU_list_mutex);
432 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
437 + /* To keep latch order */
438 + if (have_page_hash_mutex)
439 + mutex_exit(&buf_pool->zip_free_mutex);
441 /* The block must have been allocated, but it may
442 contain uninitialized data. */
443 UNIV_MEM_ASSERT_W(src, size);
445 - mutex = buf_page_get_mutex(bpage);
446 + mutex = buf_page_get_mutex_enter(bpage);
448 - mutex_enter(mutex);
449 + mutex_enter(&buf_pool->zip_free_mutex);
451 - if (buf_page_can_relocate(bpage)) {
452 + if (mutex && buf_page_can_relocate(bpage)) {
453 /* Relocate the compressed page. */
454 ut_a(bpage->zip.data == src);
455 memcpy(dst, src, size);
456 @@ -499,10 +560,22 @@
457 buddy_stat->relocated_usec
458 += ut_time_us(NULL) - usec;
461 + if (!have_page_hash_mutex) {
462 + mutex_exit(&buf_pool->LRU_list_mutex);
463 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
469 + if (!have_page_hash_mutex) {
470 + mutex_exit(&buf_pool->LRU_list_mutex);
471 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
477 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
478 /* This must be a buf_page_t object. */
479 #if UNIV_WORD_SIZE == 4
480 @@ -511,10 +584,31 @@
481 about uninitialized pad bytes. */
482 UNIV_MEM_ASSERT_RW(src, size);
485 + mutex_exit(&buf_pool->zip_free_mutex);
487 + if (!have_page_hash_mutex) {
488 + mutex_enter(&buf_pool->LRU_list_mutex);
489 + rw_lock_x_lock(&buf_pool->page_hash_latch);
492 if (buf_buddy_relocate_block(src, dst)) {
493 + mutex_enter(&buf_pool->zip_free_mutex);
495 + if (!have_page_hash_mutex) {
496 + mutex_exit(&buf_pool->LRU_list_mutex);
497 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
503 + mutex_enter(&buf_pool->zip_free_mutex);
505 + if (!have_page_hash_mutex) {
506 + mutex_exit(&buf_pool->LRU_list_mutex);
507 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
512 @@ -529,13 +623,15 @@
513 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
514 void* buf, /*!< in: block to be freed, must not be
515 pointed to by the buffer pool */
516 - ulint i) /*!< in: index of buf_pool->zip_free[],
517 + ulint i, /*!< in: index of buf_pool->zip_free[],
518 or BUF_BUDDY_SIZES */
519 + ibool have_page_hash_mutex)
524 - ut_ad(buf_pool_mutex_own(buf_pool));
525 + //ut_ad(buf_pool_mutex_own(buf_pool));
526 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
527 ut_ad(!mutex_own(&buf_pool->zip_mutex));
528 ut_ad(i <= BUF_BUDDY_SIZES);
529 ut_ad(buf_pool->buddy_stat[i].used > 0);
531 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
533 if (i == BUF_BUDDY_SIZES) {
534 - buf_buddy_block_free(buf_pool, buf);
535 + mutex_exit(&buf_pool->zip_free_mutex);
536 + buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
537 + mutex_enter(&buf_pool->zip_free_mutex);
545 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
546 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
547 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
550 @@ -600,13 +698,13 @@
551 #ifndef UNIV_DEBUG_VALGRIND
553 /* Valgrind would complain about accessing free memory. */
554 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
555 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
556 ut_ad(buf_page_get_state(ut_list_node_313)
557 == BUF_BLOCK_ZIP_FREE)));
558 #endif /* UNIV_DEBUG_VALGRIND */
560 /* The buddy is not free. Is there a free block of this size? */
561 - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
562 + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
565 /* Remove the block from the free list, because a successful
567 buf_buddy_remove_from_free(buf_pool, bpage, i);
569 /* Try to relocate the buddy of buf to the free block. */
570 - if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
571 + if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
573 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
575 @@ -636,14 +734,14 @@
577 (Parts of the buddy can be free in
578 buf_pool->zip_free[j] with j < i.) */
579 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
580 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
581 ut_ad(buf_page_get_state(
583 == BUF_BLOCK_ZIP_FREE
584 && ut_list_node_313 != buddy)));
585 #endif /* !UNIV_DEBUG_VALGRIND */
587 - if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
588 + if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
591 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
592 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
593 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
594 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
596 #ifdef UNIV_PFS_RWLOCK
597 /* Keys to register buffer block related rwlocks and mutexes with
598 performance schema */
599 +UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
600 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
601 # ifdef UNIV_SYNC_DEBUG
602 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
604 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
605 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
606 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
607 +UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
608 +UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
609 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
610 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
611 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
612 #endif /* UNIV_PFS_MUTEX */
615 block->page.in_zip_hash = FALSE;
616 block->page.in_flush_list = FALSE;
617 block->page.in_free_list = FALSE;
618 - block->in_unzip_LRU_list = FALSE;
619 #endif /* UNIV_DEBUG */
620 block->page.in_LRU_list = FALSE;
621 + block->in_unzip_LRU_list = FALSE;
622 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
623 block->n_pointers = 0;
624 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
626 memset(block->frame, '\0', UNIV_PAGE_SIZE);
628 /* Add the block to the free list */
629 - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
630 + mutex_enter(&buf_pool->free_list_mutex);
631 + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
633 ut_d(block->page.in_free_list = TRUE);
634 + mutex_exit(&buf_pool->free_list_mutex);
635 ut_ad(buf_pool_from_block(block) == buf_pool);
638 @@ -1038,7 +1045,8 @@
639 buf_chunk_t* chunk = buf_pool->chunks;
642 - ut_ad(buf_pool_mutex_own(buf_pool));
643 + //ut_ad(buf_pool_mutex_own(buf_pool));
644 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
645 for (n = buf_pool->n_chunks; n--; chunk++) {
647 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
648 @@ -1138,7 +1146,7 @@
650 const buf_block_t* block_end;
652 - ut_ad(buf_pool_mutex_own(buf_pool));
653 + //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
655 block_end = chunk->blocks + chunk->size;
657 @@ -1150,8 +1158,10 @@
658 ut_ad(!block->in_unzip_LRU_list);
659 ut_ad(!block->page.in_flush_list);
660 /* Remove the block from the free list. */
661 + mutex_enter(&buf_pool->free_list_mutex);
662 ut_ad(block->page.in_free_list);
663 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
664 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
665 + mutex_exit(&buf_pool->free_list_mutex);
667 /* Free the latches. */
668 mutex_free(&block->mutex);
669 @@ -1208,9 +1218,21 @@
670 ------------------------------- */
671 mutex_create(buf_pool_mutex_key,
672 &buf_pool->mutex, SYNC_BUF_POOL);
673 + mutex_create(buf_pool_LRU_list_mutex_key,
674 + &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
675 + rw_lock_create(buf_pool_page_hash_key,
676 + &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
677 + mutex_create(buf_pool_free_list_mutex_key,
678 + &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
679 + mutex_create(buf_pool_zip_free_mutex_key,
680 + &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
681 + mutex_create(buf_pool_zip_hash_mutex_key,
682 + &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
683 mutex_create(buf_pool_zip_mutex_key,
684 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
686 + mutex_enter(&buf_pool->LRU_list_mutex);
687 + rw_lock_x_lock(&buf_pool->page_hash_latch);
688 buf_pool_mutex_enter(buf_pool);
690 if (buf_pool_size > 0) {
691 @@ -1223,6 +1245,8 @@
695 + mutex_exit(&buf_pool->LRU_list_mutex);
696 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
697 buf_pool_mutex_exit(buf_pool);
700 @@ -1253,6 +1277,8 @@
702 /* All fields are initialized by mem_zalloc(). */
704 + mutex_exit(&buf_pool->LRU_list_mutex);
705 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
706 buf_pool_mutex_exit(buf_pool);
709 @@ -1467,7 +1493,11 @@
711 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
713 - ut_ad(buf_pool_mutex_own(buf_pool));
714 + //ut_ad(buf_pool_mutex_own(buf_pool));
715 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
716 +#ifdef UNIV_SYNC_DEBUG
717 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
719 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
720 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
721 ut_a(bpage->buf_fix_count == 0);
722 @@ -1554,7 +1584,8 @@
725 btr_search_disable(); /* Empty the adaptive hash index again */
726 - buf_pool_mutex_enter(buf_pool);
727 + //buf_pool_mutex_enter(buf_pool);
728 + mutex_enter(&buf_pool->LRU_list_mutex);
731 if (buf_pool->n_chunks <= 1) {
732 @@ -1625,7 +1656,7 @@
734 buf_LRU_make_block_old(&block->page);
736 - } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
737 + } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
741 @@ -1633,7 +1664,8 @@
742 mutex_exit(&block->mutex);
745 - buf_pool_mutex_exit(buf_pool);
746 + //buf_pool_mutex_exit(buf_pool);
747 + mutex_exit(&buf_pool->LRU_list_mutex);
749 /* Request for a flush of the chunk if it helps.
750 Do not flush if there are non-free blocks, since
751 @@ -1683,7 +1715,8 @@
753 buf_pool->old_pool_size = buf_pool->curr_pool_size;
755 - buf_pool_mutex_exit(buf_pool);
756 + //buf_pool_mutex_exit(buf_pool);
757 + mutex_exit(&buf_pool->LRU_list_mutex);
761 @@ -1724,7 +1757,9 @@
762 hash_table_t* zip_hash;
763 hash_table_t* page_hash;
765 - buf_pool_mutex_enter(buf_pool);
766 + //buf_pool_mutex_enter(buf_pool);
767 + mutex_enter(&buf_pool->LRU_list_mutex);
768 + rw_lock_x_lock(&buf_pool->page_hash_latch);
770 /* Free, create, and populate the hash table. */
771 hash_table_free(buf_pool->page_hash);
772 @@ -1765,8 +1800,9 @@
773 All such blocks are either in buf_pool->zip_clean or
774 in buf_pool->flush_list. */
776 + mutex_enter(&buf_pool->zip_mutex);
777 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
778 - b = UT_LIST_GET_NEXT(list, b)) {
779 + b = UT_LIST_GET_NEXT(zip_list, b)) {
780 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
781 ut_ad(!b->in_flush_list);
782 ut_ad(b->in_LRU_list);
783 @@ -1776,10 +1812,11 @@
784 HASH_INSERT(buf_page_t, hash, page_hash,
785 buf_page_address_fold(b->space, b->offset), b);
787 + mutex_exit(&buf_pool->zip_mutex);
789 buf_flush_list_mutex_enter(buf_pool);
790 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
791 - b = UT_LIST_GET_NEXT(list, b)) {
792 + b = UT_LIST_GET_NEXT(flush_list, b)) {
793 ut_ad(b->in_flush_list);
794 ut_ad(b->in_LRU_list);
795 ut_ad(b->in_page_hash);
796 @@ -1806,7 +1843,9 @@
799 buf_flush_list_mutex_exit(buf_pool);
800 - buf_pool_mutex_exit(buf_pool);
801 + //buf_pool_mutex_exit(buf_pool);
802 + mutex_exit(&buf_pool->LRU_list_mutex);
803 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
806 /********************************************************************
807 @@ -1853,21 +1892,32 @@
810 buf_pool_t* buf_pool = buf_pool_get(space, offset);
811 + mutex_t* block_mutex;
813 - ut_ad(buf_pool_mutex_own(buf_pool));
814 + //ut_ad(buf_pool_mutex_own(buf_pool));
816 + rw_lock_x_lock(&buf_pool->page_hash_latch);
817 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
819 + block_mutex = buf_page_get_mutex_enter(bpage);
823 if (UNIV_LIKELY_NULL(bpage)) {
824 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
825 /* The page was loaded meanwhile. */
826 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
829 /* Add to an existing watch. */
830 bpage->buf_fix_count++;
831 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 + mutex_exit(block_mutex);
836 + /* buf_pool->watch is protected by zip_mutex for now */
837 + mutex_enter(&buf_pool->zip_mutex);
838 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
839 bpage = &buf_pool->watch[i];
841 @@ -1891,10 +1941,12 @@
842 bpage->space = space;
843 bpage->offset = offset;
844 bpage->buf_fix_count = 1;
846 + bpage->buf_pool_index = buf_pool_index(buf_pool);
847 ut_d(bpage->in_page_hash = TRUE);
848 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
850 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
851 + mutex_exit(&buf_pool->zip_mutex);
853 case BUF_BLOCK_ZIP_PAGE:
854 ut_ad(bpage->in_page_hash);
855 @@ -1912,6 +1964,8 @@
858 /* Fix compiler warning */
859 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
860 + mutex_exit(&buf_pool->zip_mutex);
864 @@ -1941,6 +1995,8 @@
868 + mutex_enter(&buf_pool->LRU_list_mutex);
869 + rw_lock_x_lock(&buf_pool->page_hash_latch);
870 buf_pool_mutex_enter(buf_pool);
871 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
873 @@ -1959,6 +2015,8 @@
874 buf_pool->n_chunks++;
877 + mutex_exit(&buf_pool->LRU_list_mutex);
878 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
879 buf_pool_mutex_exit(buf_pool);
882 @@ -2046,7 +2104,11 @@
884 buf_page_t* watch) /*!< in/out: sentinel for watch */
886 - ut_ad(buf_pool_mutex_own(buf_pool));
887 + //ut_ad(buf_pool_mutex_own(buf_pool));
888 +#ifdef UNIV_SYNC_DEBUG
889 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
891 + ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
893 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
894 ut_d(watch->in_page_hash = FALSE);
895 @@ -2068,28 +2130,31 @@
896 buf_pool_t* buf_pool = buf_pool_get(space, offset);
897 ulint fold = buf_page_address_fold(space, offset);
899 - buf_pool_mutex_enter(buf_pool);
900 + //buf_pool_mutex_enter(buf_pool);
901 + rw_lock_x_lock(&buf_pool->page_hash_latch);
902 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
903 /* The page must exist because buf_pool_watch_set()
904 increments buf_fix_count. */
907 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
908 - mutex_t* mutex = buf_page_get_mutex(bpage);
909 + mutex_t* mutex = buf_page_get_mutex_enter(bpage);
911 - mutex_enter(mutex);
912 ut_a(bpage->buf_fix_count > 0);
913 bpage->buf_fix_count--;
916 + mutex_enter(&buf_pool->zip_mutex);
917 ut_a(bpage->buf_fix_count > 0);
919 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
920 buf_pool_watch_remove(buf_pool, fold, bpage);
922 + mutex_exit(&buf_pool->zip_mutex);
925 - buf_pool_mutex_exit(buf_pool);
926 + //buf_pool_mutex_exit(buf_pool);
927 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
930 /****************************************************************//**
931 @@ -2109,14 +2174,16 @@
932 buf_pool_t* buf_pool = buf_pool_get(space, offset);
933 ulint fold = buf_page_address_fold(space, offset);
935 - buf_pool_mutex_enter(buf_pool);
936 + //buf_pool_mutex_enter(buf_pool);
937 + rw_lock_s_lock(&buf_pool->page_hash_latch);
939 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
940 /* The page must exist because buf_pool_watch_set()
941 increments buf_fix_count. */
943 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
944 - buf_pool_mutex_exit(buf_pool);
945 + //buf_pool_mutex_exit(buf_pool);
946 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
950 @@ -2133,13 +2200,15 @@
952 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
954 - buf_pool_mutex_enter(buf_pool);
955 + //buf_pool_mutex_enter(buf_pool);
956 + mutex_enter(&buf_pool->LRU_list_mutex);
958 ut_a(buf_page_in_file(bpage));
960 buf_LRU_make_block_young(bpage);
962 - buf_pool_mutex_exit(buf_pool);
963 + //buf_pool_mutex_exit(buf_pool);
964 + mutex_exit(&buf_pool->LRU_list_mutex);
967 /********************************************************************//**
968 @@ -2163,14 +2232,20 @@
969 ut_a(buf_page_in_file(bpage));
971 if (buf_page_peek_if_too_old(bpage)) {
972 - buf_pool_mutex_enter(buf_pool);
973 + //buf_pool_mutex_enter(buf_pool);
974 + mutex_enter(&buf_pool->LRU_list_mutex);
975 buf_LRU_make_block_young(bpage);
976 - buf_pool_mutex_exit(buf_pool);
977 + //buf_pool_mutex_exit(buf_pool);
978 + mutex_exit(&buf_pool->LRU_list_mutex);
979 } else if (!access_time) {
980 ulint time_ms = ut_time_ms();
981 - buf_pool_mutex_enter(buf_pool);
982 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
983 + //buf_pool_mutex_enter(buf_pool);
985 buf_page_set_accessed(bpage, time_ms);
986 - buf_pool_mutex_exit(buf_pool);
987 + mutex_exit(block_mutex);
989 + //buf_pool_mutex_exit(buf_pool);
993 @@ -2187,7 +2262,8 @@
995 buf_pool_t* buf_pool = buf_pool_get(space, offset);
997 - buf_pool_mutex_enter(buf_pool);
998 + //buf_pool_mutex_enter(buf_pool);
999 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1001 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1003 @@ -2196,7 +2272,8 @@
1004 block->check_index_page_at_flush = FALSE;
1007 - buf_pool_mutex_exit(buf_pool);
1008 + //buf_pool_mutex_exit(buf_pool);
1009 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1012 /********************************************************************//**
1013 @@ -2215,7 +2292,8 @@
1015 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1017 - buf_pool_mutex_enter(buf_pool);
1018 + //buf_pool_mutex_enter(buf_pool);
1019 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1021 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1023 @@ -2226,7 +2304,8 @@
1024 is_hashed = block->is_hashed;
1027 - buf_pool_mutex_exit(buf_pool);
1028 + //buf_pool_mutex_exit(buf_pool);
1029 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1033 @@ -2248,7 +2327,8 @@
1035 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1037 - buf_pool_mutex_enter(buf_pool);
1038 + //buf_pool_mutex_enter(buf_pool);
1039 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1041 bpage = buf_page_hash_get(buf_pool, space, offset);
1043 @@ -2257,7 +2337,8 @@
1044 bpage->file_page_was_freed = TRUE;
1047 - buf_pool_mutex_exit(buf_pool);
1048 + //buf_pool_mutex_exit(buf_pool);
1049 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1053 @@ -2278,7 +2359,8 @@
1055 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1057 - buf_pool_mutex_enter(buf_pool);
1058 + //buf_pool_mutex_enter(buf_pool);
1059 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1061 bpage = buf_page_hash_get(buf_pool, space, offset);
1063 @@ -2287,7 +2369,8 @@
1064 bpage->file_page_was_freed = FALSE;
1067 - buf_pool_mutex_exit(buf_pool);
1068 + //buf_pool_mutex_exit(buf_pool);
1069 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1073 @@ -2322,8 +2405,9 @@
1074 buf_pool->stat.n_page_gets++;
1077 - buf_pool_mutex_enter(buf_pool);
1078 + //buf_pool_mutex_enter(buf_pool);
1080 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1081 bpage = buf_page_hash_get(buf_pool, space, offset);
1083 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1084 @@ -2332,7 +2416,8 @@
1086 /* Page not in buf_pool: needs to be read from file */
1088 - buf_pool_mutex_exit(buf_pool);
1089 + //buf_pool_mutex_exit(buf_pool);
1090 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1092 buf_read_page(space, zip_size, offset);
1094 @@ -2344,10 +2429,15 @@
1095 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1096 /* There is no compressed page. */
1098 - buf_pool_mutex_exit(buf_pool);
1099 + //buf_pool_mutex_exit(buf_pool);
1100 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1104 + block_mutex = buf_page_get_mutex_enter(bpage);
1106 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1108 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1110 switch (buf_page_get_state(bpage)) {
1111 @@ -2356,19 +2446,19 @@
1112 case BUF_BLOCK_MEMORY:
1113 case BUF_BLOCK_REMOVE_HASH:
1114 case BUF_BLOCK_ZIP_FREE:
1116 + mutex_exit(block_mutex);
1118 case BUF_BLOCK_ZIP_PAGE:
1119 case BUF_BLOCK_ZIP_DIRTY:
1120 - block_mutex = &buf_pool->zip_mutex;
1121 - mutex_enter(block_mutex);
1122 + ut_a(block_mutex == &buf_pool->zip_mutex);
1123 bpage->buf_fix_count++;
1125 case BUF_BLOCK_FILE_PAGE:
1126 - block_mutex = &((buf_block_t*) bpage)->mutex;
1127 - mutex_enter(block_mutex);
1128 + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1130 /* Discard the uncompressed page frame if possible. */
1131 - if (buf_LRU_free_block(bpage, FALSE, NULL)
1132 + if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1135 mutex_exit(block_mutex);
1136 @@ -2387,7 +2477,7 @@
1137 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1138 access_time = buf_page_is_accessed(bpage);
1140 - buf_pool_mutex_exit(buf_pool);
1141 + //buf_pool_mutex_exit(buf_pool);
1143 mutex_exit(block_mutex);
1145 @@ -2696,7 +2786,7 @@
1146 const buf_block_t* block) /*!< in: pointer to block,
1149 - ut_ad(buf_pool_mutex_own(buf_pool));
1150 + //ut_ad(buf_pool_mutex_own(buf_pool));
1152 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1153 /* The pointer should be aligned. */
1154 @@ -2732,6 +2822,7 @@
1158 + mutex_t* block_mutex = NULL;
1159 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1162 @@ -2753,9 +2844,11 @@
1163 fold = buf_page_address_fold(space, offset);
1166 - buf_pool_mutex_enter(buf_pool);
1167 + //buf_pool_mutex_enter(buf_pool);
1170 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1172 /* If the guess is a compressed page descriptor that
1173 has been allocated by buf_buddy_alloc(), it may have
1174 been invalidated by buf_buddy_relocate(). In that
1175 @@ -2764,11 +2857,15 @@
1176 the guess may be pointing to a buffer pool chunk that
1177 has been released when resizing the buffer pool. */
1179 - if (!buf_block_is_uncompressed(buf_pool, block)
1180 + if (!block_mutex) {
1181 + block = guess = NULL;
1182 + } else if (!buf_block_is_uncompressed(buf_pool, block)
1183 || offset != block->page.offset
1184 || space != block->page.space
1185 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1187 + mutex_exit(block_mutex);
1189 block = guess = NULL;
1191 ut_ad(!block->page.in_zip_hash);
1192 @@ -2777,12 +2874,19 @@
1195 if (block == NULL) {
1196 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1197 block = (buf_block_t*) buf_page_hash_get_low(
1198 buf_pool, space, offset, fold);
1200 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1201 + ut_a(block_mutex);
1203 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1207 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1208 + mutex_exit(block_mutex);
1212 @@ -2794,12 +2898,14 @@
1213 space, offset, fold);
1215 if (UNIV_LIKELY_NULL(block)) {
1217 + block_mutex = buf_page_get_mutex((buf_page_t*)block);
1218 + ut_a(block_mutex);
1219 + ut_ad(mutex_own(block_mutex));
1224 - buf_pool_mutex_exit(buf_pool);
1225 + //buf_pool_mutex_exit(buf_pool);
1227 if (mode == BUF_GET_IF_IN_POOL
1228 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1229 @@ -2847,7 +2953,8 @@
1230 /* The page is being read to buffer pool,
1231 but we cannot wait around for the read to
1233 - buf_pool_mutex_exit(buf_pool);
1234 + //buf_pool_mutex_exit(buf_pool);
1235 + mutex_exit(block_mutex);
1239 @@ -2857,38 +2964,49 @@
1242 case BUF_BLOCK_FILE_PAGE:
1243 + if (block_mutex == &buf_pool->zip_mutex) {
1244 + /* this is the wrong mutex... */
1245 + mutex_exit(block_mutex);
1250 case BUF_BLOCK_ZIP_PAGE:
1251 case BUF_BLOCK_ZIP_DIRTY:
1252 + ut_ad(block_mutex == &buf_pool->zip_mutex);
1253 bpage = &block->page;
1254 /* Protect bpage->buf_fix_count. */
1255 - mutex_enter(&buf_pool->zip_mutex);
1256 + //mutex_enter(&buf_pool->zip_mutex);
1258 if (bpage->buf_fix_count
1259 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1260 /* This condition often occurs when the buffer
1261 is not buffer-fixed, but I/O-fixed by
1262 buf_page_init_for_read(). */
1263 - mutex_exit(&buf_pool->zip_mutex);
1264 + //mutex_exit(&buf_pool->zip_mutex);
1266 /* The block is buffer-fixed or I/O-fixed.
1268 - buf_pool_mutex_exit(buf_pool);
1269 + //buf_pool_mutex_exit(buf_pool);
1270 + mutex_exit(block_mutex);
1271 os_thread_sleep(WAIT_FOR_READ);
1276 /* Allocate an uncompressed page. */
1277 - buf_pool_mutex_exit(buf_pool);
1278 - mutex_exit(&buf_pool->zip_mutex);
1279 + //buf_pool_mutex_exit(buf_pool);
1280 + //mutex_exit(&buf_pool->zip_mutex);
1281 + mutex_exit(block_mutex);
1283 block = buf_LRU_get_free_block(buf_pool, 0);
1285 + block_mutex = &block->mutex;
1287 - buf_pool_mutex_enter(buf_pool);
1288 - mutex_enter(&block->mutex);
1289 + //buf_pool_mutex_enter(buf_pool);
1290 + mutex_enter(&buf_pool->LRU_list_mutex);
1291 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1292 + mutex_enter(block_mutex);
1295 buf_page_t* hash_bpage;
1296 @@ -2901,35 +3019,47 @@
1297 while buf_pool->mutex was released.
1298 Free the block that was allocated. */
1300 - buf_LRU_block_free_non_file_page(block);
1301 - mutex_exit(&block->mutex);
1302 + buf_LRU_block_free_non_file_page(block, TRUE);
1303 + mutex_exit(block_mutex);
1305 block = (buf_block_t*) hash_bpage;
1307 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1308 + ut_a(block_mutex);
1310 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1311 + mutex_exit(&buf_pool->LRU_list_mutex);
1316 + mutex_enter(&buf_pool->zip_mutex);
1319 (bpage->buf_fix_count
1320 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1322 + mutex_exit(&buf_pool->zip_mutex);
1323 /* The block was buffer-fixed or I/O-fixed
1324 while buf_pool->mutex was not held by this thread.
1325 Free the block that was allocated and try again.
1326 This should be extremely unlikely. */
1328 - buf_LRU_block_free_non_file_page(block);
1329 - mutex_exit(&block->mutex);
1330 + buf_LRU_block_free_non_file_page(block, TRUE);
1331 + //mutex_exit(&block->mutex);
1333 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1334 + mutex_exit(&buf_pool->LRU_list_mutex);
1335 goto wait_until_unfixed;
1338 /* Move the compressed page from bpage to block,
1339 and uncompress it. */
1341 - mutex_enter(&buf_pool->zip_mutex);
1343 buf_relocate(bpage, &block->page);
1345 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1347 buf_block_init_low(block);
1348 block->lock_hash_val = lock_rec_hash(space, offset);
1350 @@ -2938,7 +3068,7 @@
1352 if (buf_page_get_state(&block->page)
1353 == BUF_BLOCK_ZIP_PAGE) {
1354 - UT_LIST_REMOVE(list, buf_pool->zip_clean,
1355 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1357 ut_ad(!block->page.in_flush_list);
1359 @@ -2955,19 +3085,24 @@
1360 /* Insert at the front of unzip_LRU list */
1361 buf_unzip_LRU_add_block(block, FALSE);
1363 + mutex_exit(&buf_pool->LRU_list_mutex);
1365 block->page.buf_fix_count = 1;
1366 buf_block_set_io_fix(block, BUF_IO_READ);
1367 rw_lock_x_lock_func(&block->lock, 0, file, line);
1369 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1371 - mutex_exit(&block->mutex);
1372 + mutex_exit(block_mutex);
1373 mutex_exit(&buf_pool->zip_mutex);
1375 + buf_pool_mutex_enter(buf_pool);
1376 buf_pool->n_pend_unzip++;
1377 + buf_pool_mutex_exit(buf_pool);
1379 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1380 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1382 - buf_pool_mutex_exit(buf_pool);
1383 + //buf_pool_mutex_exit(buf_pool);
1385 /* Decompress the page and apply buffered operations
1386 while not holding buf_pool->mutex or block->mutex. */
1387 @@ -2980,12 +3115,15 @@
1390 /* Unfix and unlatch the block. */
1391 - buf_pool_mutex_enter(buf_pool);
1392 - mutex_enter(&block->mutex);
1393 + //buf_pool_mutex_enter(buf_pool);
1394 + block_mutex = &block->mutex;
1395 + mutex_enter(block_mutex);
1396 block->page.buf_fix_count--;
1397 buf_block_set_io_fix(block, BUF_IO_NONE);
1398 - mutex_exit(&block->mutex);
1400 + buf_pool_mutex_enter(buf_pool);
1401 buf_pool->n_pend_unzip--;
1402 + buf_pool_mutex_exit(buf_pool);
1403 rw_lock_x_unlock(&block->lock);
1406 @@ -3001,7 +3139,7 @@
1408 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1410 - mutex_enter(&block->mutex);
1411 + //mutex_enter(&block->mutex);
1412 #if UNIV_WORD_SIZE == 4
1413 /* On 32-bit systems, there is no padding in buf_page_t. On
1414 other systems, Valgrind could complain about uninitialized pad
1415 @@ -3014,7 +3152,7 @@
1416 /* Try to evict the block from the buffer pool, to use the
1417 insert buffer (change buffer) as much as possible. */
1419 - if (buf_LRU_free_block(&block->page, TRUE, NULL)
1420 + if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1422 mutex_exit(&block->mutex);
1423 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1424 @@ -3051,13 +3189,14 @@
1426 buf_block_buf_fix_inc(block, file, line);
1428 - mutex_exit(&block->mutex);
1429 + //mutex_exit(&block->mutex);
1431 /* Check if this is the first access to the page */
1433 access_time = buf_page_is_accessed(&block->page);
1435 - buf_pool_mutex_exit(buf_pool);
1436 + //buf_pool_mutex_exit(buf_pool);
1437 + mutex_exit(block_mutex);
1439 buf_page_set_accessed_make_young(&block->page, access_time);
1441 @@ -3290,9 +3429,11 @@
1442 buf_pool = buf_pool_from_block(block);
1444 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1445 - buf_pool_mutex_enter(buf_pool);
1446 + //buf_pool_mutex_enter(buf_pool);
1447 + mutex_enter(&buf_pool->LRU_list_mutex);
1448 buf_LRU_make_block_young(&block->page);
1449 - buf_pool_mutex_exit(buf_pool);
1450 + //buf_pool_mutex_exit(buf_pool);
1451 + mutex_exit(&buf_pool->LRU_list_mutex);
1452 } else if (!buf_page_is_accessed(&block->page)) {
1453 /* Above, we do a dirty read on purpose, to avoid
1454 mutex contention. The field buf_page_t::access_time
1455 @@ -3300,9 +3441,11 @@
1456 field must be protected by mutex, however. */
1457 ulint time_ms = ut_time_ms();
1459 - buf_pool_mutex_enter(buf_pool);
1460 + //buf_pool_mutex_enter(buf_pool);
1461 + mutex_enter(&block->mutex);
1462 buf_page_set_accessed(&block->page, time_ms);
1463 - buf_pool_mutex_exit(buf_pool);
1464 + //buf_pool_mutex_exit(buf_pool);
1465 + mutex_exit(&block->mutex);
1468 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1469 @@ -3369,18 +3512,21 @@
1471 ut_ad(mtr->state == MTR_ACTIVE);
1473 - buf_pool_mutex_enter(buf_pool);
1474 + //buf_pool_mutex_enter(buf_pool);
1475 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1476 block = buf_block_hash_get(buf_pool, space_id, page_no);
1478 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1479 - buf_pool_mutex_exit(buf_pool);
1480 + //buf_pool_mutex_exit(buf_pool);
1481 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1485 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1487 mutex_enter(&block->mutex);
1488 - buf_pool_mutex_exit(buf_pool);
1489 + //buf_pool_mutex_exit(buf_pool);
1490 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1492 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1493 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1494 @@ -3469,7 +3615,10 @@
1495 buf_page_t* hash_page;
1496 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1498 - ut_ad(buf_pool_mutex_own(buf_pool));
1499 + //ut_ad(buf_pool_mutex_own(buf_pool));
1500 +#ifdef UNIV_SYNC_DEBUG
1501 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1503 ut_ad(mutex_own(&(block->mutex)));
1504 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1506 @@ -3498,11 +3647,14 @@
1507 if (UNIV_LIKELY(!hash_page)) {
1508 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1509 /* Preserve the reference count. */
1510 - ulint buf_fix_count = hash_page->buf_fix_count;
1511 + ulint buf_fix_count;
1513 + mutex_enter(&buf_pool->zip_mutex);
1514 + buf_fix_count = hash_page->buf_fix_count;
1515 ut_a(buf_fix_count > 0);
1516 block->page.buf_fix_count += buf_fix_count;
1517 buf_pool_watch_remove(buf_pool, fold, hash_page);
1518 + mutex_exit(&buf_pool->zip_mutex);
1521 "InnoDB: Error: page %lu %lu already found"
1522 @@ -3512,7 +3664,8 @@
1523 (const void*) hash_page, (const void*) block);
1524 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1525 mutex_exit(&block->mutex);
1526 - buf_pool_mutex_exit(buf_pool);
1527 + //buf_pool_mutex_exit(buf_pool);
1528 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1532 @@ -3596,7 +3749,9 @@
1534 fold = buf_page_address_fold(space, offset);
1536 - buf_pool_mutex_enter(buf_pool);
1537 + //buf_pool_mutex_enter(buf_pool);
1538 + mutex_enter(&buf_pool->LRU_list_mutex);
1539 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1541 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1542 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1543 @@ -3605,9 +3760,15 @@
1546 mutex_enter(&block->mutex);
1547 - buf_LRU_block_free_non_file_page(block);
1548 + mutex_exit(&buf_pool->LRU_list_mutex);
1549 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1550 + buf_LRU_block_free_non_file_page(block, FALSE);
1551 mutex_exit(&block->mutex);
1554 + mutex_exit(&buf_pool->LRU_list_mutex);
1555 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1560 @@ -3630,6 +3791,8 @@
1562 buf_page_init(space, offset, fold, block);
1564 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1566 /* The block must be put to the LRU list, to the old blocks */
1567 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1569 @@ -3657,7 +3820,7 @@
1570 been added to buf_pool->LRU and
1571 buf_pool->page_hash. */
1572 mutex_exit(&block->mutex);
1573 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1574 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1575 mutex_enter(&block->mutex);
1576 block->page.zip.data = data;
1578 @@ -3670,6 +3833,7 @@
1579 buf_unzip_LRU_add_block(block, TRUE);
1582 + mutex_exit(&buf_pool->LRU_list_mutex);
1583 mutex_exit(&block->mutex);
1585 /* Defer buf_buddy_alloc() until after the block has
1586 @@ -3681,8 +3845,8 @@
1587 control block (bpage), in order to avoid the
1588 invocation of buf_buddy_relocate_block() on
1589 uninitialized data. */
1590 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1591 - bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1592 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1593 + bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1595 /* Initialize the buf_pool pointer. */
1596 bpage->buf_pool_index = buf_pool_index(buf_pool);
1597 @@ -3701,8 +3865,11 @@
1599 /* The block was added by some other thread. */
1601 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1602 - buf_buddy_free(buf_pool, data, zip_size);
1603 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1604 + buf_buddy_free(buf_pool, data, zip_size, TRUE);
1606 + mutex_exit(&buf_pool->LRU_list_mutex);
1607 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1611 @@ -3746,18 +3913,24 @@
1612 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1615 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1617 /* The block must be put to the LRU list, to the old blocks */
1618 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1619 buf_LRU_insert_zip_clean(bpage);
1621 + mutex_exit(&buf_pool->LRU_list_mutex);
1623 buf_page_set_io_fix(bpage, BUF_IO_READ);
1625 mutex_exit(&buf_pool->zip_mutex);
1628 + buf_pool_mutex_enter(buf_pool);
1629 buf_pool->n_pend_reads++;
1631 buf_pool_mutex_exit(buf_pool);
1633 + //buf_pool_mutex_exit(buf_pool);
1635 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1637 @@ -3799,7 +3972,9 @@
1639 fold = buf_page_address_fold(space, offset);
1641 - buf_pool_mutex_enter(buf_pool);
1642 + //buf_pool_mutex_enter(buf_pool);
1643 + mutex_enter(&buf_pool->LRU_list_mutex);
1644 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1646 block = (buf_block_t*) buf_page_hash_get_low(
1647 buf_pool, space, offset, fold);
1648 @@ -3815,7 +3990,9 @@
1649 #endif /* UNIV_DEBUG_FILE_ACCESSES */
1651 /* Page can be found in buf_pool */
1652 - buf_pool_mutex_exit(buf_pool);
1653 + //buf_pool_mutex_exit(buf_pool);
1654 + mutex_exit(&buf_pool->LRU_list_mutex);
1655 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1657 buf_block_free(free_block);
1659 @@ -3837,6 +4014,7 @@
1660 mutex_enter(&block->mutex);
1662 buf_page_init(space, offset, fold, block);
1663 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1665 /* The block must be put to the LRU list */
1666 buf_LRU_add_block(&block->page, FALSE);
1667 @@ -3863,7 +4041,7 @@
1668 the reacquisition of buf_pool->mutex. We also must
1669 defer this operation until after the block descriptor
1670 has been added to buf_pool->LRU and buf_pool->page_hash. */
1671 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1672 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1673 mutex_enter(&block->mutex);
1674 block->page.zip.data = data;
1676 @@ -3881,7 +4059,8 @@
1678 buf_page_set_accessed(&block->page, time_ms);
1680 - buf_pool_mutex_exit(buf_pool);
1681 + //buf_pool_mutex_exit(buf_pool);
1682 + mutex_exit(&buf_pool->LRU_list_mutex);
1684 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1686 @@ -3932,6 +4111,8 @@
1687 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1688 const ibool uncompressed = (buf_page_get_state(bpage)
1689 == BUF_BLOCK_FILE_PAGE);
1690 + ibool have_LRU_mutex = FALSE;
1691 + mutex_t* block_mutex;
1693 ut_a(buf_page_in_file(bpage));
1695 @@ -4065,8 +4246,26 @@
1699 + if (io_type == BUF_IO_WRITE
1700 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1701 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1702 + /* to keep consistency at buf_LRU_insert_zip_clean() */
1703 + have_LRU_mutex = TRUE; /* optimistic */
1706 + if (have_LRU_mutex)
1707 + mutex_enter(&buf_pool->LRU_list_mutex);
1708 + block_mutex = buf_page_get_mutex_enter(bpage);
1709 + ut_a(block_mutex);
1710 + if (io_type == BUF_IO_WRITE
1711 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1712 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1713 + && !have_LRU_mutex) {
1714 + mutex_exit(block_mutex);
1715 + have_LRU_mutex = TRUE;
1718 buf_pool_mutex_enter(buf_pool);
1719 - mutex_enter(buf_page_get_mutex(bpage));
1721 #ifdef UNIV_IBUF_COUNT_DEBUG
1722 if (io_type == BUF_IO_WRITE || uncompressed) {
1723 @@ -4089,6 +4288,7 @@
1724 the x-latch to this OS thread: do not let this confuse you in
1727 + ut_a(!have_LRU_mutex);
1728 ut_ad(buf_pool->n_pend_reads > 0);
1729 buf_pool->n_pend_reads--;
1730 buf_pool->stat.n_pages_read++;
1731 @@ -4106,6 +4306,9 @@
1733 buf_flush_write_complete(bpage);
1735 + if (have_LRU_mutex)
1736 + mutex_exit(&buf_pool->LRU_list_mutex);
1739 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1741 @@ -4128,8 +4331,8 @@
1743 #endif /* UNIV_DEBUG */
1745 - mutex_exit(buf_page_get_mutex(bpage));
1746 buf_pool_mutex_exit(buf_pool);
1747 + mutex_exit(block_mutex);
1750 /*********************************************************************//**
1751 @@ -4146,7 +4349,9 @@
1755 - buf_pool_mutex_enter(buf_pool);
1756 + //buf_pool_mutex_enter(buf_pool);
1757 + mutex_enter(&buf_pool->LRU_list_mutex);
1758 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1760 chunk = buf_pool->chunks;
1762 @@ -4163,7 +4368,9 @@
1766 - buf_pool_mutex_exit(buf_pool);
1767 + //buf_pool_mutex_exit(buf_pool);
1768 + mutex_exit(&buf_pool->LRU_list_mutex);
1769 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1773 @@ -4211,7 +4418,8 @@
1774 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1777 - buf_pool_mutex_enter(buf_pool);
1778 + //buf_pool_mutex_enter(buf_pool);
1779 + mutex_enter(&buf_pool->LRU_list_mutex);
1781 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1782 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1783 @@ -4224,7 +4432,8 @@
1784 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1785 buf_refresh_io_stats(buf_pool);
1787 - buf_pool_mutex_exit(buf_pool);
1788 + //buf_pool_mutex_exit(buf_pool);
1789 + mutex_exit(&buf_pool->LRU_list_mutex);
1792 /*********************************************************************//**
1793 @@ -4266,7 +4475,10 @@
1797 - buf_pool_mutex_enter(buf_pool);
1798 + //buf_pool_mutex_enter(buf_pool);
1799 + mutex_enter(&buf_pool->LRU_list_mutex);
1800 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1801 + /* to keep the new latch order, it cannot validate correctly... */
1803 chunk = buf_pool->chunks;
1805 @@ -4361,7 +4573,7 @@
1806 /* Check clean compressed-only blocks. */
1808 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1809 - b = UT_LIST_GET_NEXT(list, b)) {
1810 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1811 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1812 switch (buf_page_get_io_fix(b)) {
1814 @@ -4392,7 +4604,7 @@
1816 buf_flush_list_mutex_enter(buf_pool);
1817 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1818 - b = UT_LIST_GET_NEXT(list, b)) {
1819 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1820 ut_ad(b->in_flush_list);
1821 ut_a(b->oldest_modification);
1823 @@ -4451,6 +4663,8 @@
1826 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1827 + /* because of latching order with block->mutex, we cannot get needed mutexes before that */
1829 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1830 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1831 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1832 @@ -4461,8 +4675,11 @@
1833 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1834 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1835 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1838 - buf_pool_mutex_exit(buf_pool);
1839 + //buf_pool_mutex_exit(buf_pool);
1840 + mutex_exit(&buf_pool->LRU_list_mutex);
1841 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1843 ut_a(buf_LRU_validate());
1844 ut_a(buf_flush_validate(buf_pool));
1845 @@ -4518,7 +4735,9 @@
1846 index_ids = mem_alloc(size * sizeof *index_ids);
1847 counts = mem_alloc(sizeof(ulint) * size);
1849 - buf_pool_mutex_enter(buf_pool);
1850 + //buf_pool_mutex_enter(buf_pool);
1851 + mutex_enter(&buf_pool->LRU_list_mutex);
1852 + mutex_enter(&buf_pool->free_list_mutex);
1853 buf_flush_list_mutex_enter(buf_pool);
1856 @@ -4587,7 +4806,9 @@
1860 - buf_pool_mutex_exit(buf_pool);
1861 + //buf_pool_mutex_exit(buf_pool);
1862 + mutex_exit(&buf_pool->LRU_list_mutex);
1863 + mutex_exit(&buf_pool->free_list_mutex);
1865 for (i = 0; i < n_found; i++) {
1866 index = dict_index_get_if_in_cache(index_ids[i]);
1867 @@ -4644,7 +4865,7 @@
1869 ulint fixed_pages_number = 0;
1871 - buf_pool_mutex_enter(buf_pool);
1872 + //buf_pool_mutex_enter(buf_pool);
1874 chunk = buf_pool->chunks;
1876 @@ -4678,7 +4899,7 @@
1877 /* Traverse the lists of clean and dirty compressed-only blocks. */
1879 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1880 - b = UT_LIST_GET_NEXT(list, b)) {
1881 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1882 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1883 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1885 @@ -4690,7 +4911,7 @@
1887 buf_flush_list_mutex_enter(buf_pool);
1888 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1889 - b = UT_LIST_GET_NEXT(list, b)) {
1890 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1891 ut_ad(b->in_flush_list);
1893 switch (buf_page_get_state(b)) {
1894 @@ -4716,7 +4937,7 @@
1896 buf_flush_list_mutex_exit(buf_pool);
1897 mutex_exit(&buf_pool->zip_mutex);
1898 - buf_pool_mutex_exit(buf_pool);
1899 + //buf_pool_mutex_exit(buf_pool);
1901 return(fixed_pages_number);
1903 @@ -4810,6 +5031,8 @@
1907 + mutex_enter(&buf_pool->LRU_list_mutex);
1908 + mutex_enter(&buf_pool->free_list_mutex);
1909 buf_pool_mutex_enter(buf_pool);
1910 buf_flush_list_mutex_enter(buf_pool);
1912 @@ -4913,6 +5136,8 @@
1913 buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
1915 buf_refresh_io_stats(buf_pool);
1916 + mutex_exit(&buf_pool->LRU_list_mutex);
1917 + mutex_exit(&buf_pool->free_list_mutex);
1918 buf_pool_mutex_exit(buf_pool);
1921 @@ -5032,11 +5257,13 @@
1925 - buf_pool_mutex_enter(buf_pool);
1926 + //buf_pool_mutex_enter(buf_pool);
1927 + mutex_enter(&buf_pool->free_list_mutex);
1929 len = UT_LIST_GET_LEN(buf_pool->free);
1931 - buf_pool_mutex_exit(buf_pool);
1932 + //buf_pool_mutex_exit(buf_pool);
1933 + mutex_exit(&buf_pool->free_list_mutex);
1937 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1938 --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1939 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
1942 ut_d(block->page.in_flush_list = TRUE);
1943 block->page.oldest_modification = lsn;
1944 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1945 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1947 #ifdef UNIV_DEBUG_VALGRIND
1949 @@ -373,14 +373,14 @@
1950 > block->page.oldest_modification) {
1951 ut_ad(b->in_flush_list);
1953 - b = UT_LIST_GET_NEXT(list, b);
1954 + b = UT_LIST_GET_NEXT(flush_list, b);
1958 if (prev_b == NULL) {
1959 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1960 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1962 - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1963 + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1964 prev_b, &block->page);
1968 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1969 //ut_ad(buf_pool_mutex_own(buf_pool));
1971 - //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1972 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1973 //ut_ad(bpage->in_LRU_list);
1975 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1976 @@ -442,14 +442,14 @@
1977 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1980 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1981 - ut_ad(buf_pool_mutex_own(buf_pool));
1982 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1983 + //ut_ad(buf_pool_mutex_own(buf_pool));
1985 - ut_a(buf_page_in_file(bpage));
1986 + //ut_a(buf_page_in_file(bpage));
1987 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1988 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1990 - if (bpage->oldest_modification != 0
1991 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1992 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1993 ut_ad(bpage->in_flush_list);
1997 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1999 - ut_ad(buf_pool_mutex_own(buf_pool));
2000 + //ut_ad(buf_pool_mutex_own(buf_pool));
2001 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
2002 ut_ad(bpage->in_flush_list);
2004 @@ -498,11 +498,11 @@
2006 case BUF_BLOCK_ZIP_DIRTY:
2007 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
2008 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2009 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2010 buf_LRU_insert_zip_clean(bpage);
2012 case BUF_BLOCK_FILE_PAGE:
2013 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2019 buf_page_t* prev_b = NULL;
2020 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2022 - ut_ad(buf_pool_mutex_own(buf_pool));
2023 + //ut_ad(buf_pool_mutex_own(buf_pool));
2024 /* Must reside in the same buffer pool. */
2025 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2027 @@ -575,18 +575,18 @@
2028 because we assert on in_flush_list in comparison function. */
2029 ut_d(bpage->in_flush_list = FALSE);
2031 - prev = UT_LIST_GET_PREV(list, bpage);
2032 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2033 + prev = UT_LIST_GET_PREV(flush_list, bpage);
2034 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2037 ut_ad(prev->in_flush_list);
2038 UT_LIST_INSERT_AFTER(
2041 buf_pool->flush_list,
2047 buf_pool->flush_list,
2050 @@ -1055,7 +1055,7 @@
2053 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2054 - ut_ad(!buf_pool_mutex_own(buf_pool));
2055 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2058 #ifdef UNIV_LOG_DEBUG
2059 @@ -1069,7 +1069,8 @@
2060 io_fixed and oldest_modification != 0. Thus, it cannot be
2061 relocated in the buffer pool or removed from flush_list or
2063 - ut_ad(!buf_pool_mutex_own(buf_pool));
2064 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2065 + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2066 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2067 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2068 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2069 @@ -1232,12 +1233,18 @@
2070 ibool is_uncompressed;
2072 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2073 - ut_ad(buf_pool_mutex_own(buf_pool));
2074 + //ut_ad(buf_pool_mutex_own(buf_pool));
2075 +#ifdef UNIV_SYNC_DEBUG
2076 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2078 ut_ad(buf_page_in_file(bpage));
2080 block_mutex = buf_page_get_mutex(bpage);
2081 ut_ad(mutex_own(block_mutex));
2083 + buf_pool_mutex_enter(buf_pool);
2084 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2086 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2088 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2089 @@ -1399,14 +1406,16 @@
2091 buf_pool = buf_pool_get(space, i);
2093 - buf_pool_mutex_enter(buf_pool);
2094 + //buf_pool_mutex_enter(buf_pool);
2095 + rw_lock_s_lock(&buf_pool->page_hash_latch);
2097 /* We only want to flush pages from this buffer pool. */
2098 bpage = buf_page_hash_get(buf_pool, space, i);
2102 - buf_pool_mutex_exit(buf_pool);
2103 + //buf_pool_mutex_exit(buf_pool);
2104 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2108 @@ -1418,11 +1427,9 @@
2109 if (flush_type != BUF_FLUSH_LRU
2111 || buf_page_is_old(bpage)) {
2112 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2114 - mutex_enter(block_mutex);
2115 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2117 - if (buf_flush_ready_for_flush(bpage, flush_type)
2118 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2119 && (i == offset || !bpage->buf_fix_count)) {
2120 /* We only try to flush those
2121 neighbors != offset where the buf fix
2122 @@ -1438,11 +1445,12 @@
2123 ut_ad(!buf_pool_mutex_own(buf_pool));
2127 + } else if (block_mutex) {
2128 mutex_exit(block_mutex);
2131 - buf_pool_mutex_exit(buf_pool);
2132 + //buf_pool_mutex_exit(buf_pool);
2133 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2137 @@ -1475,21 +1483,25 @@
2138 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2139 #endif /* UNIV_DEBUG */
2141 - ut_ad(buf_pool_mutex_own(buf_pool));
2142 + //ut_ad(buf_pool_mutex_own(buf_pool));
2143 + ut_ad(flush_type != BUF_FLUSH_LRU
2144 + || mutex_own(&buf_pool->LRU_list_mutex));
2146 - block_mutex = buf_page_get_mutex(bpage);
2147 - mutex_enter(block_mutex);
2148 + block_mutex = buf_page_get_mutex_enter(bpage);
2150 - ut_a(buf_page_in_file(bpage));
2151 + //ut_a(buf_page_in_file(bpage));
2153 - if (buf_flush_ready_for_flush(bpage, flush_type)) {
2154 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2157 buf_pool_t* buf_pool;
2159 buf_pool = buf_pool_from_bpage(bpage);
2161 - buf_pool_mutex_exit(buf_pool);
2162 + //buf_pool_mutex_exit(buf_pool);
2163 + if (flush_type == BUF_FLUSH_LRU) {
2164 + mutex_exit(&buf_pool->LRU_list_mutex);
2167 /* These fields are protected by both the
2168 buffer pool mutex and block mutex. */
2169 @@ -1505,13 +1517,18 @@
2173 - buf_pool_mutex_enter(buf_pool);
2174 + //buf_pool_mutex_enter(buf_pool);
2175 + if (flush_type == BUF_FLUSH_LRU) {
2176 + mutex_enter(&buf_pool->LRU_list_mutex);
2180 + } else if (block_mutex) {
2181 mutex_exit(block_mutex);
2184 - ut_ad(buf_pool_mutex_own(buf_pool));
2185 + //ut_ad(buf_pool_mutex_own(buf_pool));
2186 + ut_ad(flush_type != BUF_FLUSH_LRU
2187 + || mutex_own(&buf_pool->LRU_list_mutex));
2191 @@ -1532,7 +1549,8 @@
2195 - ut_ad(buf_pool_mutex_own(buf_pool));
2196 + //ut_ad(buf_pool_mutex_own(buf_pool));
2197 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2200 /* Start from the end of the list looking for a
2201 @@ -1554,7 +1572,8 @@
2202 should be flushed, we factor in this value. */
2203 buf_lru_flush_page_count += count;
2205 - ut_ad(buf_pool_mutex_own(buf_pool));
2206 + //ut_ad(buf_pool_mutex_own(buf_pool));
2207 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2211 @@ -1582,9 +1601,10 @@
2215 + buf_page_t* prev_bpage = NULL;
2218 - ut_ad(buf_pool_mutex_own(buf_pool));
2219 + //ut_ad(buf_pool_mutex_own(buf_pool));
2221 /* If we have flushed enough, leave the loop */
2223 @@ -1603,6 +1623,7 @@
2226 ut_a(bpage->oldest_modification > 0);
2227 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2230 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2231 @@ -1644,9 +1665,17 @@
2235 - bpage = UT_LIST_GET_PREV(list, bpage);
2236 + bpage = UT_LIST_GET_PREV(flush_list, bpage);
2238 - ut_ad(!bpage || bpage->in_flush_list);
2239 + //ut_ad(!bpage || bpage->in_flush_list);
2240 + if (bpage != prev_bpage) {
2241 + /* the list position may have shifted (search wrapped); retry */
2242 + buf_flush_list_mutex_exit(buf_pool);
2246 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2249 buf_flush_list_mutex_exit(buf_pool);
2251 @@ -1655,7 +1684,7 @@
2253 } while (count < min_n && bpage != NULL && len > 0);
2255 - ut_ad(buf_pool_mutex_own(buf_pool));
2256 + //ut_ad(buf_pool_mutex_own(buf_pool));
2260 @@ -1694,13 +1723,15 @@
2261 || sync_thread_levels_empty_gen(TRUE));
2262 #endif /* UNIV_SYNC_DEBUG */
2264 - buf_pool_mutex_enter(buf_pool);
2265 + //buf_pool_mutex_enter(buf_pool);
2267 /* Note: The buffer pool mutex is released and reacquired within
2268 the flush functions. */
2269 switch(flush_type) {
2271 + mutex_enter(&buf_pool->LRU_list_mutex);
2272 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2273 + mutex_exit(&buf_pool->LRU_list_mutex);
2275 case BUF_FLUSH_LIST:
2276 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2277 @@ -1709,7 +1740,7 @@
2281 - buf_pool_mutex_exit(buf_pool);
2282 + //buf_pool_mutex_exit(buf_pool);
2284 buf_flush_buffered_writes();
2286 @@ -1965,7 +1996,7 @@
2288 //buf_pool_mutex_enter(buf_pool);
2290 - buf_pool_mutex_enter(buf_pool);
2291 + mutex_enter(&buf_pool->LRU_list_mutex);
2293 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2295 @@ -1982,15 +2013,15 @@
2296 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2299 - block_mutex = buf_page_get_mutex(bpage);
2301 - mutex_enter(block_mutex);
2302 + block_mutex = buf_page_get_mutex_enter(bpage);
2304 - if (buf_flush_ready_for_replace(bpage)) {
2305 + if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2309 - mutex_exit(block_mutex);
2310 + if (block_mutex) {
2311 + mutex_exit(block_mutex);
2316 @@ -1999,7 +2030,7 @@
2318 //buf_pool_mutex_exit(buf_pool);
2320 - buf_pool_mutex_exit(buf_pool);
2321 + mutex_exit(&buf_pool->LRU_list_mutex);
2323 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2325 @@ -2198,7 +2229,7 @@
2327 ut_ad(buf_flush_list_mutex_own(buf_pool));
2329 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2330 + UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2331 ut_ad(ut_list_node_313->in_flush_list));
2333 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2334 @@ -2238,7 +2269,7 @@
2335 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2338 - bpage = UT_LIST_GET_NEXT(list, bpage);
2339 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2341 ut_a(!bpage || om >= bpage->oldest_modification);
2343 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2344 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2345 +++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2348 buf_LRU_block_free_hashed_page(
2349 /*===========================*/
2350 - buf_block_t* block); /*!< in: block, must contain a file page and
2351 + buf_block_t* block, /*!< in: block, must contain a file page and
2352 be in a state where it can be freed */
2353 + ibool have_page_hash_mutex);
2355 /******************************************************************//**
2356 Determines if the unzip_LRU list should be used for evicting a victim
2357 @@ -154,15 +155,20 @@
2359 buf_LRU_evict_from_unzip_LRU(
2360 /*=========================*/
2361 - buf_pool_t* buf_pool)
2362 + buf_pool_t* buf_pool,
2363 + ibool have_LRU_mutex)
2368 - ut_ad(buf_pool_mutex_own(buf_pool));
2369 + //ut_ad(buf_pool_mutex_own(buf_pool));
2371 + if (!have_LRU_mutex)
2372 + mutex_enter(&buf_pool->LRU_list_mutex);
2373 /* If the unzip_LRU list is empty, we can only use the LRU. */
2374 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2375 + if (!have_LRU_mutex)
2376 + mutex_exit(&buf_pool->LRU_list_mutex);
2380 @@ -171,14 +177,20 @@
2381 decompressed pages in the buffer pool. */
2382 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2383 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2384 + if (!have_LRU_mutex)
2385 + mutex_exit(&buf_pool->LRU_list_mutex);
2389 /* If eviction hasn't started yet, we assume by default
2390 that a workload is disk bound. */
2391 if (buf_pool->freed_page_clock == 0) {
2392 + if (!have_LRU_mutex)
2393 + mutex_exit(&buf_pool->LRU_list_mutex);
2396 + if (!have_LRU_mutex)
2397 + mutex_exit(&buf_pool->LRU_list_mutex);
2399 /* Calculate the average over past intervals, and add the values
2400 of the current interval. */
2401 @@ -246,19 +258,23 @@
2402 page_arr = ut_malloc(
2403 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2405 - buf_pool_mutex_enter(buf_pool);
2406 + //buf_pool_mutex_enter(buf_pool);
2407 + mutex_enter(&buf_pool->LRU_list_mutex);
2411 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2413 while (bpage != NULL) {
2414 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2415 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2416 buf_page_t* prev_bpage;
2418 - mutex_enter(block_mutex);
2419 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2421 + if (!block_mutex) {
2425 ut_a(buf_page_in_file(bpage));
2427 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2428 @@ -287,14 +303,16 @@
2430 /* Array full. We release the buf_pool->mutex to
2431 obey the latching order. */
2432 - buf_pool_mutex_exit(buf_pool);
2433 + //buf_pool_mutex_exit(buf_pool);
2434 + mutex_exit(&buf_pool->LRU_list_mutex);
2436 buf_LRU_drop_page_hash_batch(
2437 id, zip_size, page_arr, num_entries);
2441 - buf_pool_mutex_enter(buf_pool);
2442 + //buf_pool_mutex_enter(buf_pool);
2443 + mutex_enter(&buf_pool->LRU_list_mutex);
2445 mutex_exit(block_mutex);
2451 - buf_pool_mutex_exit(buf_pool);
2452 + //buf_pool_mutex_exit(buf_pool);
2453 + mutex_exit(&buf_pool->LRU_list_mutex);
2455 /* Drop any remaining batch of search hashed pages. */
2456 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2461 - buf_pool_mutex_enter(buf_pool);
2462 + //buf_pool_mutex_enter(buf_pool);
2463 + mutex_enter(&buf_pool->LRU_list_mutex);
2464 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2468 @@ -369,8 +390,16 @@
2472 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2473 - mutex_enter(block_mutex);
2474 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2476 + if (!block_mutex) {
2477 + /* This case should be impossible;
2478 + something is wrong, so fall through to scan_again */
2480 + all_freed = FALSE;
2482 + goto next_page_no_mutex;
2485 if (bpage->buf_fix_count > 0) {
2491 - buf_pool_mutex_exit(buf_pool);
2492 + //buf_pool_mutex_exit(buf_pool);
2493 + mutex_exit(&buf_pool->LRU_list_mutex);
2494 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2496 zip_size = buf_page_get_zip_size(bpage);
2497 page_no = buf_page_get_page_no(bpage);
2499 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2500 != BUF_BLOCK_ZIP_FREE) {
2501 buf_LRU_block_free_hashed_page((buf_block_t*)
2505 /* The block_mutex should have been
2506 released by buf_LRU_block_remove_hashed_page()
2511 - buf_pool_mutex_exit(buf_pool);
2512 + //buf_pool_mutex_exit(buf_pool);
2513 + mutex_exit(&buf_pool->LRU_list_mutex);
2514 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2517 os_thread_sleep(20000);
2520 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2522 - ut_ad(buf_pool_mutex_own(buf_pool));
2523 + //ut_ad(buf_pool_mutex_own(buf_pool));
2524 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2525 + ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2526 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2528 /* Find the first successor of bpage in the LRU list
2529 @@ -540,17 +575,17 @@
2532 b = UT_LIST_GET_NEXT(LRU, b);
2533 - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2534 + } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2536 /* Insert bpage before b, i.e., after the predecessor of b. */
2538 - b = UT_LIST_GET_PREV(list, b);
2539 + b = UT_LIST_GET_PREV(zip_list, b);
2543 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2544 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2546 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2547 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2551 @@ -563,18 +598,19 @@
2552 buf_LRU_free_from_unzip_LRU_list(
2553 /*=============================*/
2554 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2555 - ulint n_iterations) /*!< in: how many times this has
2556 + ulint n_iterations, /*!< in: how many times this has
2557 been called repeatedly without
2558 result: a high value means that
2559 we should search farther; we will
2560 search n_iterations / 5 of the
2561 unzip_LRU list, or nothing if
2562 n_iterations >= 5 */
2563 + ibool have_LRU_mutex)
2568 - ut_ad(buf_pool_mutex_own(buf_pool));
2569 + //ut_ad(buf_pool_mutex_own(buf_pool));
2571 /* Theoratically it should be much easier to find a victim
2572 from unzip_LRU as we can choose even a dirty block (as we'll
2574 if we have done five iterations so far. */
2576 if (UNIV_UNLIKELY(n_iterations >= 5)
2577 - || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2578 + || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2582 @@ -592,18 +628,25 @@
2583 distance = 100 + (n_iterations
2584 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2587 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2588 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2589 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2591 enum buf_lru_free_block_status freed;
2593 + mutex_enter(&block->mutex);
2594 + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2595 + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2596 + mutex_exit(&block->mutex);
2600 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2601 ut_ad(block->in_unzip_LRU_list);
2602 ut_ad(block->page.in_LRU_list);
2604 - mutex_enter(&block->mutex);
2605 - freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2606 + freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2607 mutex_exit(&block->mutex);
2610 @@ -637,21 +680,23 @@
2611 buf_LRU_free_from_common_LRU_list(
2612 /*==============================*/
2613 buf_pool_t* buf_pool,
2614 - ulint n_iterations)
2615 + ulint n_iterations,
2616 /*!< in: how many times this has been called
2617 repeatedly without result: a high value means
2618 that we should search farther; if
2619 n_iterations < 10, then we search
2620 n_iterations / 10 * buf_pool->curr_size
2621 pages from the end of the LRU list */
2622 + ibool have_LRU_mutex)
2627 - ut_ad(buf_pool_mutex_own(buf_pool));
2628 + //ut_ad(buf_pool_mutex_own(buf_pool));
2630 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2633 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2634 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2635 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2636 @@ -659,14 +704,23 @@
2637 enum buf_lru_free_block_status freed;
2639 mutex_t* block_mutex
2640 - = buf_page_get_mutex(bpage);
2641 + = buf_page_get_mutex_enter(bpage);
2643 + if (!block_mutex) {
2647 + if (!bpage->in_LRU_list
2648 + || !buf_page_in_file(bpage)) {
2649 + mutex_exit(block_mutex);
2653 ut_ad(buf_page_in_file(bpage));
2654 ut_ad(bpage->in_LRU_list);
2656 - mutex_enter(block_mutex);
2657 accessed = buf_page_is_accessed(bpage);
2658 - freed = buf_LRU_free_block(bpage, TRUE, NULL);
2659 + freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2660 mutex_exit(block_mutex);
2663 @@ -718,16 +772,23 @@
2664 n_iterations / 5 of the unzip_LRU list. */
2666 ibool freed = FALSE;
2667 + ibool have_LRU_mutex = FALSE;
2669 - buf_pool_mutex_enter(buf_pool);
2670 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2671 + have_LRU_mutex = TRUE;
2673 + //buf_pool_mutex_enter(buf_pool);
2674 + if (have_LRU_mutex)
2675 + mutex_enter(&buf_pool->LRU_list_mutex);
2677 - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2678 + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2681 freed = buf_LRU_free_from_common_LRU_list(
2682 - buf_pool, n_iterations);
2683 + buf_pool, n_iterations, have_LRU_mutex);
2686 + buf_pool_mutex_enter(buf_pool);
2688 buf_pool->LRU_flush_ended = 0;
2689 } else if (buf_pool->LRU_flush_ended > 0) {
2693 buf_pool_mutex_exit(buf_pool);
2694 + if (have_LRU_mutex)
2695 + mutex_exit(&buf_pool->LRU_list_mutex);
2701 buf_pool = buf_pool_from_array(i);
2703 - buf_pool_mutex_enter(buf_pool);
2704 + //buf_pool_mutex_enter(buf_pool);
2705 + mutex_enter(&buf_pool->LRU_list_mutex);
2706 + mutex_enter(&buf_pool->free_list_mutex);
2708 if (!recv_recovery_on
2709 && UT_LIST_GET_LEN(buf_pool->free)
2714 - buf_pool_mutex_exit(buf_pool);
2715 + //buf_pool_mutex_exit(buf_pool);
2716 + mutex_exit(&buf_pool->LRU_list_mutex);
2717 + mutex_exit(&buf_pool->free_list_mutex);
2721 @@ -823,9 +890,10 @@
2725 - ut_ad(buf_pool_mutex_own(buf_pool));
2726 + //ut_ad(buf_pool_mutex_own(buf_pool));
2728 - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2729 + mutex_enter(&buf_pool->free_list_mutex);
2730 + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2735 ut_ad(!block->page.in_flush_list);
2736 ut_ad(!block->page.in_LRU_list);
2737 ut_a(!buf_page_in_file(&block->page));
2738 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2739 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2741 + mutex_exit(&buf_pool->free_list_mutex);
2743 mutex_enter(&block->mutex);
2746 ut_ad(buf_pool_from_block(block) == buf_pool);
2748 mutex_exit(&block->mutex);
2750 + mutex_exit(&buf_pool->free_list_mutex);
2755 ibool mon_value_was = FALSE;
2756 ibool started_monitor = FALSE;
2758 - buf_pool_mutex_enter(buf_pool);
2759 + //buf_pool_mutex_enter(buf_pool);
2761 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2762 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2763 @@ -951,8 +1023,10 @@
2765 page_zip_set_size(&block->page.zip, zip_size);
2767 + mutex_enter(&buf_pool->LRU_list_mutex);
2768 block->page.zip.data = buf_buddy_alloc(
2769 - buf_pool, zip_size, &lru);
2770 + buf_pool, zip_size, &lru, FALSE);
2771 + mutex_exit(&buf_pool->LRU_list_mutex);
2773 UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2775 @@ -960,7 +1034,7 @@
2776 block->page.zip.data = NULL;
2779 - buf_pool_mutex_exit(buf_pool);
2780 + //buf_pool_mutex_exit(buf_pool);
2782 if (started_monitor) {
2783 srv_print_innodb_monitor = mon_value_was;
2784 @@ -972,7 +1046,7 @@
2785 /* If no block was in the free list, search from the end of the LRU
2786 list and try to free a block there */
2788 - buf_pool_mutex_exit(buf_pool);
2789 + //buf_pool_mutex_exit(buf_pool);
2791 freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2793 @@ -1058,7 +1132,8 @@
2796 ut_a(buf_pool->LRU_old);
2797 - ut_ad(buf_pool_mutex_own(buf_pool));
2798 + //ut_ad(buf_pool_mutex_own(buf_pool));
2799 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2800 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2801 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2802 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2803 @@ -1124,7 +1199,8 @@
2807 - ut_ad(buf_pool_mutex_own(buf_pool));
2808 + //ut_ad(buf_pool_mutex_own(buf_pool));
2809 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2810 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2812 /* We first initialize all blocks in the LRU list as old and then use
2813 @@ -1159,13 +1235,14 @@
2816 ut_ad(buf_page_in_file(bpage));
2817 - ut_ad(buf_pool_mutex_own(buf_pool));
2818 + //ut_ad(buf_pool_mutex_own(buf_pool));
2819 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2821 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2822 buf_block_t* block = (buf_block_t*) bpage;
2824 ut_ad(block->in_unzip_LRU_list);
2825 - ut_d(block->in_unzip_LRU_list = FALSE);
2826 + block->in_unzip_LRU_list = FALSE;
2828 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2830 @@ -1183,7 +1260,8 @@
2834 - ut_ad(buf_pool_mutex_own(buf_pool));
2835 + //ut_ad(buf_pool_mutex_own(buf_pool));
2836 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2838 ut_a(buf_page_in_file(bpage));
2840 @@ -1260,12 +1338,13 @@
2844 - ut_ad(buf_pool_mutex_own(buf_pool));
2845 + //ut_ad(buf_pool_mutex_own(buf_pool));
2846 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2848 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2850 ut_ad(!block->in_unzip_LRU_list);
2851 - ut_d(block->in_unzip_LRU_list = TRUE);
2852 + block->in_unzip_LRU_list = TRUE;
2855 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2856 @@ -1286,7 +1365,8 @@
2860 - ut_ad(buf_pool_mutex_own(buf_pool));
2861 + //ut_ad(buf_pool_mutex_own(buf_pool));
2862 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2864 ut_a(buf_page_in_file(bpage));
2866 @@ -1337,7 +1417,8 @@
2870 - ut_ad(buf_pool_mutex_own(buf_pool));
2871 + //ut_ad(buf_pool_mutex_own(buf_pool));
2872 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2874 ut_a(buf_page_in_file(bpage));
2875 ut_ad(!bpage->in_LRU_list);
2876 @@ -1416,7 +1497,8 @@
2878 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2880 - ut_ad(buf_pool_mutex_own(buf_pool));
2881 + //ut_ad(buf_pool_mutex_own(buf_pool));
2882 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2885 buf_pool->stat.n_pages_made_young++;
2886 @@ -1458,19 +1540,20 @@
2887 buf_page_t* bpage, /*!< in: block to be freed */
2888 ibool zip, /*!< in: TRUE if should remove also the
2889 compressed page of an uncompressed page */
2890 - ibool* buf_pool_mutex_released)
2891 + ibool* buf_pool_mutex_released,
2892 /*!< in: pointer to a variable that will
2893 be assigned TRUE if buf_pool_mutex
2894 was temporarily released, or NULL */
2895 + ibool have_LRU_mutex)
2897 buf_page_t* b = NULL;
2898 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2899 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2901 - ut_ad(buf_pool_mutex_own(buf_pool));
2902 + //ut_ad(buf_pool_mutex_own(buf_pool));
2903 ut_ad(mutex_own(block_mutex));
2904 ut_ad(buf_page_in_file(bpage));
2905 - ut_ad(bpage->in_LRU_list);
2906 + //ut_ad(bpage->in_LRU_list);
2907 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2908 #if UNIV_WORD_SIZE == 4
2909 /* On 32-bit systems, there is no padding in buf_page_t. On
2910 @@ -1479,7 +1562,7 @@
2911 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2914 - if (!buf_page_can_relocate(bpage)) {
2915 + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2917 /* Do not free buffer-fixed or I/O-fixed blocks. */
2918 return(BUF_LRU_NOT_FREED);
2919 @@ -1511,15 +1594,15 @@
2920 If it cannot be allocated (without freeing a block
2921 from the LRU list), refuse to free bpage. */
2923 - buf_pool_mutex_exit_forbid(buf_pool);
2924 - b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2925 - buf_pool_mutex_exit_allow(buf_pool);
2926 + //buf_pool_mutex_exit_forbid(buf_pool);
2927 + b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2928 + //buf_pool_mutex_exit_allow(buf_pool);
2930 if (UNIV_UNLIKELY(!b)) {
2931 return(BUF_LRU_CANNOT_RELOCATE);
2934 - memcpy(b, bpage, sizeof *b);
2935 + //memcpy(b, bpage, sizeof *b);
2939 @@ -1530,6 +1613,39 @@
2941 #endif /* UNIV_DEBUG */
2943 + /* to preserve the latch order, release and re-acquire block_mutex */
2944 + mutex_exit(block_mutex);
2946 + if (!have_LRU_mutex)
2947 + mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2948 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2949 + mutex_enter(block_mutex);
2951 + /* recheck states of block */
2952 + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2953 + || !buf_page_can_relocate(bpage)) {
2956 + buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2958 + if (!have_LRU_mutex)
2959 + mutex_exit(&buf_pool->LRU_list_mutex);
2960 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2961 + return(BUF_LRU_NOT_FREED);
2962 + } else if (zip || !bpage->zip.data) {
2963 + if (bpage->oldest_modification)
2965 + } else if (bpage->oldest_modification) {
2966 + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2967 + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2973 + memcpy(b, bpage, sizeof *b);
2976 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2977 != BUF_BLOCK_ZIP_FREE) {
2978 ut_a(bpage->buf_fix_count == 0);
2979 @@ -1546,6 +1662,10 @@
2983 + while (prev_b && !prev_b->in_LRU_list) {
2984 + prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2987 b->state = b->oldest_modification
2988 ? BUF_BLOCK_ZIP_DIRTY
2989 : BUF_BLOCK_ZIP_PAGE;
2990 @@ -1642,7 +1762,9 @@
2991 *buf_pool_mutex_released = TRUE;
2994 - buf_pool_mutex_exit(buf_pool);
2995 + //buf_pool_mutex_exit(buf_pool);
2996 + mutex_exit(&buf_pool->LRU_list_mutex);
2997 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2998 mutex_exit(block_mutex);
3000 /* Remove possible adaptive hash index on the page.
3001 @@ -1674,7 +1796,9 @@
3002 : BUF_NO_CHECKSUM_MAGIC);
3005 - buf_pool_mutex_enter(buf_pool);
3006 + //buf_pool_mutex_enter(buf_pool);
3007 + if (have_LRU_mutex)
3008 + mutex_enter(&buf_pool->LRU_list_mutex);
3009 mutex_enter(block_mutex);
3012 @@ -1684,13 +1808,17 @@
3013 mutex_exit(&buf_pool->zip_mutex);
3016 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
3017 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3019 /* The block_mutex should have been released by
3020 buf_LRU_block_remove_hashed_page() when it returns
3021 BUF_BLOCK_ZIP_FREE. */
3022 ut_ad(block_mutex == &buf_pool->zip_mutex);
3023 mutex_enter(block_mutex);
3025 + if (!have_LRU_mutex)
3026 + mutex_exit(&buf_pool->LRU_list_mutex);
3027 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3030 return(BUF_LRU_FREED);
3031 @@ -1702,13 +1830,14 @@
3033 buf_LRU_block_free_non_file_page(
3034 /*=============================*/
3035 - buf_block_t* block) /*!< in: block, must not contain a file page */
3036 + buf_block_t* block, /*!< in: block, must not contain a file page */
3037 + ibool have_page_hash_mutex)
3040 buf_pool_t* buf_pool = buf_pool_from_block(block);
3043 - ut_ad(buf_pool_mutex_own(buf_pool));
3044 + //ut_ad(buf_pool_mutex_own(buf_pool));
3045 ut_ad(mutex_own(&block->mutex));
3047 switch (buf_block_get_state(block)) {
3048 @@ -1742,18 +1871,21 @@
3050 block->page.zip.data = NULL;
3051 mutex_exit(&block->mutex);
3052 - buf_pool_mutex_exit_forbid(buf_pool);
3053 + //buf_pool_mutex_exit_forbid(buf_pool);
3056 - buf_pool, data, page_zip_get_size(&block->page.zip));
3057 + buf_pool, data, page_zip_get_size(&block->page.zip),
3058 + have_page_hash_mutex);
3060 - buf_pool_mutex_exit_allow(buf_pool);
3061 + //buf_pool_mutex_exit_allow(buf_pool);
3062 mutex_enter(&block->mutex);
3063 page_zip_set_size(&block->page.zip, 0);
3066 - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3067 + mutex_enter(&buf_pool->free_list_mutex);
3068 + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3069 ut_d(block->page.in_free_list = TRUE);
3070 + mutex_exit(&buf_pool->free_list_mutex);
3072 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3074 @@ -1783,7 +1915,11 @@
3075 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3078 - ut_ad(buf_pool_mutex_own(buf_pool));
3079 + //ut_ad(buf_pool_mutex_own(buf_pool));
3080 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3081 +#ifdef UNIV_SYNC_DEBUG
3082 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3084 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3086 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3087 @@ -1891,7 +2027,9 @@
3089 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3090 mutex_exit(buf_page_get_mutex(bpage));
3091 - buf_pool_mutex_exit(buf_pool);
3092 + //buf_pool_mutex_exit(buf_pool);
3093 + mutex_exit(&buf_pool->LRU_list_mutex);
3094 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3098 @@ -1912,17 +2050,17 @@
3099 ut_a(bpage->zip.data);
3100 ut_a(buf_page_get_zip_size(bpage));
3102 - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3103 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3105 mutex_exit(&buf_pool->zip_mutex);
3106 - buf_pool_mutex_exit_forbid(buf_pool);
3107 + //buf_pool_mutex_exit_forbid(buf_pool);
3110 buf_pool, bpage->zip.data,
3111 - page_zip_get_size(&bpage->zip));
3112 + page_zip_get_size(&bpage->zip), TRUE);
3114 - buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3115 - buf_pool_mutex_exit_allow(buf_pool);
3116 + buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3117 + //buf_pool_mutex_exit_allow(buf_pool);
3119 UNIV_MEM_UNDESC(bpage);
3120 return(BUF_BLOCK_ZIP_FREE);
3121 @@ -1945,13 +2083,13 @@
3122 ut_ad(!bpage->in_flush_list);
3123 ut_ad(!bpage->in_LRU_list);
3124 mutex_exit(&((buf_block_t*) bpage)->mutex);
3125 - buf_pool_mutex_exit_forbid(buf_pool);
3126 + //buf_pool_mutex_exit_forbid(buf_pool);
3130 - page_zip_get_size(&bpage->zip));
3131 + page_zip_get_size(&bpage->zip), TRUE);
3133 - buf_pool_mutex_exit_allow(buf_pool);
3134 + //buf_pool_mutex_exit_allow(buf_pool);
3135 mutex_enter(&((buf_block_t*) bpage)->mutex);
3136 page_zip_set_size(&bpage->zip, 0);
3138 @@ -1977,18 +2115,19 @@
3140 buf_LRU_block_free_hashed_page(
3141 /*===========================*/
3142 - buf_block_t* block) /*!< in: block, must contain a file page and
3143 + buf_block_t* block, /*!< in: block, must contain a file page and
3144 be in a state where it can be freed */
3145 + ibool have_page_hash_mutex)
3148 - buf_pool_t* buf_pool = buf_pool_from_block(block);
3149 - ut_ad(buf_pool_mutex_own(buf_pool));
3150 + //buf_pool_t* buf_pool = buf_pool_from_block(block);
3151 + //ut_ad(buf_pool_mutex_own(buf_pool));
3153 ut_ad(mutex_own(&block->mutex));
3155 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3157 - buf_LRU_block_free_non_file_page(block);
3158 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3161 /**********************************************************************//**
3162 @@ -2015,7 +2154,8 @@
3166 - buf_pool_mutex_enter(buf_pool);
3167 + //buf_pool_mutex_enter(buf_pool);
3168 + mutex_enter(&buf_pool->LRU_list_mutex);
3170 if (ratio != buf_pool->LRU_old_ratio) {
3171 buf_pool->LRU_old_ratio = ratio;
3172 @@ -2027,7 +2167,8 @@
3176 - buf_pool_mutex_exit(buf_pool);
3177 + //buf_pool_mutex_exit(buf_pool);
3178 + mutex_exit(&buf_pool->LRU_list_mutex);
3180 buf_pool->LRU_old_ratio = ratio;
3182 @@ -2124,7 +2265,8 @@
3186 - buf_pool_mutex_enter(buf_pool);
3187 + //buf_pool_mutex_enter(buf_pool);
3188 + mutex_enter(&buf_pool->LRU_list_mutex);
3190 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3192 @@ -2185,16 +2327,22 @@
3194 ut_a(buf_pool->LRU_old_len == old_len);
3196 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3197 + mutex_exit(&buf_pool->LRU_list_mutex);
3198 + mutex_enter(&buf_pool->free_list_mutex);
3200 + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3201 ut_ad(ut_list_node_313->in_free_list));
3203 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3205 - bpage = UT_LIST_GET_NEXT(list, bpage)) {
3206 + bpage = UT_LIST_GET_NEXT(free, bpage)) {
3208 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3211 + mutex_exit(&buf_pool->free_list_mutex);
3212 + mutex_enter(&buf_pool->LRU_list_mutex);
3214 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3215 ut_ad(ut_list_node_313->in_unzip_LRU_list
3216 && ut_list_node_313->page.in_LRU_list));
3217 @@ -2208,7 +2356,8 @@
3218 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3221 - buf_pool_mutex_exit(buf_pool);
3222 + //buf_pool_mutex_exit(buf_pool);
3223 + mutex_exit(&buf_pool->LRU_list_mutex);
3226 /**********************************************************************//**
3227 @@ -2244,7 +2393,8 @@
3228 const buf_page_t* bpage;
3231 - buf_pool_mutex_enter(buf_pool);
3232 + //buf_pool_mutex_enter(buf_pool);
3233 + mutex_enter(&buf_pool->LRU_list_mutex);
3235 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3237 @@ -2301,7 +2451,8 @@
3238 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3241 - buf_pool_mutex_exit(buf_pool);
3242 + //buf_pool_mutex_exit(buf_pool);
3243 + mutex_exit(&buf_pool->LRU_list_mutex);
3246 /**********************************************************************//**
3247 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3248 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3249 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3254 + buf_pool_mutex_exit(buf_pool);
3256 /* Check that almost all pages in the area have been accessed; if
3257 offset == low, the accesses must be in a descending order, otherwise,
3262 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3263 for (i = low; i < high; i++) {
3264 bpage = buf_page_hash_get(buf_pool, space, i);
3268 if (fail_count > threshold) {
3269 /* Too many failures: return */
3270 - buf_pool_mutex_exit(buf_pool);
3271 + //buf_pool_mutex_exit(buf_pool);
3272 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3277 bpage = buf_page_hash_get(buf_pool, space, offset);
3279 if (bpage == NULL) {
3280 - buf_pool_mutex_exit(buf_pool);
3281 + //buf_pool_mutex_exit(buf_pool);
3282 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3287 pred_offset = fil_page_get_prev(frame);
3288 succ_offset = fil_page_get_next(frame);
3290 - buf_pool_mutex_exit(buf_pool);
3291 + //buf_pool_mutex_exit(buf_pool);
3292 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3294 if ((offset == low) && (succ_offset == offset + 1)) {
3296 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3297 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3298 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3299 @@ -245,6 +245,10 @@
3300 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3301 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3302 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3303 + {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3304 + {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3305 + {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3306 + {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3307 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3308 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3309 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3311 {&archive_lock_key, "archive_lock", 0},
3312 # endif /* UNIV_LOG_ARCHIVE */
3313 {&btr_search_latch_key, "btr_search_latch", 0},
3314 + {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3315 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3316 {&buf_block_lock_key, "buf_block_lock", 0},
3317 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3318 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3319 --- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3320 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
3321 @@ -1566,7 +1566,8 @@
3323 buf_pool = buf_pool_from_array(i);
3325 - buf_pool_mutex_enter(buf_pool);
3326 + //buf_pool_mutex_enter(buf_pool);
3327 + mutex_enter(&buf_pool->zip_free_mutex);
3329 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3330 buf_buddy_stat_t* buddy_stat;
3331 @@ -1596,7 +1597,8 @@
3335 - buf_pool_mutex_exit(buf_pool);
3336 + //buf_pool_mutex_exit(buf_pool);
3337 + mutex_exit(&buf_pool->zip_free_mutex);
3341 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3342 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3343 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3344 @@ -3705,9 +3705,11 @@
3345 ulint fold = buf_page_address_fold(space, page_no);
3346 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3348 - buf_pool_mutex_enter(buf_pool);
3349 + //buf_pool_mutex_enter(buf_pool);
3350 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3351 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3352 - buf_pool_mutex_exit(buf_pool);
3353 + //buf_pool_mutex_exit(buf_pool);
3354 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3356 if (UNIV_LIKELY_NULL(bpage)) {
3357 /* A buffer pool watch has been set or the
3358 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3359 --- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3360 +++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3362 buf_pool_t* buf_pool,
3363 /*!< buffer pool in which the block resides */
3364 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3365 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3366 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3367 TRUE if storage was allocated from the LRU list
3368 and buf_pool->mutex was temporarily released,
3369 or NULL if the LRU list should not be used */
3370 + ibool have_page_hash_mutex)
3371 __attribute__((malloc));
3373 /**********************************************************************//**
3375 /*!< buffer pool in which the block resides */
3376 void* buf, /*!< in: block to be freed, must not be
3377 pointed to by the buffer pool */
3378 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3379 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3380 + ibool have_page_hash_mutex)
3381 __attribute__((nonnull));
3384 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3385 --- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3386 +++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3388 /*!< in: buffer pool in which the page resides */
3389 ulint i, /*!< in: index of buf_pool->zip_free[],
3390 or BUF_BUDDY_SIZES */
3391 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3392 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3393 TRUE if storage was allocated from the LRU list
3394 and buf_pool->mutex was temporarily released,
3395 or NULL if the LRU list should not be used */
3396 + ibool have_page_hash_mutex)
3397 __attribute__((malloc));
3399 /**********************************************************************//**
3401 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3402 void* buf, /*!< in: block to be freed, must not be
3403 pointed to by the buffer pool */
3404 - ulint i) /*!< in: index of buf_pool->zip_free[],
3405 + ulint i, /*!< in: index of buf_pool->zip_free[],
3406 or BUF_BUDDY_SIZES */
3407 + ibool have_page_hash_mutex)
3408 __attribute__((nonnull));
3410 /**********************************************************************//**
3411 @@ -102,16 +104,17 @@
3413 ulint size, /*!< in: block size, up to
3415 - ibool* lru) /*!< in: pointer to a variable
3416 + ibool* lru, /*!< in: pointer to a variable
3417 that will be assigned TRUE if
3418 storage was allocated from the
3419 LRU list and buf_pool->mutex was
3420 temporarily released, or NULL if
3421 the LRU list should not be used */
3422 + ibool have_page_hash_mutex)
3424 - ut_ad(buf_pool_mutex_own(buf_pool));
3425 + //ut_ad(buf_pool_mutex_own(buf_pool));
3427 - return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3428 + return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3431 /**********************************************************************//**
3432 @@ -123,12 +126,25 @@
3433 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3434 void* buf, /*!< in: block to be freed, must not be
3435 pointed to by the buffer pool */
3436 - ulint size) /*!< in: block size, up to
3437 + ulint size, /*!< in: block size, up to
3439 + ibool have_page_hash_mutex)
3441 - ut_ad(buf_pool_mutex_own(buf_pool));
3442 + //ut_ad(buf_pool_mutex_own(buf_pool));
3444 + if (!have_page_hash_mutex) {
3445 + mutex_enter(&buf_pool->LRU_list_mutex);
3446 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3449 - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3450 + mutex_enter(&buf_pool->zip_free_mutex);
3451 + buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3452 + mutex_exit(&buf_pool->zip_free_mutex);
3454 + if (!have_page_hash_mutex) {
3455 + mutex_exit(&buf_pool->LRU_list_mutex);
3456 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3460 #ifdef UNIV_MATERIALIZE
3461 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3462 --- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3463 +++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
3464 @@ -132,6 +132,20 @@
3465 /*==========================*/
3467 /********************************************************************//**
3471 +buf_pool_page_hash_x_lock_all(void);
3472 +/*================================*/
3474 +/********************************************************************//**
3478 +buf_pool_page_hash_x_unlock_all(void);
3479 +/*==================================*/
3481 +/********************************************************************//**
3482 Creates the buffer pool.
3483 @return own: buf_pool object, NULL if not enough memory or error */
3485 @@ -761,6 +775,15 @@
3486 const buf_page_t* bpage) /*!< in: pointer to control block */
3487 __attribute__((pure));
3489 +/*************************************************************************
3490 +Gets the mutex of a block and enters the mutex with consistency. */
3493 +buf_page_get_mutex_enter(
3494 +/*=========================*/
3495 + const buf_page_t* bpage) /*!< in: pointer to control block */
3496 + __attribute__((pure));
3498 /*********************************************************************//**
3499 Get the flush type of a page.
3500 @return flush type */
3501 @@ -1242,7 +1265,7 @@
3502 All these are protected by buf_pool->mutex. */
3505 - UT_LIST_NODE_T(buf_page_t) list;
3506 + /* UT_LIST_NODE_T(buf_page_t) list; */
3507 /*!< based on state, this is a
3508 list node, protected either by
3509 buf_pool->mutex or by
3510 @@ -1270,6 +1293,10 @@
3511 BUF_BLOCK_REMOVE_HASH or
3512 BUF_BLOCK_READY_IN_USE. */
3514 + /* resplit for optimistic use */
3515 + UT_LIST_NODE_T(buf_page_t) free;
3516 + UT_LIST_NODE_T(buf_page_t) flush_list;
3517 + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3519 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3520 when buf_pool->flush_list_mutex is
3521 @@ -1362,11 +1389,11 @@
3522 a block is in the unzip_LRU list
3523 if page.state == BUF_BLOCK_FILE_PAGE
3524 and page.zip.data != NULL */
3526 +//#ifdef UNIV_DEBUG
3527 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3528 decompressed LRU list;
3529 used in debugging */
3530 -#endif /* UNIV_DEBUG */
3531 +//#endif /* UNIV_DEBUG */
3532 mutex_t mutex; /*!< mutex protecting this block:
3533 state (also protected by the buffer
3534 pool mutex), io_fix, buf_fix_count,
3535 @@ -1532,6 +1559,11 @@
3536 pool instance, protects compressed
3537 only pages (of type buf_page_t, not
3539 + mutex_t LRU_list_mutex;
3540 + rw_lock_t page_hash_latch;
3541 + mutex_t free_list_mutex;
3542 + mutex_t zip_free_mutex;
3543 + mutex_t zip_hash_mutex;
3544 ulint instance_no; /*!< Array index of this buffer
3546 ulint old_pool_size; /*!< Old pool size in bytes */
3547 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3548 --- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3549 +++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3551 case BUF_BLOCK_ZIP_FREE:
3552 /* This is a free page in buf_pool->zip_free[].
3553 Such pages should only be accessed by the buddy allocator. */
3555 + /* ut_error; */ /* optimistic */
3557 case BUF_BLOCK_ZIP_PAGE:
3558 case BUF_BLOCK_ZIP_DIRTY:
3559 @@ -317,9 +317,14 @@
3561 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3563 + if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3564 + /* TODO: this code is interim; it should be confirmed later. */
3565 + return(&buf_pool->zip_mutex);
3568 switch (buf_page_get_state(bpage)) {
3569 case BUF_BLOCK_ZIP_FREE:
3571 + /* ut_error; */ /* optimistic */
3573 case BUF_BLOCK_ZIP_PAGE:
3574 case BUF_BLOCK_ZIP_DIRTY:
3575 @@ -329,6 +334,28 @@
3579 +/*************************************************************************
3580 +Gets the mutex of a block and enters the mutex with consistency. */
3583 +buf_page_get_mutex_enter(
3584 +/*=========================*/
3585 + const buf_page_t* bpage) /*!< in: pointer to control block */
3587 + mutex_t* block_mutex;
3590 + block_mutex = buf_page_get_mutex(bpage);
3592 + return block_mutex;
3594 + mutex_enter(block_mutex);
3595 + if (block_mutex == buf_page_get_mutex(bpage))
3596 + return block_mutex;
3597 + mutex_exit(block_mutex);
3601 /*********************************************************************//**
3602 Get the flush type of a page.
3603 @return flush type */
3605 enum buf_io_fix io_fix) /*!< in: io_fix state */
3608 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3609 - ut_ad(buf_pool_mutex_own(buf_pool));
3610 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3611 + //ut_ad(buf_pool_mutex_own(buf_pool));
3613 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3615 @@ -456,14 +483,14 @@
3616 const buf_page_t* bpage) /*!< control block being relocated */
3619 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3620 - ut_ad(buf_pool_mutex_own(buf_pool));
3621 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3622 + //ut_ad(buf_pool_mutex_own(buf_pool));
3624 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3625 ut_ad(buf_page_in_file(bpage));
3626 - ut_ad(bpage->in_LRU_list);
3627 + //ut_ad(bpage->in_LRU_list);
3629 - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3630 + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3631 && bpage->buf_fix_count == 0);
3635 const buf_page_t* bpage) /*!< in: control block */
3638 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3639 - ut_ad(buf_pool_mutex_own(buf_pool));
3640 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3641 + //ut_ad(buf_pool_mutex_own(buf_pool));
3643 ut_ad(buf_page_in_file(bpage));
3646 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3647 #endif /* UNIV_DEBUG */
3648 ut_a(buf_page_in_file(bpage));
3649 - ut_ad(buf_pool_mutex_own(buf_pool));
3650 + //ut_ad(buf_pool_mutex_own(buf_pool));
3651 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3652 ut_ad(bpage->in_LRU_list);
3654 #ifdef UNIV_LRU_DEBUG
3655 @@ -545,9 +573,10 @@
3656 ulint time_ms) /*!< in: ut_time_ms() */
3659 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3660 - ut_ad(buf_pool_mutex_own(buf_pool));
3661 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3662 + //ut_ad(buf_pool_mutex_own(buf_pool));
3664 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3665 ut_a(buf_page_in_file(bpage));
3667 if (!bpage->access_time) {
3668 @@ -761,19 +790,19 @@
3670 buf_block_t* block) /*!< in, own: block to be freed */
3672 - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3673 + //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3675 - buf_pool_mutex_enter(buf_pool);
3676 + //buf_pool_mutex_enter(buf_pool);
3678 mutex_enter(&block->mutex);
3680 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3682 - buf_LRU_block_free_non_file_page(block);
3683 + buf_LRU_block_free_non_file_page(block, FALSE);
3685 mutex_exit(&block->mutex);
3687 - buf_pool_mutex_exit(buf_pool);
3688 + //buf_pool_mutex_exit(buf_pool);
3690 #endif /* !UNIV_HOTBACKUP */
3692 @@ -821,17 +850,17 @@
3696 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
3698 - mutex_enter(block_mutex);
3699 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3701 - if (buf_page_in_file(bpage)) {
3702 + if (block_mutex && buf_page_in_file(bpage)) {
3703 lsn = bpage->newest_modification;
3708 - mutex_exit(block_mutex);
3709 + if (block_mutex) {
3710 + mutex_exit(block_mutex);
3716 #ifdef UNIV_SYNC_DEBUG
3717 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3719 - ut_ad((buf_pool_mutex_own(buf_pool)
3720 + ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3721 && (block->page.buf_fix_count == 0))
3722 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3723 #endif /* UNIV_SYNC_DEBUG */
3724 @@ -979,7 +1008,11 @@
3728 - ut_ad(buf_pool_mutex_own(buf_pool));
3729 + //ut_ad(buf_pool_mutex_own(buf_pool));
3730 +#ifdef UNIV_SYNC_DEBUG
3731 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3732 + || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3734 ut_ad(fold == buf_page_address_fold(space, offset));
3736 /* Look for the page in the hash table */
3737 @@ -1064,11 +1097,13 @@
3738 const buf_page_t* bpage;
3739 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3741 - buf_pool_mutex_enter(buf_pool);
3742 + //buf_pool_mutex_enter(buf_pool);
3743 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3745 bpage = buf_page_hash_get(buf_pool, space, offset);
3747 - buf_pool_mutex_exit(buf_pool);
3748 + //buf_pool_mutex_exit(buf_pool);
3749 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3751 return(bpage != NULL);
3753 @@ -1196,4 +1231,38 @@
3754 buf_pool_mutex_exit(buf_pool);
3758 +/********************************************************************//**
3762 +buf_pool_page_hash_x_lock_all(void)
3763 +/*===============================*/
3767 + for (i = 0; i < srv_buf_pool_instances; i++) {
3768 + buf_pool_t* buf_pool;
3770 + buf_pool = buf_pool_from_array(i);
3771 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3775 +/********************************************************************//**
3779 +buf_pool_page_hash_x_unlock_all(void)
3780 +/*=================================*/
3784 + for (i = 0; i < srv_buf_pool_instances; i++) {
3785 + buf_pool_t* buf_pool;
3787 + buf_pool = buf_pool_from_array(i);
3788 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3791 #endif /* !UNIV_HOTBACKUP */
3792 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3793 --- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3794 +++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3795 @@ -113,10 +113,11 @@
3796 buf_page_t* bpage, /*!< in: block to be freed */
3797 ibool zip, /*!< in: TRUE if should remove also the
3798 compressed page of an uncompressed page */
3799 - ibool* buf_pool_mutex_released);
3800 + ibool* buf_pool_mutex_released,
3801 /*!< in: pointer to a variable that will
3802 be assigned TRUE if buf_pool->mutex
3803 was temporarily released, or NULL */
3804 + ibool have_LRU_mutex);
3805 /******************************************************************//**
3806 Try to free a replaceable block.
3807 @return TRUE if found and freed */
3810 buf_LRU_block_free_non_file_page(
3811 /*=============================*/
3812 - buf_block_t* block); /*!< in: block, must not contain a file page */
3813 + buf_block_t* block, /*!< in: block, must not contain a file page */
3814 + ibool have_page_hash_mutex);
3815 /******************************************************************//**
3816 Adds a block to the LRU list. */
3818 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3819 --- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3820 +++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3822 extern mysql_pfs_key_t archive_lock_key;
3823 # endif /* UNIV_LOG_ARCHIVE */
3824 extern mysql_pfs_key_t btr_search_latch_key;
3825 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3826 extern mysql_pfs_key_t buf_block_lock_key;
3827 # ifdef UNIV_SYNC_DEBUG
3828 extern mysql_pfs_key_t buf_block_debug_latch_key;
3829 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3830 --- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3831 +++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3833 extern mysql_pfs_key_t buffer_block_mutex_key;
3834 extern mysql_pfs_key_t buf_pool_mutex_key;
3835 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3836 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3837 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3838 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3839 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3840 extern mysql_pfs_key_t cache_last_read_mutex_key;
3841 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3842 extern mysql_pfs_key_t dict_sys_mutex_key;
3844 #define SYNC_TRX_LOCK_HEAP 298
3845 #define SYNC_TRX_SYS_HEADER 290
3846 #define SYNC_LOG 170
3847 -#define SYNC_LOG_FLUSH_ORDER 147
3848 +#define SYNC_LOG_FLUSH_ORDER 156
3849 #define SYNC_RECV 168
3850 #define SYNC_WORK_QUEUE 162
3851 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3852 @@ -670,8 +674,13 @@
3853 SYNC_SEARCH_SYS, as memory allocation
3854 can call routines there! Otherwise
3855 the level is SYNC_MEM_HASH. */
3856 +#define SYNC_BUF_LRU_LIST 158
3857 +#define SYNC_BUF_PAGE_HASH 157
3858 +#define SYNC_BUF_BLOCK 155 /* Block mutex */
3859 +#define SYNC_BUF_FREE_LIST 153
3860 +#define SYNC_BUF_ZIP_FREE 152
3861 +#define SYNC_BUF_ZIP_HASH 151
3862 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3863 -#define SYNC_BUF_BLOCK 146 /* Block mutex */
3864 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3865 #define SYNC_DOUBLEWRITE 140
3866 #define SYNC_ANY_LATCH 135
3868 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3869 when atomic operations are not enabled */
3871 - ulint waiters; /*!< This ulint is set to 1 if there are (or
3872 + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3873 may be) threads waiting in the global wait
3874 array for this mutex to be released.
3875 Otherwise, this is 0. */
3876 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3877 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3878 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
3879 @@ -3060,7 +3060,7 @@
3880 level += log_sys->max_checkpoint_age
3881 - (lsn - oldest_modification);
3883 - bpage = UT_LIST_GET_NEXT(list, bpage);
3884 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3888 @@ -3145,7 +3145,7 @@
3892 - bpage = UT_LIST_GET_NEXT(list, bpage);
3893 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3897 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3898 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3899 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3901 mutex->lock_word = 0;
3903 mutex->event = os_event_create(NULL);
3904 - mutex_set_waiters(mutex, 0);
3905 + mutex->waiters = 0;
3907 mutex->magic_n = MUTEX_MAGIC_N;
3908 #endif /* UNIV_DEBUG */
3909 @@ -444,6 +444,15 @@
3910 mutex_t* mutex, /*!< in: mutex */
3911 ulint n) /*!< in: value to set */
3913 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3917 + os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3919 + os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3922 volatile ulint* ptr; /* declared volatile to ensure that
3923 the value is stored to memory */
3927 *ptr = n; /* Here we assume that the write of a single
3928 word in memory is atomic */
3932 /******************************************************************//**
3933 @@ -1193,7 +1203,12 @@
3937 + case SYNC_BUF_LRU_LIST:
3938 case SYNC_BUF_FLUSH_LIST:
3939 + case SYNC_BUF_PAGE_HASH:
3940 + case SYNC_BUF_FREE_LIST:
3941 + case SYNC_BUF_ZIP_FREE:
3942 + case SYNC_BUF_ZIP_HASH:
3944 /* We can have multiple mutexes of this type therefore we
3945 can only check whether the greater than condition holds. */
3946 @@ -1211,7 +1226,8 @@
3947 buffer block (block->mutex or buf_pool->zip_mutex). */
3948 if (!sync_thread_levels_g(array, level, FALSE)) {
3949 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3950 - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3951 + /* the exact rule is not fixed yet, for now */
3952 + //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));