1 # name : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
15 - buf_pool_mutex_enter(buf_pool);
16 + //buf_pool_mutex_enter(buf_pool);
17 + mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
20 /* Only free the block if it is still allocated to
21 @@ -3946,17 +3947,22 @@
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
25 - if (buf_LRU_free_block(&block->page, all, NULL)
26 + if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
28 - && all && block->page.zip.data) {
29 + && all && block->page.zip.data
30 + /* Now, buf_LRU_free_block() may release mutex temporarily */
31 + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
32 + && buf_block_get_space(block) == space
33 + && buf_block_get_page_no(block) == page_no) {
34 /* Attempt to deallocate the uncompressed page
35 if the whole block cannot be deallocted. */
37 - buf_LRU_free_block(&block->page, FALSE, NULL);
38 + buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
42 - buf_pool_mutex_exit(buf_pool);
43 + //buf_pool_mutex_exit(buf_pool);
44 + mutex_exit(&buf_pool->LRU_list_mutex);
45 mutex_exit(&block->mutex);
48 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
49 --- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
50 +++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
52 rec_offs_init(offsets_);
54 rw_lock_x_lock(&btr_search_latch);
55 - buf_pool_mutex_enter_all();
56 + buf_pool_page_hash_x_lock_all();
58 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
60 @@ -1951,11 +1951,11 @@
61 /* We release btr_search_latch every once in a while to
62 give other queries a chance to run. */
63 if ((i != 0) && ((i % chunk_size) == 0)) {
64 - buf_pool_mutex_exit_all();
65 + buf_pool_page_hash_x_unlock_all();
66 rw_lock_x_unlock(&btr_search_latch);
68 rw_lock_x_lock(&btr_search_latch);
69 - buf_pool_mutex_enter_all();
70 + buf_pool_page_hash_x_lock_all();
73 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
74 @@ -2066,11 +2066,11 @@
75 /* We release btr_search_latch every once in a while to
76 give other queries a chance to run. */
78 - buf_pool_mutex_exit_all();
79 + buf_pool_page_hash_x_unlock_all();
80 rw_lock_x_unlock(&btr_search_latch);
82 rw_lock_x_lock(&btr_search_latch);
83 - buf_pool_mutex_enter_all();
84 + buf_pool_page_hash_x_lock_all();
87 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
92 - buf_pool_mutex_exit_all();
93 + buf_pool_page_hash_x_unlock_all();
94 rw_lock_x_unlock(&btr_search_latch);
95 if (UNIV_LIKELY_NULL(heap)) {
97 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
98 --- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
99 +++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
101 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
102 #endif /* UNIV_DEBUG_VALGRIND */
104 - ut_ad(buf_pool_mutex_own(buf_pool));
105 + //ut_ad(buf_pool_mutex_own(buf_pool));
106 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
107 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
108 ut_ad(buf_pool->zip_free[i].start != bpage);
109 - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
110 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
112 #ifdef UNIV_DEBUG_VALGRIND
113 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
115 buf_pool->zip_free[] */
117 #ifdef UNIV_DEBUG_VALGRIND
118 - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
119 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
120 + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
121 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
123 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
124 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
126 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
127 #endif /* UNIV_DEBUG_VALGRIND */
129 - ut_ad(buf_pool_mutex_own(buf_pool));
130 + //ut_ad(buf_pool_mutex_own(buf_pool));
131 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
132 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
133 - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
134 + UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
136 #ifdef UNIV_DEBUG_VALGRIND
137 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
138 @@ -128,12 +130,13 @@
142 - ut_ad(buf_pool_mutex_own(buf_pool));
143 + //ut_ad(buf_pool_mutex_own(buf_pool));
144 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
145 ut_a(i < BUF_BUDDY_SIZES);
147 #ifndef UNIV_DEBUG_VALGRIND
148 /* Valgrind would complain about accessing free memory. */
149 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
150 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
151 ut_ad(buf_page_get_state(ut_list_node_313)
152 == BUF_BLOCK_ZIP_FREE)));
153 #endif /* !UNIV_DEBUG_VALGRIND */
154 @@ -177,16 +180,19 @@
155 buf_buddy_block_free(
156 /*=================*/
157 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
158 - void* buf) /*!< in: buffer frame to deallocate */
159 + void* buf, /*!< in: buffer frame to deallocate */
160 + ibool have_page_hash_mutex)
162 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
166 - ut_ad(buf_pool_mutex_own(buf_pool));
167 + //ut_ad(buf_pool_mutex_own(buf_pool));
168 ut_ad(!mutex_own(&buf_pool->zip_mutex));
169 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
171 + mutex_enter(&buf_pool->zip_hash_mutex);
173 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
174 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
175 && bpage->in_zip_hash && !bpage->in_page_hash),
176 @@ -198,12 +204,14 @@
177 ut_d(bpage->in_zip_hash = FALSE);
178 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
180 + mutex_exit(&buf_pool->zip_hash_mutex);
182 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
183 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
185 block = (buf_block_t*) bpage;
186 mutex_enter(&block->mutex);
187 - buf_LRU_block_free_non_file_page(block);
188 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
189 mutex_exit(&block->mutex);
191 ut_ad(buf_pool->buddy_n_frames > 0);
194 buf_pool_t* buf_pool = buf_pool_from_block(block);
195 const ulint fold = BUF_POOL_ZIP_FOLD(block);
196 - ut_ad(buf_pool_mutex_own(buf_pool));
197 + //ut_ad(buf_pool_mutex_own(buf_pool));
198 ut_ad(!mutex_own(&buf_pool->zip_mutex));
199 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
202 ut_ad(!block->page.in_page_hash);
203 ut_ad(!block->page.in_zip_hash);
204 ut_d(block->page.in_zip_hash = TRUE);
206 + mutex_enter(&buf_pool->zip_hash_mutex);
207 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
208 + mutex_exit(&buf_pool->zip_hash_mutex);
210 ut_d(buf_pool->buddy_n_frames++);
213 bpage->state = BUF_BLOCK_ZIP_FREE;
214 #ifndef UNIV_DEBUG_VALGRIND
215 /* Valgrind would complain about accessing free memory. */
216 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
217 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
218 ut_ad(buf_page_get_state(
220 == BUF_BLOCK_ZIP_FREE)));
221 @@ -291,25 +302,29 @@
222 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
223 ulint i, /*!< in: index of buf_pool->zip_free[],
224 or BUF_BUDDY_SIZES */
225 - ibool* lru) /*!< in: pointer to a variable that
226 + ibool* lru, /*!< in: pointer to a variable that
227 will be assigned TRUE if storage was
228 allocated from the LRU list and
229 buf_pool->mutex was temporarily
230 released, or NULL if the LRU list
231 should not be used */
232 + ibool have_page_hash_mutex)
236 - ut_ad(buf_pool_mutex_own(buf_pool));
237 + //ut_ad(buf_pool_mutex_own(buf_pool));
238 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
239 ut_ad(!mutex_own(&buf_pool->zip_mutex));
241 if (i < BUF_BUDDY_SIZES) {
242 /* Try to allocate from the buddy system. */
243 + mutex_enter(&buf_pool->zip_free_mutex);
244 block = buf_buddy_alloc_zip(buf_pool, i);
249 + mutex_exit(&buf_pool->zip_free_mutex);
252 /* Try allocating from the buf_pool->free list. */
253 @@ -326,19 +341,30 @@
256 /* Try replacing an uncompressed page in the buffer pool. */
257 - buf_pool_mutex_exit(buf_pool);
258 + //buf_pool_mutex_exit(buf_pool);
259 + mutex_exit(&buf_pool->LRU_list_mutex);
260 + if (have_page_hash_mutex) {
261 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
263 block = buf_LRU_get_free_block(buf_pool, 0);
265 - buf_pool_mutex_enter(buf_pool);
266 + //buf_pool_mutex_enter(buf_pool);
267 + mutex_enter(&buf_pool->LRU_list_mutex);
268 + if (have_page_hash_mutex) {
269 + rw_lock_x_lock(&buf_pool->page_hash_latch);
273 buf_buddy_block_register(block);
275 + mutex_enter(&buf_pool->zip_free_mutex);
276 block = buf_buddy_alloc_from(
277 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
280 buf_pool->buddy_stat[i].used++;
281 + mutex_exit(&buf_pool->zip_free_mutex);
288 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
290 - ut_ad(buf_pool_mutex_own(buf_pool));
291 + //ut_ad(buf_pool_mutex_own(buf_pool));
292 +#ifdef UNIV_SYNC_DEBUG
293 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
296 switch (buf_page_get_state(bpage)) {
297 case BUF_BLOCK_ZIP_FREE:
299 case BUF_BLOCK_FILE_PAGE:
300 case BUF_BLOCK_MEMORY:
301 case BUF_BLOCK_REMOVE_HASH:
303 + /* ut_error; */ /* optimistic */
304 case BUF_BLOCK_ZIP_DIRTY:
305 /* Cannot relocate dirty pages. */
310 mutex_enter(&buf_pool->zip_mutex);
311 + mutex_enter(&buf_pool->zip_free_mutex);
313 if (!buf_page_can_relocate(bpage)) {
314 mutex_exit(&buf_pool->zip_mutex);
315 + mutex_exit(&buf_pool->zip_free_mutex);
319 + if (bpage != buf_page_hash_get(buf_pool,
320 + bpage->space, bpage->offset)) {
321 + mutex_exit(&buf_pool->zip_mutex);
322 + mutex_exit(&buf_pool->zip_free_mutex);
326 @@ -384,18 +422,19 @@
327 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
329 /* relocate buf_pool->zip_clean */
330 - b = UT_LIST_GET_PREV(list, dpage);
331 - UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
332 + b = UT_LIST_GET_PREV(zip_list, dpage);
333 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
336 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
337 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
339 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
340 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
343 UNIV_MEM_INVALID(bpage, sizeof *bpage);
345 mutex_exit(&buf_pool->zip_mutex);
346 + mutex_exit(&buf_pool->zip_free_mutex);
350 @@ -409,14 +448,16 @@
351 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
352 void* src, /*!< in: block to relocate */
353 void* dst, /*!< in: free block to relocate to */
354 - ulint i) /*!< in: index of
355 + ulint i, /*!< in: index of
356 buf_pool->zip_free[] */
357 + ibool have_page_hash_mutex)
360 const ulint size = BUF_BUDDY_LOW << i;
361 ullint usec = ut_time_us(NULL);
363 - ut_ad(buf_pool_mutex_own(buf_pool));
364 + //ut_ad(buf_pool_mutex_own(buf_pool));
365 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
366 ut_ad(!mutex_own(&buf_pool->zip_mutex));
367 ut_ad(!ut_align_offset(src, size));
368 ut_ad(!ut_align_offset(dst, size));
370 if (size >= PAGE_ZIP_MIN_SIZE) {
371 /* This is a compressed page. */
373 + ulint space, page_no;
375 + if (!have_page_hash_mutex) {
376 + mutex_exit(&buf_pool->zip_free_mutex);
377 + mutex_enter(&buf_pool->LRU_list_mutex);
378 + rw_lock_x_lock(&buf_pool->page_hash_latch);
381 /* The src block may be split into smaller blocks,
382 some of which may be free. Thus, the
384 pool), so there is nothing wrong about this. The
385 mach_read_from_4() calls here will only trigger bogus
386 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
387 - ulint space = mach_read_from_4(
388 + space = mach_read_from_4(
389 (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
390 - ulint page_no = mach_read_from_4(
391 + page_no = mach_read_from_4(
392 (const byte*) src + FIL_PAGE_OFFSET);
393 /* Suppress Valgrind warnings about conditional jump
394 on uninitialized value. */
396 added to buf_pool->page_hash yet. Obviously,
397 it cannot be relocated. */
399 + if (!have_page_hash_mutex) {
400 + mutex_enter(&buf_pool->zip_free_mutex);
401 + mutex_exit(&buf_pool->LRU_list_mutex);
402 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
407 @@ -473,18 +526,27 @@
408 For the sake of simplicity, give up. */
409 ut_ad(page_zip_get_size(&bpage->zip) < size);
411 + if (!have_page_hash_mutex) {
412 + mutex_enter(&buf_pool->zip_free_mutex);
413 + mutex_exit(&buf_pool->LRU_list_mutex);
414 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
419 + /* To keep latch order */
420 + if (have_page_hash_mutex)
421 + mutex_exit(&buf_pool->zip_free_mutex);
423 /* The block must have been allocated, but it may
424 contain uninitialized data. */
425 UNIV_MEM_ASSERT_W(src, size);
427 - mutex = buf_page_get_mutex(bpage);
428 + mutex = buf_page_get_mutex_enter(bpage);
430 - mutex_enter(mutex);
431 + mutex_enter(&buf_pool->zip_free_mutex);
433 - if (buf_page_can_relocate(bpage)) {
434 + if (mutex && buf_page_can_relocate(bpage)) {
435 /* Relocate the compressed page. */
436 ut_a(bpage->zip.data == src);
437 memcpy(dst, src, size);
438 @@ -499,10 +561,22 @@
439 buddy_stat->relocated_usec
440 += ut_time_us(NULL) - usec;
443 + if (!have_page_hash_mutex) {
444 + mutex_exit(&buf_pool->LRU_list_mutex);
445 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
451 + if (!have_page_hash_mutex) {
452 + mutex_exit(&buf_pool->LRU_list_mutex);
453 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
459 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
460 /* This must be a buf_page_t object. */
461 #if UNIV_WORD_SIZE == 4
462 @@ -511,10 +585,31 @@
463 about uninitialized pad bytes. */
464 UNIV_MEM_ASSERT_RW(src, size);
467 + mutex_exit(&buf_pool->zip_free_mutex);
469 + if (!have_page_hash_mutex) {
470 + mutex_enter(&buf_pool->LRU_list_mutex);
471 + rw_lock_x_lock(&buf_pool->page_hash_latch);
474 if (buf_buddy_relocate_block(src, dst)) {
475 + mutex_enter(&buf_pool->zip_free_mutex);
477 + if (!have_page_hash_mutex) {
478 + mutex_exit(&buf_pool->LRU_list_mutex);
479 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
485 + mutex_enter(&buf_pool->zip_free_mutex);
487 + if (!have_page_hash_mutex) {
488 + mutex_exit(&buf_pool->LRU_list_mutex);
489 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
494 @@ -529,13 +624,15 @@
495 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
496 void* buf, /*!< in: block to be freed, must not be
497 pointed to by the buffer pool */
498 - ulint i) /*!< in: index of buf_pool->zip_free[],
499 + ulint i, /*!< in: index of buf_pool->zip_free[],
500 or BUF_BUDDY_SIZES */
501 + ibool have_page_hash_mutex)
506 - ut_ad(buf_pool_mutex_own(buf_pool));
507 + //ut_ad(buf_pool_mutex_own(buf_pool));
508 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
509 ut_ad(!mutex_own(&buf_pool->zip_mutex));
510 ut_ad(i <= BUF_BUDDY_SIZES);
511 ut_ad(buf_pool->buddy_stat[i].used > 0);
513 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
515 if (i == BUF_BUDDY_SIZES) {
516 - buf_buddy_block_free(buf_pool, buf);
517 + mutex_exit(&buf_pool->zip_free_mutex);
518 + buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
519 + mutex_enter(&buf_pool->zip_free_mutex);
527 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
528 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
529 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
532 @@ -600,13 +699,13 @@
533 #ifndef UNIV_DEBUG_VALGRIND
535 /* Valgrind would complain about accessing free memory. */
536 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
537 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
538 ut_ad(buf_page_get_state(ut_list_node_313)
539 == BUF_BLOCK_ZIP_FREE)));
540 #endif /* UNIV_DEBUG_VALGRIND */
542 /* The buddy is not free. Is there a free block of this size? */
543 - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
544 + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
547 /* Remove the block from the free list, because a successful
549 buf_buddy_remove_from_free(buf_pool, bpage, i);
551 /* Try to relocate the buddy of buf to the free block. */
552 - if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
553 + if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
555 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
557 @@ -636,14 +735,14 @@
559 (Parts of the buddy can be free in
560 buf_pool->zip_free[j] with j < i.) */
561 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
562 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
563 ut_ad(buf_page_get_state(
565 == BUF_BLOCK_ZIP_FREE
566 && ut_list_node_313 != buddy)));
567 #endif /* !UNIV_DEBUG_VALGRIND */
569 - if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
570 + if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
573 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
574 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
575 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
576 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
578 #ifdef UNIV_PFS_RWLOCK
579 /* Keys to register buffer block related rwlocks and mutexes with
580 performance schema */
581 +UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
582 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
583 # ifdef UNIV_SYNC_DEBUG
584 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
586 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
587 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
588 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
589 +UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
590 +UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
591 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
592 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
593 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
594 #endif /* UNIV_PFS_MUTEX */
597 block->page.in_zip_hash = FALSE;
598 block->page.in_flush_list = FALSE;
599 block->page.in_free_list = FALSE;
600 - block->in_unzip_LRU_list = FALSE;
601 #endif /* UNIV_DEBUG */
602 block->page.in_LRU_list = FALSE;
603 + block->in_unzip_LRU_list = FALSE;
604 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
605 block->n_pointers = 0;
606 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
608 memset(block->frame, '\0', UNIV_PAGE_SIZE);
610 /* Add the block to the free list */
611 - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
612 + mutex_enter(&buf_pool->free_list_mutex);
613 + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
615 ut_d(block->page.in_free_list = TRUE);
616 + mutex_exit(&buf_pool->free_list_mutex);
617 ut_ad(buf_pool_from_block(block) == buf_pool);
620 @@ -1038,7 +1045,8 @@
621 buf_chunk_t* chunk = buf_pool->chunks;
624 - ut_ad(buf_pool_mutex_own(buf_pool));
625 + //ut_ad(buf_pool_mutex_own(buf_pool));
626 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
627 for (n = buf_pool->n_chunks; n--; chunk++) {
629 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
630 @@ -1138,7 +1146,7 @@
632 const buf_block_t* block_end;
634 - ut_ad(buf_pool_mutex_own(buf_pool));
635 + //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
637 block_end = chunk->blocks + chunk->size;
639 @@ -1150,8 +1158,10 @@
640 ut_ad(!block->in_unzip_LRU_list);
641 ut_ad(!block->page.in_flush_list);
642 /* Remove the block from the free list. */
643 + mutex_enter(&buf_pool->free_list_mutex);
644 ut_ad(block->page.in_free_list);
645 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
646 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
647 + mutex_exit(&buf_pool->free_list_mutex);
649 /* Free the latches. */
650 mutex_free(&block->mutex);
651 @@ -1208,9 +1218,21 @@
652 ------------------------------- */
653 mutex_create(buf_pool_mutex_key,
654 &buf_pool->mutex, SYNC_BUF_POOL);
655 + mutex_create(buf_pool_LRU_list_mutex_key,
656 + &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
657 + rw_lock_create(buf_pool_page_hash_key,
658 + &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
659 + mutex_create(buf_pool_free_list_mutex_key,
660 + &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
661 + mutex_create(buf_pool_zip_free_mutex_key,
662 + &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
663 + mutex_create(buf_pool_zip_hash_mutex_key,
664 + &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
665 mutex_create(buf_pool_zip_mutex_key,
666 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
668 + mutex_enter(&buf_pool->LRU_list_mutex);
669 + rw_lock_x_lock(&buf_pool->page_hash_latch);
670 buf_pool_mutex_enter(buf_pool);
672 if (buf_pool_size > 0) {
673 @@ -1223,6 +1245,8 @@
677 + mutex_exit(&buf_pool->LRU_list_mutex);
678 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
679 buf_pool_mutex_exit(buf_pool);
682 @@ -1253,6 +1277,8 @@
684 /* All fields are initialized by mem_zalloc(). */
686 + mutex_exit(&buf_pool->LRU_list_mutex);
687 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
688 buf_pool_mutex_exit(buf_pool);
691 @@ -1467,7 +1493,11 @@
693 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
695 - ut_ad(buf_pool_mutex_own(buf_pool));
696 + //ut_ad(buf_pool_mutex_own(buf_pool));
697 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
698 +#ifdef UNIV_SYNC_DEBUG
699 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
701 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
702 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
703 ut_a(bpage->buf_fix_count == 0);
704 @@ -1554,7 +1584,8 @@
707 btr_search_disable(); /* Empty the adaptive hash index again */
708 - buf_pool_mutex_enter(buf_pool);
709 + //buf_pool_mutex_enter(buf_pool);
710 + mutex_enter(&buf_pool->LRU_list_mutex);
713 if (buf_pool->n_chunks <= 1) {
714 @@ -1625,7 +1656,7 @@
716 buf_LRU_make_block_old(&block->page);
718 - } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
719 + } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
723 @@ -1633,7 +1664,8 @@
724 mutex_exit(&block->mutex);
727 - buf_pool_mutex_exit(buf_pool);
728 + //buf_pool_mutex_exit(buf_pool);
729 + mutex_exit(&buf_pool->LRU_list_mutex);
731 /* Request for a flush of the chunk if it helps.
732 Do not flush if there are non-free blocks, since
733 @@ -1683,7 +1715,8 @@
735 buf_pool->old_pool_size = buf_pool->curr_pool_size;
737 - buf_pool_mutex_exit(buf_pool);
738 + //buf_pool_mutex_exit(buf_pool);
739 + mutex_exit(&buf_pool->LRU_list_mutex);
743 @@ -1724,7 +1757,9 @@
744 hash_table_t* zip_hash;
745 hash_table_t* page_hash;
747 - buf_pool_mutex_enter(buf_pool);
748 + //buf_pool_mutex_enter(buf_pool);
749 + mutex_enter(&buf_pool->LRU_list_mutex);
750 + rw_lock_x_lock(&buf_pool->page_hash_latch);
752 /* Free, create, and populate the hash table. */
753 hash_table_free(buf_pool->page_hash);
754 @@ -1765,8 +1800,9 @@
755 All such blocks are either in buf_pool->zip_clean or
756 in buf_pool->flush_list. */
758 + mutex_enter(&buf_pool->zip_mutex);
759 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
760 - b = UT_LIST_GET_NEXT(list, b)) {
761 + b = UT_LIST_GET_NEXT(zip_list, b)) {
762 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
763 ut_ad(!b->in_flush_list);
764 ut_ad(b->in_LRU_list);
765 @@ -1776,10 +1812,11 @@
766 HASH_INSERT(buf_page_t, hash, page_hash,
767 buf_page_address_fold(b->space, b->offset), b);
769 + mutex_exit(&buf_pool->zip_mutex);
771 buf_flush_list_mutex_enter(buf_pool);
772 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
773 - b = UT_LIST_GET_NEXT(list, b)) {
774 + b = UT_LIST_GET_NEXT(flush_list, b)) {
775 ut_ad(b->in_flush_list);
776 ut_ad(b->in_LRU_list);
777 ut_ad(b->in_page_hash);
778 @@ -1806,7 +1843,9 @@
781 buf_flush_list_mutex_exit(buf_pool);
782 - buf_pool_mutex_exit(buf_pool);
783 + //buf_pool_mutex_exit(buf_pool);
784 + mutex_exit(&buf_pool->LRU_list_mutex);
785 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
788 /********************************************************************
789 @@ -1853,21 +1892,32 @@
792 buf_pool_t* buf_pool = buf_pool_get(space, offset);
793 + mutex_t* block_mutex;
795 - ut_ad(buf_pool_mutex_own(buf_pool));
796 + //ut_ad(buf_pool_mutex_own(buf_pool));
798 + rw_lock_x_lock(&buf_pool->page_hash_latch);
799 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
801 + block_mutex = buf_page_get_mutex_enter(bpage);
805 if (UNIV_LIKELY_NULL(bpage)) {
806 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
807 /* The page was loaded meanwhile. */
808 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
811 /* Add to an existing watch. */
812 bpage->buf_fix_count++;
813 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
814 + mutex_exit(block_mutex);
818 + /* buf_pool->watch is protected by zip_mutex for now */
819 + mutex_enter(&buf_pool->zip_mutex);
820 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
821 bpage = &buf_pool->watch[i];
823 @@ -1891,10 +1941,12 @@
824 bpage->space = space;
825 bpage->offset = offset;
826 bpage->buf_fix_count = 1;
828 + bpage->buf_pool_index = buf_pool_index(buf_pool);
829 ut_d(bpage->in_page_hash = TRUE);
830 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
832 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
833 + mutex_exit(&buf_pool->zip_mutex);
835 case BUF_BLOCK_ZIP_PAGE:
836 ut_ad(bpage->in_page_hash);
837 @@ -1912,6 +1964,8 @@
840 /* Fix compiler warning */
841 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
842 + mutex_exit(&buf_pool->zip_mutex);
846 @@ -1941,6 +1995,8 @@
850 + mutex_enter(&buf_pool->LRU_list_mutex);
851 + rw_lock_x_lock(&buf_pool->page_hash_latch);
852 buf_pool_mutex_enter(buf_pool);
853 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
855 @@ -1959,6 +2015,8 @@
856 buf_pool->n_chunks++;
859 + mutex_exit(&buf_pool->LRU_list_mutex);
860 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
861 buf_pool_mutex_exit(buf_pool);
864 @@ -2046,7 +2104,11 @@
866 buf_page_t* watch) /*!< in/out: sentinel for watch */
868 - ut_ad(buf_pool_mutex_own(buf_pool));
869 + //ut_ad(buf_pool_mutex_own(buf_pool));
870 +#ifdef UNIV_SYNC_DEBUG
871 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
873 + ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
875 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
876 ut_d(watch->in_page_hash = FALSE);
877 @@ -2068,28 +2130,31 @@
878 buf_pool_t* buf_pool = buf_pool_get(space, offset);
879 ulint fold = buf_page_address_fold(space, offset);
881 - buf_pool_mutex_enter(buf_pool);
882 + //buf_pool_mutex_enter(buf_pool);
883 + rw_lock_x_lock(&buf_pool->page_hash_latch);
884 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
885 /* The page must exist because buf_pool_watch_set()
886 increments buf_fix_count. */
889 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
890 - mutex_t* mutex = buf_page_get_mutex(bpage);
891 + mutex_t* mutex = buf_page_get_mutex_enter(bpage);
893 - mutex_enter(mutex);
894 ut_a(bpage->buf_fix_count > 0);
895 bpage->buf_fix_count--;
898 + mutex_enter(&buf_pool->zip_mutex);
899 ut_a(bpage->buf_fix_count > 0);
901 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
902 buf_pool_watch_remove(buf_pool, fold, bpage);
904 + mutex_exit(&buf_pool->zip_mutex);
907 - buf_pool_mutex_exit(buf_pool);
908 + //buf_pool_mutex_exit(buf_pool);
909 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
912 /****************************************************************//**
913 @@ -2109,14 +2174,16 @@
914 buf_pool_t* buf_pool = buf_pool_get(space, offset);
915 ulint fold = buf_page_address_fold(space, offset);
917 - buf_pool_mutex_enter(buf_pool);
918 + //buf_pool_mutex_enter(buf_pool);
919 + rw_lock_s_lock(&buf_pool->page_hash_latch);
921 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
922 /* The page must exist because buf_pool_watch_set()
923 increments buf_fix_count. */
925 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
926 - buf_pool_mutex_exit(buf_pool);
927 + //buf_pool_mutex_exit(buf_pool);
928 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
932 @@ -2133,13 +2200,15 @@
934 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
936 - buf_pool_mutex_enter(buf_pool);
937 + //buf_pool_mutex_enter(buf_pool);
938 + mutex_enter(&buf_pool->LRU_list_mutex);
940 ut_a(buf_page_in_file(bpage));
942 buf_LRU_make_block_young(bpage);
944 - buf_pool_mutex_exit(buf_pool);
945 + //buf_pool_mutex_exit(buf_pool);
946 + mutex_exit(&buf_pool->LRU_list_mutex);
949 /********************************************************************//**
950 @@ -2163,14 +2232,20 @@
951 ut_a(buf_page_in_file(bpage));
953 if (buf_page_peek_if_too_old(bpage)) {
954 - buf_pool_mutex_enter(buf_pool);
955 + //buf_pool_mutex_enter(buf_pool);
956 + mutex_enter(&buf_pool->LRU_list_mutex);
957 buf_LRU_make_block_young(bpage);
958 - buf_pool_mutex_exit(buf_pool);
959 + //buf_pool_mutex_exit(buf_pool);
960 + mutex_exit(&buf_pool->LRU_list_mutex);
961 } else if (!access_time) {
962 ulint time_ms = ut_time_ms();
963 - buf_pool_mutex_enter(buf_pool);
964 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
965 + //buf_pool_mutex_enter(buf_pool);
967 buf_page_set_accessed(bpage, time_ms);
968 - buf_pool_mutex_exit(buf_pool);
969 + mutex_exit(block_mutex);
971 + //buf_pool_mutex_exit(buf_pool);
975 @@ -2187,7 +2262,8 @@
977 buf_pool_t* buf_pool = buf_pool_get(space, offset);
979 - buf_pool_mutex_enter(buf_pool);
980 + //buf_pool_mutex_enter(buf_pool);
981 + rw_lock_s_lock(&buf_pool->page_hash_latch);
983 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
985 @@ -2196,7 +2272,8 @@
986 block->check_index_page_at_flush = FALSE;
989 - buf_pool_mutex_exit(buf_pool);
990 + //buf_pool_mutex_exit(buf_pool);
991 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
994 /********************************************************************//**
995 @@ -2215,7 +2292,8 @@
997 buf_pool_t* buf_pool = buf_pool_get(space, offset);
999 - buf_pool_mutex_enter(buf_pool);
1000 + //buf_pool_mutex_enter(buf_pool);
1001 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1003 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1005 @@ -2226,7 +2304,8 @@
1006 is_hashed = block->is_hashed;
1009 - buf_pool_mutex_exit(buf_pool);
1010 + //buf_pool_mutex_exit(buf_pool);
1011 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1015 @@ -2248,7 +2327,8 @@
1017 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1019 - buf_pool_mutex_enter(buf_pool);
1020 + //buf_pool_mutex_enter(buf_pool);
1021 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1023 bpage = buf_page_hash_get(buf_pool, space, offset);
1025 @@ -2257,7 +2337,8 @@
1026 bpage->file_page_was_freed = TRUE;
1029 - buf_pool_mutex_exit(buf_pool);
1030 + //buf_pool_mutex_exit(buf_pool);
1031 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1035 @@ -2278,7 +2359,8 @@
1037 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1039 - buf_pool_mutex_enter(buf_pool);
1040 + //buf_pool_mutex_enter(buf_pool);
1041 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1043 bpage = buf_page_hash_get(buf_pool, space, offset);
1045 @@ -2287,7 +2369,8 @@
1046 bpage->file_page_was_freed = FALSE;
1049 - buf_pool_mutex_exit(buf_pool);
1050 + //buf_pool_mutex_exit(buf_pool);
1051 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1055 @@ -2322,8 +2405,9 @@
1056 buf_pool->stat.n_page_gets++;
1059 - buf_pool_mutex_enter(buf_pool);
1060 + //buf_pool_mutex_enter(buf_pool);
1062 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1063 bpage = buf_page_hash_get(buf_pool, space, offset);
1065 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1066 @@ -2332,7 +2416,8 @@
1068 /* Page not in buf_pool: needs to be read from file */
1070 - buf_pool_mutex_exit(buf_pool);
1071 + //buf_pool_mutex_exit(buf_pool);
1072 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1074 buf_read_page(space, zip_size, offset);
1076 @@ -2344,10 +2429,15 @@
1077 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1078 /* There is no compressed page. */
1080 - buf_pool_mutex_exit(buf_pool);
1081 + //buf_pool_mutex_exit(buf_pool);
1082 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1086 + block_mutex = buf_page_get_mutex_enter(bpage);
1088 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1090 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1092 switch (buf_page_get_state(bpage)) {
1093 @@ -2356,19 +2446,19 @@
1094 case BUF_BLOCK_MEMORY:
1095 case BUF_BLOCK_REMOVE_HASH:
1096 case BUF_BLOCK_ZIP_FREE:
1098 + mutex_exit(block_mutex);
1100 case BUF_BLOCK_ZIP_PAGE:
1101 case BUF_BLOCK_ZIP_DIRTY:
1102 - block_mutex = &buf_pool->zip_mutex;
1103 - mutex_enter(block_mutex);
1104 + ut_a(block_mutex == &buf_pool->zip_mutex);
1105 bpage->buf_fix_count++;
1107 case BUF_BLOCK_FILE_PAGE:
1108 - block_mutex = &((buf_block_t*) bpage)->mutex;
1109 - mutex_enter(block_mutex);
1110 + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1112 /* Discard the uncompressed page frame if possible. */
1113 - if (buf_LRU_free_block(bpage, FALSE, NULL)
1114 + if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
1117 mutex_exit(block_mutex);
1118 @@ -2387,7 +2477,7 @@
1119 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1120 access_time = buf_page_is_accessed(bpage);
1122 - buf_pool_mutex_exit(buf_pool);
1123 + //buf_pool_mutex_exit(buf_pool);
1125 mutex_exit(block_mutex);
1127 @@ -2696,7 +2786,7 @@
1128 const buf_block_t* block) /*!< in: pointer to block,
1131 - ut_ad(buf_pool_mutex_own(buf_pool));
1132 + //ut_ad(buf_pool_mutex_own(buf_pool));
1134 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1135 /* The pointer should be aligned. */
1136 @@ -2732,6 +2822,7 @@
1140 + mutex_t* block_mutex = NULL;
1141 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1144 @@ -2754,9 +2845,11 @@
1145 fold = buf_page_address_fold(space, offset);
1148 - buf_pool_mutex_enter(buf_pool);
1149 + //buf_pool_mutex_enter(buf_pool);
1152 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1154 /* If the guess is a compressed page descriptor that
1155 has been allocated by buf_buddy_alloc(), it may have
1156 been invalidated by buf_buddy_relocate(). In that
1157 @@ -2765,11 +2858,15 @@
1158 the guess may be pointing to a buffer pool chunk that
1159 has been released when resizing the buffer pool. */
1161 - if (!buf_block_is_uncompressed(buf_pool, block)
1162 + if (!block_mutex) {
1163 + block = guess = NULL;
1164 + } else if (!buf_block_is_uncompressed(buf_pool, block)
1165 || offset != block->page.offset
1166 || space != block->page.space
1167 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1169 + mutex_exit(block_mutex);
1171 block = guess = NULL;
1173 ut_ad(!block->page.in_zip_hash);
1174 @@ -2778,12 +2875,19 @@
1177 if (block == NULL) {
1178 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1179 block = (buf_block_t*) buf_page_hash_get_low(
1180 buf_pool, space, offset, fold);
1182 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1183 + ut_a(block_mutex);
1185 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1189 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1190 + mutex_exit(block_mutex);
1194 @@ -2795,12 +2899,14 @@
1195 space, offset, fold);
1197 if (UNIV_LIKELY_NULL(block)) {
1199 + block_mutex = buf_page_get_mutex((buf_page_t*)block);
1200 + ut_a(block_mutex);
1201 + ut_ad(mutex_own(block_mutex));
1206 - buf_pool_mutex_exit(buf_pool);
1207 + //buf_pool_mutex_exit(buf_pool);
1209 if (mode == BUF_GET_IF_IN_POOL
1210 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1211 @@ -2848,7 +2954,8 @@
1212 /* The page is being read to buffer pool,
1213 but we cannot wait around for the read to
1215 - buf_pool_mutex_exit(buf_pool);
1216 + //buf_pool_mutex_exit(buf_pool);
1217 + mutex_exit(block_mutex);
1221 @@ -2858,38 +2965,49 @@
1224 case BUF_BLOCK_FILE_PAGE:
1225 + if (block_mutex == &buf_pool->zip_mutex) {
1226 + /* this is the wrong mutex; release it */
1227 + mutex_exit(block_mutex);
1232 case BUF_BLOCK_ZIP_PAGE:
1233 case BUF_BLOCK_ZIP_DIRTY:
1234 + ut_ad(block_mutex == &buf_pool->zip_mutex);
1235 bpage = &block->page;
1236 /* Protect bpage->buf_fix_count. */
1237 - mutex_enter(&buf_pool->zip_mutex);
1238 + //mutex_enter(&buf_pool->zip_mutex);
1240 if (bpage->buf_fix_count
1241 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1242 /* This condition often occurs when the buffer
1243 is not buffer-fixed, but I/O-fixed by
1244 buf_page_init_for_read(). */
1245 - mutex_exit(&buf_pool->zip_mutex);
1246 + //mutex_exit(&buf_pool->zip_mutex);
1248 /* The block is buffer-fixed or I/O-fixed.
1250 - buf_pool_mutex_exit(buf_pool);
1251 + //buf_pool_mutex_exit(buf_pool);
1252 + mutex_exit(block_mutex);
1253 os_thread_sleep(WAIT_FOR_READ);
1258 /* Allocate an uncompressed page. */
1259 - buf_pool_mutex_exit(buf_pool);
1260 - mutex_exit(&buf_pool->zip_mutex);
1261 + //buf_pool_mutex_exit(buf_pool);
1262 + //mutex_exit(&buf_pool->zip_mutex);
1263 + mutex_exit(block_mutex);
1265 block = buf_LRU_get_free_block(buf_pool, 0);
1267 + block_mutex = &block->mutex;
1269 - buf_pool_mutex_enter(buf_pool);
1270 - mutex_enter(&block->mutex);
1271 + //buf_pool_mutex_enter(buf_pool);
1272 + mutex_enter(&buf_pool->LRU_list_mutex);
1273 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1274 + mutex_enter(block_mutex);
1277 buf_page_t* hash_bpage;
1278 @@ -2902,35 +3020,47 @@
1279 while buf_pool->mutex was released.
1280 Free the block that was allocated. */
1282 - buf_LRU_block_free_non_file_page(block);
1283 - mutex_exit(&block->mutex);
1284 + buf_LRU_block_free_non_file_page(block, TRUE);
1285 + mutex_exit(block_mutex);
1287 block = (buf_block_t*) hash_bpage;
1289 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1290 + ut_a(block_mutex);
1292 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1293 + mutex_exit(&buf_pool->LRU_list_mutex);
1298 + mutex_enter(&buf_pool->zip_mutex);
1301 (bpage->buf_fix_count
1302 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1304 + mutex_exit(&buf_pool->zip_mutex);
1305 /* The block was buffer-fixed or I/O-fixed
1306 while buf_pool->mutex was not held by this thread.
1307 Free the block that was allocated and try again.
1308 This should be extremely unlikely. */
1310 - buf_LRU_block_free_non_file_page(block);
1311 - mutex_exit(&block->mutex);
1312 + buf_LRU_block_free_non_file_page(block, TRUE);
1313 + //mutex_exit(&block->mutex);
1315 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1316 + mutex_exit(&buf_pool->LRU_list_mutex);
1317 goto wait_until_unfixed;
1320 /* Move the compressed page from bpage to block,
1321 and uncompress it. */
1323 - mutex_enter(&buf_pool->zip_mutex);
1325 buf_relocate(bpage, &block->page);
1327 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1329 buf_block_init_low(block);
1330 block->lock_hash_val = lock_rec_hash(space, offset);
1332 @@ -2939,7 +3069,7 @@
1334 if (buf_page_get_state(&block->page)
1335 == BUF_BLOCK_ZIP_PAGE) {
1336 - UT_LIST_REMOVE(list, buf_pool->zip_clean,
1337 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1339 ut_ad(!block->page.in_flush_list);
1341 @@ -2956,19 +3086,24 @@
1342 /* Insert at the front of unzip_LRU list */
1343 buf_unzip_LRU_add_block(block, FALSE);
1345 + mutex_exit(&buf_pool->LRU_list_mutex);
1347 block->page.buf_fix_count = 1;
1348 buf_block_set_io_fix(block, BUF_IO_READ);
1349 rw_lock_x_lock_func(&block->lock, 0, file, line);
1351 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1353 - mutex_exit(&block->mutex);
1354 + mutex_exit(block_mutex);
1355 mutex_exit(&buf_pool->zip_mutex);
1357 + buf_pool_mutex_enter(buf_pool);
1358 buf_pool->n_pend_unzip++;
1359 + buf_pool_mutex_exit(buf_pool);
1361 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1362 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1364 - buf_pool_mutex_exit(buf_pool);
1365 + //buf_pool_mutex_exit(buf_pool);
1367 /* Decompress the page and apply buffered operations
1368 while not holding buf_pool->mutex or block->mutex. */
1369 @@ -2981,12 +3116,15 @@
1372 /* Unfix and unlatch the block. */
1373 - buf_pool_mutex_enter(buf_pool);
1374 - mutex_enter(&block->mutex);
1375 + //buf_pool_mutex_enter(buf_pool);
1376 + block_mutex = &block->mutex;
1377 + mutex_enter(block_mutex);
1378 block->page.buf_fix_count--;
1379 buf_block_set_io_fix(block, BUF_IO_NONE);
1380 - mutex_exit(&block->mutex);
1382 + buf_pool_mutex_enter(buf_pool);
1383 buf_pool->n_pend_unzip--;
1384 + buf_pool_mutex_exit(buf_pool);
1385 rw_lock_x_unlock(&block->lock);
1388 @@ -3002,7 +3140,7 @@
1390 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1392 - mutex_enter(&block->mutex);
1393 + //mutex_enter(&block->mutex);
1394 #if UNIV_WORD_SIZE == 4
1395 /* On 32-bit systems, there is no padding in buf_page_t. On
1396 other systems, Valgrind could complain about uninitialized pad
1397 @@ -3015,7 +3153,7 @@
1398 /* Try to evict the block from the buffer pool, to use the
1399 insert buffer (change buffer) as much as possible. */
1401 - if (buf_LRU_free_block(&block->page, TRUE, NULL)
1402 + if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
1404 mutex_exit(&block->mutex);
1405 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1406 @@ -3052,13 +3190,14 @@
1408 buf_block_buf_fix_inc(block, file, line);
1410 - mutex_exit(&block->mutex);
1411 + //mutex_exit(&block->mutex);
1413 /* Check if this is the first access to the page */
1415 access_time = buf_page_is_accessed(&block->page);
1417 - buf_pool_mutex_exit(buf_pool);
1418 + //buf_pool_mutex_exit(buf_pool);
1419 + mutex_exit(block_mutex);
1421 buf_page_set_accessed_make_young(&block->page, access_time);
1423 @@ -3291,9 +3430,11 @@
1424 buf_pool = buf_pool_from_block(block);
1426 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1427 - buf_pool_mutex_enter(buf_pool);
1428 + //buf_pool_mutex_enter(buf_pool);
1429 + mutex_enter(&buf_pool->LRU_list_mutex);
1430 buf_LRU_make_block_young(&block->page);
1431 - buf_pool_mutex_exit(buf_pool);
1432 + //buf_pool_mutex_exit(buf_pool);
1433 + mutex_exit(&buf_pool->LRU_list_mutex);
1434 } else if (!buf_page_is_accessed(&block->page)) {
1435 /* Above, we do a dirty read on purpose, to avoid
1436 mutex contention. The field buf_page_t::access_time
1437 @@ -3301,9 +3442,11 @@
1438 field must be protected by mutex, however. */
1439 ulint time_ms = ut_time_ms();
1441 - buf_pool_mutex_enter(buf_pool);
1442 + //buf_pool_mutex_enter(buf_pool);
1443 + mutex_enter(&block->mutex);
1444 buf_page_set_accessed(&block->page, time_ms);
1445 - buf_pool_mutex_exit(buf_pool);
1446 + //buf_pool_mutex_exit(buf_pool);
1447 + mutex_exit(&block->mutex);
1450 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1451 @@ -3370,18 +3513,21 @@
1453 ut_ad(mtr->state == MTR_ACTIVE);
1455 - buf_pool_mutex_enter(buf_pool);
1456 + //buf_pool_mutex_enter(buf_pool);
1457 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1458 block = buf_block_hash_get(buf_pool, space_id, page_no);
1460 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1461 - buf_pool_mutex_exit(buf_pool);
1462 + //buf_pool_mutex_exit(buf_pool);
1463 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1467 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1469 mutex_enter(&block->mutex);
1470 - buf_pool_mutex_exit(buf_pool);
1471 + //buf_pool_mutex_exit(buf_pool);
1472 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1474 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1475 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1476 @@ -3470,7 +3616,10 @@
1477 buf_page_t* hash_page;
1478 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1480 - ut_ad(buf_pool_mutex_own(buf_pool));
1481 + //ut_ad(buf_pool_mutex_own(buf_pool));
1482 +#ifdef UNIV_SYNC_DEBUG
1483 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1485 ut_ad(mutex_own(&(block->mutex)));
1486 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1488 @@ -3499,11 +3648,14 @@
1489 if (UNIV_LIKELY(!hash_page)) {
1490 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1491 /* Preserve the reference count. */
1492 - ulint buf_fix_count = hash_page->buf_fix_count;
1493 + ulint buf_fix_count;
1495 + mutex_enter(&buf_pool->zip_mutex);
1496 + buf_fix_count = hash_page->buf_fix_count;
1497 ut_a(buf_fix_count > 0);
1498 block->page.buf_fix_count += buf_fix_count;
1499 buf_pool_watch_remove(buf_pool, fold, hash_page);
1500 + mutex_exit(&buf_pool->zip_mutex);
1503 "InnoDB: Error: page %lu %lu already found"
1504 @@ -3513,7 +3665,8 @@
1505 (const void*) hash_page, (const void*) block);
1506 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1507 mutex_exit(&block->mutex);
1508 - buf_pool_mutex_exit(buf_pool);
1509 + //buf_pool_mutex_exit(buf_pool);
1510 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1514 @@ -3597,7 +3750,9 @@
1516 fold = buf_page_address_fold(space, offset);
1518 - buf_pool_mutex_enter(buf_pool);
1519 + //buf_pool_mutex_enter(buf_pool);
1520 + mutex_enter(&buf_pool->LRU_list_mutex);
1521 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1523 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1524 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1525 @@ -3606,9 +3761,15 @@
1528 mutex_enter(&block->mutex);
1529 - buf_LRU_block_free_non_file_page(block);
1530 + mutex_exit(&buf_pool->LRU_list_mutex);
1531 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1532 + buf_LRU_block_free_non_file_page(block, FALSE);
1533 mutex_exit(&block->mutex);
1536 + mutex_exit(&buf_pool->LRU_list_mutex);
1537 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1542 @@ -3631,6 +3792,8 @@
1544 buf_page_init(space, offset, fold, block);
1546 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1548 /* The block must be put to the LRU list, to the old blocks */
1549 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1551 @@ -3658,7 +3821,7 @@
1552 been added to buf_pool->LRU and
1553 buf_pool->page_hash. */
1554 mutex_exit(&block->mutex);
1555 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1556 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1557 mutex_enter(&block->mutex);
1558 block->page.zip.data = data;
1560 @@ -3671,6 +3834,7 @@
1561 buf_unzip_LRU_add_block(block, TRUE);
1564 + mutex_exit(&buf_pool->LRU_list_mutex);
1565 mutex_exit(&block->mutex);
1567 /* Defer buf_buddy_alloc() until after the block has
1568 @@ -3682,8 +3846,8 @@
1569 control block (bpage), in order to avoid the
1570 invocation of buf_buddy_relocate_block() on
1571 uninitialized data. */
1572 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1573 - bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1574 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1575 + bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1577 /* Initialize the buf_pool pointer. */
1578 bpage->buf_pool_index = buf_pool_index(buf_pool);
1579 @@ -3702,8 +3866,11 @@
1581 /* The block was added by some other thread. */
1583 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1584 - buf_buddy_free(buf_pool, data, zip_size);
1585 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1586 + buf_buddy_free(buf_pool, data, zip_size, TRUE);
1588 + mutex_exit(&buf_pool->LRU_list_mutex);
1589 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1593 @@ -3747,18 +3914,24 @@
1594 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1597 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1599 /* The block must be put to the LRU list, to the old blocks */
1600 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1601 buf_LRU_insert_zip_clean(bpage);
1603 + mutex_exit(&buf_pool->LRU_list_mutex);
1605 buf_page_set_io_fix(bpage, BUF_IO_READ);
1607 mutex_exit(&buf_pool->zip_mutex);
1610 + buf_pool_mutex_enter(buf_pool);
1611 buf_pool->n_pend_reads++;
1613 buf_pool_mutex_exit(buf_pool);
1615 + //buf_pool_mutex_exit(buf_pool);
1617 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1619 @@ -3800,7 +3973,9 @@
1621 fold = buf_page_address_fold(space, offset);
1623 - buf_pool_mutex_enter(buf_pool);
1624 + //buf_pool_mutex_enter(buf_pool);
1625 + mutex_enter(&buf_pool->LRU_list_mutex);
1626 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1628 block = (buf_block_t*) buf_page_hash_get_low(
1629 buf_pool, space, offset, fold);
1630 @@ -3816,7 +3991,9 @@
1631 #endif /* UNIV_DEBUG_FILE_ACCESSES */
1633 /* Page can be found in buf_pool */
1634 - buf_pool_mutex_exit(buf_pool);
1635 + //buf_pool_mutex_exit(buf_pool);
1636 + mutex_exit(&buf_pool->LRU_list_mutex);
1637 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1639 buf_block_free(free_block);
1641 @@ -3838,6 +4015,7 @@
1642 mutex_enter(&block->mutex);
1644 buf_page_init(space, offset, fold, block);
1645 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1647 /* The block must be put to the LRU list */
1648 buf_LRU_add_block(&block->page, FALSE);
1649 @@ -3864,7 +4042,7 @@
1650 the reacquisition of buf_pool->mutex. We also must
1651 defer this operation until after the block descriptor
1652 has been added to buf_pool->LRU and buf_pool->page_hash. */
1653 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1654 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1655 mutex_enter(&block->mutex);
1656 block->page.zip.data = data;
1658 @@ -3882,7 +4060,8 @@
1660 buf_page_set_accessed(&block->page, time_ms);
1662 - buf_pool_mutex_exit(buf_pool);
1663 + //buf_pool_mutex_exit(buf_pool);
1664 + mutex_exit(&buf_pool->LRU_list_mutex);
1666 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1668 @@ -3933,6 +4112,8 @@
1669 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1670 const ibool uncompressed = (buf_page_get_state(bpage)
1671 == BUF_BLOCK_FILE_PAGE);
1672 + ibool have_LRU_mutex = FALSE;
1673 + mutex_t* block_mutex;
1675 ut_a(buf_page_in_file(bpage));
1677 @@ -4066,8 +4247,26 @@
1681 + if (io_type == BUF_IO_WRITE
1682 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1683 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1684 + /* to keep consistency in buf_LRU_insert_zip_clean() */
1685 + have_LRU_mutex = TRUE; /* optimistic */
1688 + if (have_LRU_mutex)
1689 + mutex_enter(&buf_pool->LRU_list_mutex);
1690 + block_mutex = buf_page_get_mutex_enter(bpage);
1691 + ut_a(block_mutex);
1692 + if (io_type == BUF_IO_WRITE
1693 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1694 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1695 + && !have_LRU_mutex) {
1696 + mutex_exit(block_mutex);
1697 + have_LRU_mutex = TRUE;
1700 buf_pool_mutex_enter(buf_pool);
1701 - mutex_enter(buf_page_get_mutex(bpage));
1703 #ifdef UNIV_IBUF_COUNT_DEBUG
1704 if (io_type == BUF_IO_WRITE || uncompressed) {
1705 @@ -4090,6 +4289,7 @@
1706 the x-latch to this OS thread: do not let this confuse you in
1709 + ut_a(!have_LRU_mutex);
1710 ut_ad(buf_pool->n_pend_reads > 0);
1711 buf_pool->n_pend_reads--;
1712 buf_pool->stat.n_pages_read++;
1713 @@ -4107,6 +4307,9 @@
1715 buf_flush_write_complete(bpage);
1717 + if (have_LRU_mutex)
1718 + mutex_exit(&buf_pool->LRU_list_mutex);
1721 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1723 @@ -4129,8 +4332,8 @@
1725 #endif /* UNIV_DEBUG */
1727 - mutex_exit(buf_page_get_mutex(bpage));
1728 buf_pool_mutex_exit(buf_pool);
1729 + mutex_exit(block_mutex);
1732 /*********************************************************************//**
1733 @@ -4147,7 +4350,9 @@
1737 - buf_pool_mutex_enter(buf_pool);
1738 + //buf_pool_mutex_enter(buf_pool);
1739 + mutex_enter(&buf_pool->LRU_list_mutex);
1740 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1742 chunk = buf_pool->chunks;
1744 @@ -4164,7 +4369,9 @@
1748 - buf_pool_mutex_exit(buf_pool);
1749 + //buf_pool_mutex_exit(buf_pool);
1750 + mutex_exit(&buf_pool->LRU_list_mutex);
1751 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1755 @@ -4212,7 +4419,8 @@
1756 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1759 - buf_pool_mutex_enter(buf_pool);
1760 + //buf_pool_mutex_enter(buf_pool);
1761 + mutex_enter(&buf_pool->LRU_list_mutex);
1763 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1764 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1765 @@ -4225,7 +4433,8 @@
1766 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1767 buf_refresh_io_stats(buf_pool);
1769 - buf_pool_mutex_exit(buf_pool);
1770 + //buf_pool_mutex_exit(buf_pool);
1771 + mutex_exit(&buf_pool->LRU_list_mutex);
1774 /*********************************************************************//**
1775 @@ -4267,7 +4476,10 @@
1779 - buf_pool_mutex_enter(buf_pool);
1780 + //buf_pool_mutex_enter(buf_pool);
1781 + mutex_enter(&buf_pool->LRU_list_mutex);
1782 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1783 + /* to preserve the new latch order, it cannot validate correctly... */
1785 chunk = buf_pool->chunks;
1787 @@ -4362,7 +4574,7 @@
1788 /* Check clean compressed-only blocks. */
1790 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1791 - b = UT_LIST_GET_NEXT(list, b)) {
1792 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1793 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1794 switch (buf_page_get_io_fix(b)) {
1796 @@ -4393,7 +4605,7 @@
1798 buf_flush_list_mutex_enter(buf_pool);
1799 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1800 - b = UT_LIST_GET_NEXT(list, b)) {
1801 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1802 ut_ad(b->in_flush_list);
1803 ut_a(b->oldest_modification);
1805 @@ -4452,6 +4664,8 @@
1808 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1809 + /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before this point */
1811 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1812 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1813 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1814 @@ -4462,8 +4676,11 @@
1815 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1816 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1817 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1820 - buf_pool_mutex_exit(buf_pool);
1821 + //buf_pool_mutex_exit(buf_pool);
1822 + mutex_exit(&buf_pool->LRU_list_mutex);
1823 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1825 ut_a(buf_LRU_validate());
1826 ut_a(buf_flush_validate(buf_pool));
1827 @@ -4519,7 +4736,9 @@
1828 index_ids = mem_alloc(size * sizeof *index_ids);
1829 counts = mem_alloc(sizeof(ulint) * size);
1831 - buf_pool_mutex_enter(buf_pool);
1832 + //buf_pool_mutex_enter(buf_pool);
1833 + mutex_enter(&buf_pool->LRU_list_mutex);
1834 + mutex_enter(&buf_pool->free_list_mutex);
1835 buf_flush_list_mutex_enter(buf_pool);
1838 @@ -4588,7 +4807,9 @@
1842 - buf_pool_mutex_exit(buf_pool);
1843 + //buf_pool_mutex_exit(buf_pool);
1844 + mutex_exit(&buf_pool->LRU_list_mutex);
1845 + mutex_exit(&buf_pool->free_list_mutex);
1847 for (i = 0; i < n_found; i++) {
1848 index = dict_index_get_if_in_cache(index_ids[i]);
1849 @@ -4645,7 +4866,7 @@
1851 ulint fixed_pages_number = 0;
1853 - buf_pool_mutex_enter(buf_pool);
1854 + //buf_pool_mutex_enter(buf_pool);
1856 chunk = buf_pool->chunks;
1858 @@ -4679,7 +4900,7 @@
1859 /* Traverse the lists of clean and dirty compressed-only blocks. */
1861 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1862 - b = UT_LIST_GET_NEXT(list, b)) {
1863 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1864 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1865 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1867 @@ -4691,7 +4912,7 @@
1869 buf_flush_list_mutex_enter(buf_pool);
1870 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1871 - b = UT_LIST_GET_NEXT(list, b)) {
1872 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1873 ut_ad(b->in_flush_list);
1875 switch (buf_page_get_state(b)) {
1876 @@ -4717,7 +4938,7 @@
1878 buf_flush_list_mutex_exit(buf_pool);
1879 mutex_exit(&buf_pool->zip_mutex);
1880 - buf_pool_mutex_exit(buf_pool);
1881 + //buf_pool_mutex_exit(buf_pool);
1883 return(fixed_pages_number);
1885 @@ -4873,6 +5094,8 @@
1886 /* Find appropriate pool_info to store stats for this buffer pool */
1887 pool_info = &all_pool_info[pool_id];
1889 + mutex_enter(&buf_pool->LRU_list_mutex);
1890 + mutex_enter(&buf_pool->free_list_mutex);
1891 buf_pool_mutex_enter(buf_pool);
1892 buf_flush_list_mutex_enter(buf_pool);
1894 @@ -4983,6 +5206,8 @@
1895 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
1897 buf_refresh_io_stats(buf_pool);
1898 + mutex_exit(&buf_pool->LRU_list_mutex);
1899 + mutex_exit(&buf_pool->free_list_mutex);
1900 buf_pool_mutex_exit(buf_pool);
1903 @@ -5224,11 +5449,13 @@
1907 - buf_pool_mutex_enter(buf_pool);
1908 + //buf_pool_mutex_enter(buf_pool);
1909 + mutex_enter(&buf_pool->free_list_mutex);
1911 len = UT_LIST_GET_LEN(buf_pool->free);
1913 - buf_pool_mutex_exit(buf_pool);
1914 + //buf_pool_mutex_exit(buf_pool);
1915 + mutex_exit(&buf_pool->free_list_mutex);
1919 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1920 --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1921 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
1924 ut_d(block->page.in_flush_list = TRUE);
1925 block->page.oldest_modification = lsn;
1926 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1927 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1929 #ifdef UNIV_DEBUG_VALGRIND
1931 @@ -401,14 +401,14 @@
1932 > block->page.oldest_modification) {
1933 ut_ad(b->in_flush_list);
1935 - b = UT_LIST_GET_NEXT(list, b);
1936 + b = UT_LIST_GET_NEXT(flush_list, b);
1940 if (prev_b == NULL) {
1941 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1942 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1944 - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1945 + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1946 prev_b, &block->page);
1950 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1951 //ut_ad(buf_pool_mutex_own(buf_pool));
1953 - //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1955 //ut_ad(bpage->in_LRU_list);
1957 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1958 @@ -470,14 +470,14 @@
1959 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1962 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1963 - ut_ad(buf_pool_mutex_own(buf_pool));
1964 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1965 + //ut_ad(buf_pool_mutex_own(buf_pool));
1967 - ut_a(buf_page_in_file(bpage));
1968 + //ut_a(buf_page_in_file(bpage));
1969 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1970 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1972 - if (bpage->oldest_modification != 0
1973 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1974 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1975 ut_ad(bpage->in_flush_list);
1979 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1981 - ut_ad(buf_pool_mutex_own(buf_pool));
1982 + //ut_ad(buf_pool_mutex_own(buf_pool));
1983 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1984 ut_ad(bpage->in_flush_list);
1986 @@ -526,11 +526,11 @@
1988 case BUF_BLOCK_ZIP_DIRTY:
1989 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1990 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1991 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1992 buf_LRU_insert_zip_clean(bpage);
1994 case BUF_BLOCK_FILE_PAGE:
1995 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1996 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2001 buf_page_t* prev_b = NULL;
2002 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2004 - ut_ad(buf_pool_mutex_own(buf_pool));
2005 + //ut_ad(buf_pool_mutex_own(buf_pool));
2006 /* Must reside in the same buffer pool. */
2007 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2009 @@ -603,18 +603,18 @@
2010 because we assert on in_flush_list in comparison function. */
2011 ut_d(bpage->in_flush_list = FALSE);
2013 - prev = UT_LIST_GET_PREV(list, bpage);
2014 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2015 + prev = UT_LIST_GET_PREV(flush_list, bpage);
2016 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2019 ut_ad(prev->in_flush_list);
2020 UT_LIST_INSERT_AFTER(
2023 buf_pool->flush_list,
2029 buf_pool->flush_list,
2032 @@ -1083,7 +1083,7 @@
2035 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2036 - ut_ad(!buf_pool_mutex_own(buf_pool));
2037 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2040 #ifdef UNIV_LOG_DEBUG
2041 @@ -1097,7 +1097,8 @@
2042 io_fixed and oldest_modification != 0. Thus, it cannot be
2043 relocated in the buffer pool or removed from flush_list or
2045 - ut_ad(!buf_pool_mutex_own(buf_pool));
2046 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2047 + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2048 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2049 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2050 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2051 @@ -1260,12 +1261,18 @@
2052 ibool is_uncompressed;
2054 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2055 - ut_ad(buf_pool_mutex_own(buf_pool));
2056 + //ut_ad(buf_pool_mutex_own(buf_pool));
2057 +#ifdef UNIV_SYNC_DEBUG
2058 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2060 ut_ad(buf_page_in_file(bpage));
2062 block_mutex = buf_page_get_mutex(bpage);
2063 ut_ad(mutex_own(block_mutex));
2065 + buf_pool_mutex_enter(buf_pool);
2066 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2068 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2070 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2071 @@ -1427,14 +1434,16 @@
2073 buf_pool = buf_pool_get(space, i);
2075 - buf_pool_mutex_enter(buf_pool);
2076 + //buf_pool_mutex_enter(buf_pool);
2077 + rw_lock_s_lock(&buf_pool->page_hash_latch);
2079 /* We only want to flush pages from this buffer pool. */
2080 bpage = buf_page_hash_get(buf_pool, space, i);
2084 - buf_pool_mutex_exit(buf_pool);
2085 + //buf_pool_mutex_exit(buf_pool);
2086 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2090 @@ -1446,11 +1455,9 @@
2091 if (flush_type != BUF_FLUSH_LRU
2093 || buf_page_is_old(bpage)) {
2094 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2096 - mutex_enter(block_mutex);
2097 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2099 - if (buf_flush_ready_for_flush(bpage, flush_type)
2100 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2101 && (i == offset || !bpage->buf_fix_count)) {
2102 /* We only try to flush those
2103 neighbors != offset where the buf fix
2104 @@ -1466,11 +1473,12 @@
2105 ut_ad(!buf_pool_mutex_own(buf_pool));
2109 + } else if (block_mutex) {
2110 mutex_exit(block_mutex);
2113 - buf_pool_mutex_exit(buf_pool);
2114 + //buf_pool_mutex_exit(buf_pool);
2115 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2119 @@ -1503,21 +1511,25 @@
2120 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2121 #endif /* UNIV_DEBUG */
2123 - ut_ad(buf_pool_mutex_own(buf_pool));
2124 + //ut_ad(buf_pool_mutex_own(buf_pool));
2125 + ut_ad(flush_type != BUF_FLUSH_LRU
2126 + || mutex_own(&buf_pool->LRU_list_mutex));
2128 - block_mutex = buf_page_get_mutex(bpage);
2129 - mutex_enter(block_mutex);
2130 + block_mutex = buf_page_get_mutex_enter(bpage);
2132 - ut_a(buf_page_in_file(bpage));
2133 + //ut_a(buf_page_in_file(bpage));
2135 - if (buf_flush_ready_for_flush(bpage, flush_type)) {
2136 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2139 buf_pool_t* buf_pool;
2141 buf_pool = buf_pool_from_bpage(bpage);
2143 - buf_pool_mutex_exit(buf_pool);
2144 + //buf_pool_mutex_exit(buf_pool);
2145 + if (flush_type == BUF_FLUSH_LRU) {
2146 + mutex_exit(&buf_pool->LRU_list_mutex);
2149 /* These fields are protected by both the
2150 buffer pool mutex and block mutex. */
2151 @@ -1533,13 +1545,18 @@
2155 - buf_pool_mutex_enter(buf_pool);
2156 + //buf_pool_mutex_enter(buf_pool);
2157 + if (flush_type == BUF_FLUSH_LRU) {
2158 + mutex_enter(&buf_pool->LRU_list_mutex);
2162 + } else if (block_mutex) {
2163 mutex_exit(block_mutex);
2166 - ut_ad(buf_pool_mutex_own(buf_pool));
2167 + //ut_ad(buf_pool_mutex_own(buf_pool));
2168 + ut_ad(flush_type != BUF_FLUSH_LRU
2169 + || mutex_own(&buf_pool->LRU_list_mutex));
2173 @@ -1560,7 +1577,8 @@
2177 - ut_ad(buf_pool_mutex_own(buf_pool));
2178 + //ut_ad(buf_pool_mutex_own(buf_pool));
2179 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2182 /* Start from the end of the list looking for a
2183 @@ -1582,7 +1600,8 @@
2184 should be flushed, we factor in this value. */
2185 buf_lru_flush_page_count += count;
2187 - ut_ad(buf_pool_mutex_own(buf_pool));
2188 + //ut_ad(buf_pool_mutex_own(buf_pool));
2189 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2193 @@ -1610,9 +1629,10 @@
2197 + buf_page_t* prev_bpage = NULL;
2200 - ut_ad(buf_pool_mutex_own(buf_pool));
2201 + //ut_ad(buf_pool_mutex_own(buf_pool));
2203 /* If we have flushed enough, leave the loop */
2205 @@ -1631,6 +1651,7 @@
2208 ut_a(bpage->oldest_modification > 0);
2209 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2212 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2213 @@ -1672,9 +1693,17 @@
2217 - bpage = UT_LIST_GET_PREV(list, bpage);
2218 + bpage = UT_LIST_GET_PREV(flush_list, bpage);
2220 - ut_ad(!bpage || bpage->in_flush_list);
2221 + //ut_ad(!bpage || bpage->in_flush_list);
2222 + if (bpage != prev_bpage) {
2223 + /* the list changed under us: the search position may have jumped; retry */
2224 + buf_flush_list_mutex_exit(buf_pool);
2228 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2231 buf_flush_list_mutex_exit(buf_pool);
2233 @@ -1683,7 +1712,7 @@
2235 } while (count < min_n && bpage != NULL && len > 0);
2237 - ut_ad(buf_pool_mutex_own(buf_pool));
2238 + //ut_ad(buf_pool_mutex_own(buf_pool));
2242 @@ -1722,13 +1751,15 @@
2243 || sync_thread_levels_empty_gen(TRUE));
2244 #endif /* UNIV_SYNC_DEBUG */
2246 - buf_pool_mutex_enter(buf_pool);
2247 + //buf_pool_mutex_enter(buf_pool);
2249 /* Note: The buffer pool mutex is released and reacquired within
2250 the flush functions. */
2251 switch(flush_type) {
2253 + mutex_enter(&buf_pool->LRU_list_mutex);
2254 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2255 + mutex_exit(&buf_pool->LRU_list_mutex);
2257 case BUF_FLUSH_LIST:
2258 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2259 @@ -1737,7 +1768,7 @@
2263 - buf_pool_mutex_exit(buf_pool);
2264 + //buf_pool_mutex_exit(buf_pool);
2266 buf_flush_buffered_writes();
2268 @@ -1993,7 +2024,7 @@
2270 //buf_pool_mutex_enter(buf_pool);
2272 - buf_pool_mutex_enter(buf_pool);
2273 + mutex_enter(&buf_pool->LRU_list_mutex);
2275 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2277 @@ -2010,15 +2041,15 @@
2278 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2281 - block_mutex = buf_page_get_mutex(bpage);
2283 - mutex_enter(block_mutex);
2284 + block_mutex = buf_page_get_mutex_enter(bpage);
2286 - if (buf_flush_ready_for_replace(bpage)) {
2287 + if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2291 - mutex_exit(block_mutex);
2292 + if (block_mutex) {
2293 + mutex_exit(block_mutex);
2298 @@ -2027,7 +2058,7 @@
2300 //buf_pool_mutex_exit(buf_pool);
2302 - buf_pool_mutex_exit(buf_pool);
2303 + mutex_exit(&buf_pool->LRU_list_mutex);
2305 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2307 @@ -2226,7 +2257,7 @@
2309 ut_ad(buf_flush_list_mutex_own(buf_pool));
2311 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2312 + UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2313 ut_ad(ut_list_node_313->in_flush_list));
2315 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2316 @@ -2266,7 +2297,7 @@
2317 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2320 - bpage = UT_LIST_GET_NEXT(list, bpage);
2321 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2323 ut_a(!bpage || om >= bpage->oldest_modification);
2325 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2326 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2327 +++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2330 buf_LRU_block_free_hashed_page(
2331 /*===========================*/
2332 - buf_block_t* block); /*!< in: block, must contain a file page and
2333 + buf_block_t* block, /*!< in: block, must contain a file page and
2334 be in a state where it can be freed */
2335 + ibool have_page_hash_mutex);
2337 /******************************************************************//**
2338 Determines if the unzip_LRU list should be used for evicting a victim
2339 @@ -154,15 +155,20 @@
2341 buf_LRU_evict_from_unzip_LRU(
2342 /*=========================*/
2343 - buf_pool_t* buf_pool)
2344 + buf_pool_t* buf_pool,
2345 + ibool have_LRU_mutex)
2350 - ut_ad(buf_pool_mutex_own(buf_pool));
2351 + //ut_ad(buf_pool_mutex_own(buf_pool));
2353 + if (!have_LRU_mutex)
2354 + mutex_enter(&buf_pool->LRU_list_mutex);
2355 /* If the unzip_LRU list is empty, we can only use the LRU. */
2356 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2357 + if (!have_LRU_mutex)
2358 + mutex_exit(&buf_pool->LRU_list_mutex);
2362 @@ -171,14 +177,20 @@
2363 decompressed pages in the buffer pool. */
2364 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2365 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2366 + if (!have_LRU_mutex)
2367 + mutex_exit(&buf_pool->LRU_list_mutex);
2371 /* If eviction hasn't started yet, we assume by default
2372 that a workload is disk bound. */
2373 if (buf_pool->freed_page_clock == 0) {
2374 + if (!have_LRU_mutex)
2375 + mutex_exit(&buf_pool->LRU_list_mutex);
2378 + if (!have_LRU_mutex)
2379 + mutex_exit(&buf_pool->LRU_list_mutex);
2381 /* Calculate the average over past intervals, and add the values
2382 of the current interval. */
2383 @@ -246,19 +258,23 @@
2384 page_arr = ut_malloc(
2385 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2387 - buf_pool_mutex_enter(buf_pool);
2388 + //buf_pool_mutex_enter(buf_pool);
2389 + mutex_enter(&buf_pool->LRU_list_mutex);
2393 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2395 while (bpage != NULL) {
2396 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2397 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2398 buf_page_t* prev_bpage;
2400 - mutex_enter(block_mutex);
2401 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2403 + if (!block_mutex) {
2407 ut_a(buf_page_in_file(bpage));
2409 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2410 @@ -287,14 +303,16 @@
2412 /* Array full. We release the buf_pool->mutex to
2413 obey the latching order. */
2414 - buf_pool_mutex_exit(buf_pool);
2415 + //buf_pool_mutex_exit(buf_pool);
2416 + mutex_exit(&buf_pool->LRU_list_mutex);
2418 buf_LRU_drop_page_hash_batch(
2419 id, zip_size, page_arr, num_entries);
2423 - buf_pool_mutex_enter(buf_pool);
2424 + //buf_pool_mutex_enter(buf_pool);
2425 + mutex_enter(&buf_pool->LRU_list_mutex);
2427 mutex_exit(block_mutex);
2433 - buf_pool_mutex_exit(buf_pool);
2434 + //buf_pool_mutex_exit(buf_pool);
2435 + mutex_exit(&buf_pool->LRU_list_mutex);
2437 /* Drop any remaining batch of search hashed pages. */
2438 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2443 - buf_pool_mutex_enter(buf_pool);
2444 + //buf_pool_mutex_enter(buf_pool);
2445 + mutex_enter(&buf_pool->LRU_list_mutex);
2446 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2450 @@ -369,8 +390,16 @@
2454 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2455 - mutex_enter(block_mutex);
2456 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2458 + if (!block_mutex) {
2459 + /* This case should be impossible; something is
2460 + wrong, so fall back to scan_again */
2462 + all_freed = FALSE;
2464 + goto next_page_no_mutex;
2467 if (bpage->buf_fix_count > 0) {
2473 - buf_pool_mutex_exit(buf_pool);
2474 + //buf_pool_mutex_exit(buf_pool);
2475 + mutex_exit(&buf_pool->LRU_list_mutex);
2476 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2478 zip_size = buf_page_get_zip_size(bpage);
2479 page_no = buf_page_get_page_no(bpage);
2481 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2482 != BUF_BLOCK_ZIP_FREE) {
2483 buf_LRU_block_free_hashed_page((buf_block_t*)
2487 /* The block_mutex should have been
2488 released by buf_LRU_block_remove_hashed_page()
2493 - buf_pool_mutex_exit(buf_pool);
2494 + //buf_pool_mutex_exit(buf_pool);
2495 + mutex_exit(&buf_pool->LRU_list_mutex);
2496 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2499 os_thread_sleep(20000);
2502 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2504 - ut_ad(buf_pool_mutex_own(buf_pool));
2505 + //ut_ad(buf_pool_mutex_own(buf_pool));
2506 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2507 + ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2508 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2510 /* Find the first successor of bpage in the LRU list
2511 @@ -540,17 +575,17 @@
2514 b = UT_LIST_GET_NEXT(LRU, b);
2515 - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2516 + } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2518 /* Insert bpage before b, i.e., after the predecessor of b. */
2520 - b = UT_LIST_GET_PREV(list, b);
2521 + b = UT_LIST_GET_PREV(zip_list, b);
2525 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2526 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2528 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2529 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2533 @@ -563,18 +598,19 @@
2534 buf_LRU_free_from_unzip_LRU_list(
2535 /*=============================*/
2536 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2537 - ulint n_iterations) /*!< in: how many times this has
2538 + ulint n_iterations, /*!< in: how many times this has
2539 been called repeatedly without
2540 result: a high value means that
2541 we should search farther; we will
2542 search n_iterations / 5 of the
2543 unzip_LRU list, or nothing if
2544 n_iterations >= 5 */
2545 + ibool have_LRU_mutex)
2550 - ut_ad(buf_pool_mutex_own(buf_pool));
2551 + //ut_ad(buf_pool_mutex_own(buf_pool));
2553 /* Theoratically it should be much easier to find a victim
2554 from unzip_LRU as we can choose even a dirty block (as we'll
2556 if we have done five iterations so far. */
2558 if (UNIV_UNLIKELY(n_iterations >= 5)
2559 - || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2560 + || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2564 @@ -592,18 +628,25 @@
2565 distance = 100 + (n_iterations
2566 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2569 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2570 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2571 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2573 enum buf_lru_free_block_status freed;
2575 + mutex_enter(&block->mutex);
2576 + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2577 + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2578 + mutex_exit(&block->mutex);
2582 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2583 ut_ad(block->in_unzip_LRU_list);
2584 ut_ad(block->page.in_LRU_list);
2586 - mutex_enter(&block->mutex);
2587 - freed = buf_LRU_free_block(&block->page, FALSE, NULL);
2588 + freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
2589 mutex_exit(&block->mutex);
2592 @@ -637,21 +680,23 @@
2593 buf_LRU_free_from_common_LRU_list(
2594 /*==============================*/
2595 buf_pool_t* buf_pool,
2596 - ulint n_iterations)
2597 + ulint n_iterations,
2598 /*!< in: how many times this has been called
2599 repeatedly without result: a high value means
2600 that we should search farther; if
2601 n_iterations < 10, then we search
2602 n_iterations / 10 * buf_pool->curr_size
2603 pages from the end of the LRU list */
2604 + ibool have_LRU_mutex)
2609 - ut_ad(buf_pool_mutex_own(buf_pool));
2610 + //ut_ad(buf_pool_mutex_own(buf_pool));
2612 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2615 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2616 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2617 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2618 @@ -659,14 +704,23 @@
2619 enum buf_lru_free_block_status freed;
2621 mutex_t* block_mutex
2622 - = buf_page_get_mutex(bpage);
2623 + = buf_page_get_mutex_enter(bpage);
2625 + if (!block_mutex) {
2629 + if (!bpage->in_LRU_list
2630 + || !buf_page_in_file(bpage)) {
2631 + mutex_exit(block_mutex);
2635 ut_ad(buf_page_in_file(bpage));
2636 ut_ad(bpage->in_LRU_list);
2638 - mutex_enter(block_mutex);
2639 accessed = buf_page_is_accessed(bpage);
2640 - freed = buf_LRU_free_block(bpage, TRUE, NULL);
2641 + freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
2642 mutex_exit(block_mutex);
2645 @@ -718,16 +772,23 @@
2646 n_iterations / 5 of the unzip_LRU list. */
2648 ibool freed = FALSE;
2649 + ibool have_LRU_mutex = FALSE;
2651 - buf_pool_mutex_enter(buf_pool);
2652 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2653 + have_LRU_mutex = TRUE;
2655 + //buf_pool_mutex_enter(buf_pool);
2656 + if (have_LRU_mutex)
2657 + mutex_enter(&buf_pool->LRU_list_mutex);
2659 - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2660 + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2663 freed = buf_LRU_free_from_common_LRU_list(
2664 - buf_pool, n_iterations);
2665 + buf_pool, n_iterations, have_LRU_mutex);
2668 + buf_pool_mutex_enter(buf_pool);
2670 buf_pool->LRU_flush_ended = 0;
2671 } else if (buf_pool->LRU_flush_ended > 0) {
2675 buf_pool_mutex_exit(buf_pool);
2676 + if (have_LRU_mutex)
2677 + mutex_exit(&buf_pool->LRU_list_mutex);
2683 buf_pool = buf_pool_from_array(i);
2685 - buf_pool_mutex_enter(buf_pool);
2686 + //buf_pool_mutex_enter(buf_pool);
2687 + mutex_enter(&buf_pool->LRU_list_mutex);
2688 + mutex_enter(&buf_pool->free_list_mutex);
2690 if (!recv_recovery_on
2691 && UT_LIST_GET_LEN(buf_pool->free)
2696 - buf_pool_mutex_exit(buf_pool);
2697 + //buf_pool_mutex_exit(buf_pool);
2698 + mutex_exit(&buf_pool->LRU_list_mutex);
2699 + mutex_exit(&buf_pool->free_list_mutex);
2703 @@ -823,9 +890,10 @@
2707 - ut_ad(buf_pool_mutex_own(buf_pool));
2708 + //ut_ad(buf_pool_mutex_own(buf_pool));
2710 - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2711 + mutex_enter(&buf_pool->free_list_mutex);
2712 + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2717 ut_ad(!block->page.in_flush_list);
2718 ut_ad(!block->page.in_LRU_list);
2719 ut_a(!buf_page_in_file(&block->page));
2720 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2721 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2723 + mutex_exit(&buf_pool->free_list_mutex);
2725 mutex_enter(&block->mutex);
2728 ut_ad(buf_pool_from_block(block) == buf_pool);
2730 mutex_exit(&block->mutex);
2732 + mutex_exit(&buf_pool->free_list_mutex);
2737 ibool mon_value_was = FALSE;
2738 ibool started_monitor = FALSE;
2740 - buf_pool_mutex_enter(buf_pool);
2741 + //buf_pool_mutex_enter(buf_pool);
2743 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2744 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2745 @@ -951,8 +1023,10 @@
2747 page_zip_set_size(&block->page.zip, zip_size);
2749 + mutex_enter(&buf_pool->LRU_list_mutex);
2750 block->page.zip.data = buf_buddy_alloc(
2751 - buf_pool, zip_size, &lru);
2752 + buf_pool, zip_size, &lru, FALSE);
2753 + mutex_exit(&buf_pool->LRU_list_mutex);
2755 UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
2757 @@ -960,7 +1034,7 @@
2758 block->page.zip.data = NULL;
2761 - buf_pool_mutex_exit(buf_pool);
2762 + //buf_pool_mutex_exit(buf_pool);
2764 if (started_monitor) {
2765 srv_print_innodb_monitor = mon_value_was;
2766 @@ -972,7 +1046,7 @@
2767 /* If no block was in the free list, search from the end of the LRU
2768 list and try to free a block there */
2770 - buf_pool_mutex_exit(buf_pool);
2771 + //buf_pool_mutex_exit(buf_pool);
2773 freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
2775 @@ -1058,7 +1132,8 @@
2778 ut_a(buf_pool->LRU_old);
2779 - ut_ad(buf_pool_mutex_own(buf_pool));
2780 + //ut_ad(buf_pool_mutex_own(buf_pool));
2781 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2782 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2783 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2784 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2785 @@ -1124,7 +1199,8 @@
2789 - ut_ad(buf_pool_mutex_own(buf_pool));
2790 + //ut_ad(buf_pool_mutex_own(buf_pool));
2791 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2792 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2794 /* We first initialize all blocks in the LRU list as old and then use
2795 @@ -1159,13 +1235,14 @@
2798 ut_ad(buf_page_in_file(bpage));
2799 - ut_ad(buf_pool_mutex_own(buf_pool));
2800 + //ut_ad(buf_pool_mutex_own(buf_pool));
2801 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2803 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2804 buf_block_t* block = (buf_block_t*) bpage;
2806 ut_ad(block->in_unzip_LRU_list);
2807 - ut_d(block->in_unzip_LRU_list = FALSE);
2808 + block->in_unzip_LRU_list = FALSE;
2810 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2812 @@ -1183,7 +1260,8 @@
2816 - ut_ad(buf_pool_mutex_own(buf_pool));
2817 + //ut_ad(buf_pool_mutex_own(buf_pool));
2818 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2820 ut_a(buf_page_in_file(bpage));
2822 @@ -1260,12 +1338,13 @@
2826 - ut_ad(buf_pool_mutex_own(buf_pool));
2827 + //ut_ad(buf_pool_mutex_own(buf_pool));
2828 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2830 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2832 ut_ad(!block->in_unzip_LRU_list);
2833 - ut_d(block->in_unzip_LRU_list = TRUE);
2834 + block->in_unzip_LRU_list = TRUE;
2837 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2838 @@ -1286,7 +1365,8 @@
2842 - ut_ad(buf_pool_mutex_own(buf_pool));
2843 + //ut_ad(buf_pool_mutex_own(buf_pool));
2844 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2846 ut_a(buf_page_in_file(bpage));
2848 @@ -1337,7 +1417,8 @@
2852 - ut_ad(buf_pool_mutex_own(buf_pool));
2853 + //ut_ad(buf_pool_mutex_own(buf_pool));
2854 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2856 ut_a(buf_page_in_file(bpage));
2857 ut_ad(!bpage->in_LRU_list);
2858 @@ -1416,7 +1497,8 @@
2860 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2862 - ut_ad(buf_pool_mutex_own(buf_pool));
2863 + //ut_ad(buf_pool_mutex_own(buf_pool));
2864 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2867 buf_pool->stat.n_pages_made_young++;
2868 @@ -1458,19 +1540,20 @@
2869 buf_page_t* bpage, /*!< in: block to be freed */
2870 ibool zip, /*!< in: TRUE if should remove also the
2871 compressed page of an uncompressed page */
2872 - ibool* buf_pool_mutex_released)
2873 + ibool* buf_pool_mutex_released,
2874 /*!< in: pointer to a variable that will
2875 be assigned TRUE if buf_pool_mutex
2876 was temporarily released, or NULL */
2877 + ibool have_LRU_mutex)
2879 buf_page_t* b = NULL;
2880 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2881 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2883 - ut_ad(buf_pool_mutex_own(buf_pool));
2884 + //ut_ad(buf_pool_mutex_own(buf_pool));
2885 ut_ad(mutex_own(block_mutex));
2886 ut_ad(buf_page_in_file(bpage));
2887 - ut_ad(bpage->in_LRU_list);
2888 + //ut_ad(bpage->in_LRU_list);
2889 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2890 #if UNIV_WORD_SIZE == 4
2891 /* On 32-bit systems, there is no padding in buf_page_t. On
2892 @@ -1479,7 +1562,7 @@
2893 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2896 - if (!buf_page_can_relocate(bpage)) {
2897 + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2899 /* Do not free buffer-fixed or I/O-fixed blocks. */
2900 return(BUF_LRU_NOT_FREED);
2901 @@ -1511,15 +1594,15 @@
2902 If it cannot be allocated (without freeing a block
2903 from the LRU list), refuse to free bpage. */
2905 - buf_pool_mutex_exit_forbid(buf_pool);
2906 - b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2907 - buf_pool_mutex_exit_allow(buf_pool);
2908 + //buf_pool_mutex_exit_forbid(buf_pool);
2909 + b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2910 + //buf_pool_mutex_exit_allow(buf_pool);
2912 if (UNIV_UNLIKELY(!b)) {
2913 return(BUF_LRU_CANNOT_RELOCATE);
2916 - memcpy(b, bpage, sizeof *b);
2917 + //memcpy(b, bpage, sizeof *b);
2921 @@ -1530,6 +1613,39 @@
2923 #endif /* UNIV_DEBUG */
2925 + /* to preserve the latch order, block_mutex must be released and re-acquired */
2926 + mutex_exit(block_mutex);
2928 + if (!have_LRU_mutex)
2929 + mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2930 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2931 + mutex_enter(block_mutex);
2933 + /* re-check the block state after re-acquiring the latches */
2934 + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2935 + || !buf_page_can_relocate(bpage)) {
2938 + buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2940 + if (!have_LRU_mutex)
2941 + mutex_exit(&buf_pool->LRU_list_mutex);
2942 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2943 + return(BUF_LRU_NOT_FREED);
2944 + } else if (zip || !bpage->zip.data) {
2945 + if (bpage->oldest_modification)
2947 + } else if (bpage->oldest_modification) {
2948 + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2949 + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2955 + memcpy(b, bpage, sizeof *b);
2958 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2959 != BUF_BLOCK_ZIP_FREE) {
2960 ut_a(bpage->buf_fix_count == 0);
2961 @@ -1546,6 +1662,10 @@
2965 + while (prev_b && !prev_b->in_LRU_list) {
2966 + prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2969 b->state = b->oldest_modification
2970 ? BUF_BLOCK_ZIP_DIRTY
2971 : BUF_BLOCK_ZIP_PAGE;
2972 @@ -1642,7 +1762,9 @@
2973 *buf_pool_mutex_released = TRUE;
2976 - buf_pool_mutex_exit(buf_pool);
2977 + //buf_pool_mutex_exit(buf_pool);
2978 + mutex_exit(&buf_pool->LRU_list_mutex);
2979 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2980 mutex_exit(block_mutex);
2982 /* Remove possible adaptive hash index on the page.
2983 @@ -1674,7 +1796,9 @@
2984 : BUF_NO_CHECKSUM_MAGIC);
2987 - buf_pool_mutex_enter(buf_pool);
2988 + //buf_pool_mutex_enter(buf_pool);
2989 + if (have_LRU_mutex)
2990 + mutex_enter(&buf_pool->LRU_list_mutex);
2991 mutex_enter(block_mutex);
2994 @@ -1684,13 +1808,17 @@
2995 mutex_exit(&buf_pool->zip_mutex);
2998 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2999 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
3001 /* The block_mutex should have been released by
3002 buf_LRU_block_remove_hashed_page() when it returns
3003 BUF_BLOCK_ZIP_FREE. */
3004 ut_ad(block_mutex == &buf_pool->zip_mutex);
3005 mutex_enter(block_mutex);
3007 + if (!have_LRU_mutex)
3008 + mutex_exit(&buf_pool->LRU_list_mutex);
3009 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3012 return(BUF_LRU_FREED);
3013 @@ -1702,13 +1830,14 @@
3015 buf_LRU_block_free_non_file_page(
3016 /*=============================*/
3017 - buf_block_t* block) /*!< in: block, must not contain a file page */
3018 + buf_block_t* block, /*!< in: block, must not contain a file page */
3019 + ibool have_page_hash_mutex)
3022 buf_pool_t* buf_pool = buf_pool_from_block(block);
3025 - ut_ad(buf_pool_mutex_own(buf_pool));
3026 + //ut_ad(buf_pool_mutex_own(buf_pool));
3027 ut_ad(mutex_own(&block->mutex));
3029 switch (buf_block_get_state(block)) {
3030 @@ -1742,18 +1871,21 @@
3032 block->page.zip.data = NULL;
3033 mutex_exit(&block->mutex);
3034 - buf_pool_mutex_exit_forbid(buf_pool);
3035 + //buf_pool_mutex_exit_forbid(buf_pool);
3038 - buf_pool, data, page_zip_get_size(&block->page.zip));
3039 + buf_pool, data, page_zip_get_size(&block->page.zip),
3040 + have_page_hash_mutex);
3042 - buf_pool_mutex_exit_allow(buf_pool);
3043 + //buf_pool_mutex_exit_allow(buf_pool);
3044 mutex_enter(&block->mutex);
3045 page_zip_set_size(&block->page.zip, 0);
3048 - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3049 + mutex_enter(&buf_pool->free_list_mutex);
3050 + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3051 ut_d(block->page.in_free_list = TRUE);
3052 + mutex_exit(&buf_pool->free_list_mutex);
3054 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3056 @@ -1783,7 +1915,11 @@
3057 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3060 - ut_ad(buf_pool_mutex_own(buf_pool));
3061 + //ut_ad(buf_pool_mutex_own(buf_pool));
3062 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3063 +#ifdef UNIV_SYNC_DEBUG
3064 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3066 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3068 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3069 @@ -1891,7 +2027,9 @@
3071 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3072 mutex_exit(buf_page_get_mutex(bpage));
3073 - buf_pool_mutex_exit(buf_pool);
3074 + //buf_pool_mutex_exit(buf_pool);
3075 + mutex_exit(&buf_pool->LRU_list_mutex);
3076 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3080 @@ -1912,17 +2050,17 @@
3081 ut_a(bpage->zip.data);
3082 ut_a(buf_page_get_zip_size(bpage));
3084 - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3085 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3087 mutex_exit(&buf_pool->zip_mutex);
3088 - buf_pool_mutex_exit_forbid(buf_pool);
3089 + //buf_pool_mutex_exit_forbid(buf_pool);
3092 buf_pool, bpage->zip.data,
3093 - page_zip_get_size(&bpage->zip));
3094 + page_zip_get_size(&bpage->zip), TRUE);
3096 - buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3097 - buf_pool_mutex_exit_allow(buf_pool);
3098 + buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3099 + //buf_pool_mutex_exit_allow(buf_pool);
3101 UNIV_MEM_UNDESC(bpage);
3102 return(BUF_BLOCK_ZIP_FREE);
3103 @@ -1945,13 +2083,13 @@
3104 ut_ad(!bpage->in_flush_list);
3105 ut_ad(!bpage->in_LRU_list);
3106 mutex_exit(&((buf_block_t*) bpage)->mutex);
3107 - buf_pool_mutex_exit_forbid(buf_pool);
3108 + //buf_pool_mutex_exit_forbid(buf_pool);
3112 - page_zip_get_size(&bpage->zip));
3113 + page_zip_get_size(&bpage->zip), TRUE);
3115 - buf_pool_mutex_exit_allow(buf_pool);
3116 + //buf_pool_mutex_exit_allow(buf_pool);
3117 mutex_enter(&((buf_block_t*) bpage)->mutex);
3118 page_zip_set_size(&bpage->zip, 0);
3120 @@ -1977,18 +2115,19 @@
3122 buf_LRU_block_free_hashed_page(
3123 /*===========================*/
3124 - buf_block_t* block) /*!< in: block, must contain a file page and
3125 + buf_block_t* block, /*!< in: block, must contain a file page and
3126 be in a state where it can be freed */
3127 + ibool have_page_hash_mutex)
3130 - buf_pool_t* buf_pool = buf_pool_from_block(block);
3131 - ut_ad(buf_pool_mutex_own(buf_pool));
3132 + //buf_pool_t* buf_pool = buf_pool_from_block(block);
3133 + //ut_ad(buf_pool_mutex_own(buf_pool));
3135 ut_ad(mutex_own(&block->mutex));
3137 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3139 - buf_LRU_block_free_non_file_page(block);
3140 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3143 /**********************************************************************//**
3144 @@ -2015,7 +2154,8 @@
3148 - buf_pool_mutex_enter(buf_pool);
3149 + //buf_pool_mutex_enter(buf_pool);
3150 + mutex_enter(&buf_pool->LRU_list_mutex);
3152 if (ratio != buf_pool->LRU_old_ratio) {
3153 buf_pool->LRU_old_ratio = ratio;
3154 @@ -2027,7 +2167,8 @@
3158 - buf_pool_mutex_exit(buf_pool);
3159 + //buf_pool_mutex_exit(buf_pool);
3160 + mutex_exit(&buf_pool->LRU_list_mutex);
3162 buf_pool->LRU_old_ratio = ratio;
3164 @@ -2132,7 +2273,8 @@
3168 - buf_pool_mutex_enter(buf_pool);
3169 + //buf_pool_mutex_enter(buf_pool);
3170 + mutex_enter(&buf_pool->LRU_list_mutex);
3172 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3174 @@ -2193,16 +2335,22 @@
3176 ut_a(buf_pool->LRU_old_len == old_len);
3178 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3179 + mutex_exit(&buf_pool->LRU_list_mutex);
3180 + mutex_enter(&buf_pool->free_list_mutex);
3182 + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3183 ut_ad(ut_list_node_313->in_free_list));
3185 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3187 - bpage = UT_LIST_GET_NEXT(list, bpage)) {
3188 + bpage = UT_LIST_GET_NEXT(free, bpage)) {
3190 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3193 + mutex_exit(&buf_pool->free_list_mutex);
3194 + mutex_enter(&buf_pool->LRU_list_mutex);
3196 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3197 ut_ad(ut_list_node_313->in_unzip_LRU_list
3198 && ut_list_node_313->page.in_LRU_list));
3199 @@ -2216,7 +2364,8 @@
3200 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3203 - buf_pool_mutex_exit(buf_pool);
3204 + //buf_pool_mutex_exit(buf_pool);
3205 + mutex_exit(&buf_pool->LRU_list_mutex);
3208 /**********************************************************************//**
3209 @@ -2252,7 +2401,8 @@
3210 const buf_page_t* bpage;
3213 - buf_pool_mutex_enter(buf_pool);
3214 + //buf_pool_mutex_enter(buf_pool);
3215 + mutex_enter(&buf_pool->LRU_list_mutex);
3217 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3219 @@ -2309,7 +2459,8 @@
3220 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3223 - buf_pool_mutex_exit(buf_pool);
3224 + //buf_pool_mutex_exit(buf_pool);
3225 + mutex_exit(&buf_pool->LRU_list_mutex);
3228 /**********************************************************************//**
3229 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3230 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3231 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3236 + buf_pool_mutex_exit(buf_pool);
3238 /* Check that almost all pages in the area have been accessed; if
3239 offset == low, the accesses must be in a descending order, otherwise,
3244 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3245 for (i = low; i < high; i++) {
3246 bpage = buf_page_hash_get(buf_pool, space, i);
3250 if (fail_count > threshold) {
3251 /* Too many failures: return */
3252 - buf_pool_mutex_exit(buf_pool);
3253 + //buf_pool_mutex_exit(buf_pool);
3254 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3259 bpage = buf_page_hash_get(buf_pool, space, offset);
3261 if (bpage == NULL) {
3262 - buf_pool_mutex_exit(buf_pool);
3263 + //buf_pool_mutex_exit(buf_pool);
3264 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3269 pred_offset = fil_page_get_prev(frame);
3270 succ_offset = fil_page_get_next(frame);
3272 - buf_pool_mutex_exit(buf_pool);
3273 + //buf_pool_mutex_exit(buf_pool);
3274 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3276 if ((offset == low) && (succ_offset == offset + 1)) {
3278 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3279 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3280 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3281 @@ -245,6 +245,10 @@
3282 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3283 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3284 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3285 + {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3286 + {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3287 + {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3288 + {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3289 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3290 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3291 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3293 {&archive_lock_key, "archive_lock", 0},
3294 # endif /* UNIV_LOG_ARCHIVE */
3295 {&btr_search_latch_key, "btr_search_latch", 0},
3296 + {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3297 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3298 {&buf_block_lock_key, "buf_block_lock", 0},
3299 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3300 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3301 --- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3302 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
3303 @@ -1565,7 +1565,8 @@
3305 buf_pool = buf_pool_from_array(i);
3307 - buf_pool_mutex_enter(buf_pool);
3308 + //buf_pool_mutex_enter(buf_pool);
3309 + mutex_enter(&buf_pool->zip_free_mutex);
3311 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3312 buf_buddy_stat_t* buddy_stat;
3313 @@ -1595,7 +1596,8 @@
3317 - buf_pool_mutex_exit(buf_pool);
3318 + //buf_pool_mutex_exit(buf_pool);
3319 + mutex_exit(&buf_pool->zip_free_mutex);
3323 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3324 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3325 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3326 @@ -3783,9 +3783,11 @@
3327 ulint fold = buf_page_address_fold(space, page_no);
3328 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3330 - buf_pool_mutex_enter(buf_pool);
3331 + //buf_pool_mutex_enter(buf_pool);
3332 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3333 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3334 - buf_pool_mutex_exit(buf_pool);
3335 + //buf_pool_mutex_exit(buf_pool);
3336 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3338 if (UNIV_LIKELY_NULL(bpage)) {
3339 /* A buffer pool watch has been set or the
3340 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3341 --- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3342 +++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3344 buf_pool_t* buf_pool,
3345 /*!< buffer pool in which the block resides */
3346 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3347 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3348 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3349 TRUE if storage was allocated from the LRU list
3350 and buf_pool->mutex was temporarily released,
3351 or NULL if the LRU list should not be used */
3352 + ibool have_page_hash_mutex)
3353 __attribute__((malloc));
3355 /**********************************************************************//**
3357 /*!< buffer pool in which the block resides */
3358 void* buf, /*!< in: block to be freed, must not be
3359 pointed to by the buffer pool */
3360 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3361 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3362 + ibool have_page_hash_mutex)
3363 __attribute__((nonnull));
3366 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3367 --- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3368 +++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3370 /*!< in: buffer pool in which the page resides */
3371 ulint i, /*!< in: index of buf_pool->zip_free[],
3372 or BUF_BUDDY_SIZES */
3373 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3374 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3375 TRUE if storage was allocated from the LRU list
3376 and buf_pool->mutex was temporarily released,
3377 or NULL if the LRU list should not be used */
3378 + ibool have_page_hash_mutex)
3379 __attribute__((malloc));
3381 /**********************************************************************//**
3383 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3384 void* buf, /*!< in: block to be freed, must not be
3385 pointed to by the buffer pool */
3386 - ulint i) /*!< in: index of buf_pool->zip_free[],
3387 + ulint i, /*!< in: index of buf_pool->zip_free[],
3388 or BUF_BUDDY_SIZES */
3389 + ibool have_page_hash_mutex)
3390 __attribute__((nonnull));
3392 /**********************************************************************//**
3393 @@ -102,16 +104,17 @@
3395 ulint size, /*!< in: block size, up to
3397 - ibool* lru) /*!< in: pointer to a variable
3398 + ibool* lru, /*!< in: pointer to a variable
3399 that will be assigned TRUE if
3400 storage was allocated from the
3401 LRU list and buf_pool->mutex was
3402 temporarily released, or NULL if
3403 the LRU list should not be used */
3404 + ibool have_page_hash_mutex)
3406 - ut_ad(buf_pool_mutex_own(buf_pool));
3407 + //ut_ad(buf_pool_mutex_own(buf_pool));
3409 - return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3410 + return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3413 /**********************************************************************//**
3414 @@ -123,12 +126,25 @@
3415 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3416 void* buf, /*!< in: block to be freed, must not be
3417 pointed to by the buffer pool */
3418 - ulint size) /*!< in: block size, up to
3419 + ulint size, /*!< in: block size, up to
3421 + ibool have_page_hash_mutex)
3423 - ut_ad(buf_pool_mutex_own(buf_pool));
3424 + //ut_ad(buf_pool_mutex_own(buf_pool));
3426 + if (!have_page_hash_mutex) {
3427 + mutex_enter(&buf_pool->LRU_list_mutex);
3428 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3431 - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3432 + mutex_enter(&buf_pool->zip_free_mutex);
3433 + buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3434 + mutex_exit(&buf_pool->zip_free_mutex);
3436 + if (!have_page_hash_mutex) {
3437 + mutex_exit(&buf_pool->LRU_list_mutex);
3438 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3442 #ifdef UNIV_MATERIALIZE
3443 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3444 --- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3445 +++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
3446 @@ -203,6 +203,20 @@
3447 /*==========================*/
3449 /********************************************************************//**
3453 +buf_pool_page_hash_x_lock_all(void);
3454 +/*================================*/
3456 +/********************************************************************//**
3460 +buf_pool_page_hash_x_unlock_all(void);
3461 +/*==================================*/
3463 +/********************************************************************//**
3464 Creates the buffer pool.
3465 @return own: buf_pool object, NULL if not enough memory or error */
3467 @@ -832,6 +846,15 @@
3468 const buf_page_t* bpage) /*!< in: pointer to control block */
3469 __attribute__((pure));
3471 +/*************************************************************************
3472 +Gets the mutex of a block and enters the mutex with consistency. */
3475 +buf_page_get_mutex_enter(
3476 +/*=========================*/
3477 + const buf_page_t* bpage) /*!< in: pointer to control block */
3478 + __attribute__((pure));
3480 /*********************************************************************//**
3481 Get the flush type of a page.
3482 @return flush type */
3483 @@ -1313,7 +1336,7 @@
3484 All these are protected by buf_pool->mutex. */
3487 - UT_LIST_NODE_T(buf_page_t) list;
3488 + /* UT_LIST_NODE_T(buf_page_t) list; */
3489 /*!< based on state, this is a
3490 list node, protected either by
3491 buf_pool->mutex or by
3492 @@ -1341,6 +1364,10 @@
3493 BUF_BLOCK_REMOVE_HASH or
3494 BUF_BLOCK_READY_IN_USE. */
3496 + /* resplit for optimistic use */
3497 + UT_LIST_NODE_T(buf_page_t) free;
3498 + UT_LIST_NODE_T(buf_page_t) flush_list;
3499 + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3501 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3502 when buf_pool->flush_list_mutex is
3503 @@ -1433,11 +1460,11 @@
3504 a block is in the unzip_LRU list
3505 if page.state == BUF_BLOCK_FILE_PAGE
3506 and page.zip.data != NULL */
3508 +//#ifdef UNIV_DEBUG
3509 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3510 decompressed LRU list;
3511 used in debugging */
3512 -#endif /* UNIV_DEBUG */
3513 +//#endif /* UNIV_DEBUG */
3514 mutex_t mutex; /*!< mutex protecting this block:
3515 state (also protected by the buffer
3516 pool mutex), io_fix, buf_fix_count,
3517 @@ -1612,6 +1639,11 @@
3518 pool instance, protects compressed
3519 only pages (of type buf_page_t, not
3521 + mutex_t LRU_list_mutex;
3522 + rw_lock_t page_hash_latch;
3523 + mutex_t free_list_mutex;
3524 + mutex_t zip_free_mutex;
3525 + mutex_t zip_hash_mutex;
3526 ulint instance_no; /*!< Array index of this buffer
3528 ulint old_pool_size; /*!< Old pool size in bytes */
3529 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3530 --- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3531 +++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3533 case BUF_BLOCK_ZIP_FREE:
3534 /* This is a free page in buf_pool->zip_free[].
3535 Such pages should only be accessed by the buddy allocator. */
3537 + /* ut_error; */ /* optimistic */
3539 case BUF_BLOCK_ZIP_PAGE:
3540 case BUF_BLOCK_ZIP_DIRTY:
3541 @@ -317,9 +317,14 @@
3543 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3545 + if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3546 +	/* TODO: this code is interim; it should be confirmed later. */
3547 + return(&buf_pool->zip_mutex);
3550 switch (buf_page_get_state(bpage)) {
3551 case BUF_BLOCK_ZIP_FREE:
3553 + /* ut_error; */ /* optimistic */
3555 case BUF_BLOCK_ZIP_PAGE:
3556 case BUF_BLOCK_ZIP_DIRTY:
3557 @@ -329,6 +334,28 @@
3561 +/*************************************************************************
3562 +Gets the mutex of a block and enters the mutex with consistency. */
3565 +buf_page_get_mutex_enter(
3566 +/*=========================*/
3567 + const buf_page_t* bpage) /*!< in: pointer to control block */
3569 + mutex_t* block_mutex;
3572 + block_mutex = buf_page_get_mutex(bpage);
3574 + return block_mutex;
3576 + mutex_enter(block_mutex);
3577 + if (block_mutex == buf_page_get_mutex(bpage))
3578 + return block_mutex;
3579 + mutex_exit(block_mutex);
3583 /*********************************************************************//**
3584 Get the flush type of a page.
3585 @return flush type */
3587 enum buf_io_fix io_fix) /*!< in: io_fix state */
3590 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3591 - ut_ad(buf_pool_mutex_own(buf_pool));
3592 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3593 + //ut_ad(buf_pool_mutex_own(buf_pool));
3595 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3597 @@ -456,14 +483,14 @@
3598 const buf_page_t* bpage) /*!< control block being relocated */
3601 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3602 - ut_ad(buf_pool_mutex_own(buf_pool));
3603 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3604 + //ut_ad(buf_pool_mutex_own(buf_pool));
3606 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3607 ut_ad(buf_page_in_file(bpage));
3608 - ut_ad(bpage->in_LRU_list);
3609 + //ut_ad(bpage->in_LRU_list);
3611 - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3612 + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3613 && bpage->buf_fix_count == 0);
3617 const buf_page_t* bpage) /*!< in: control block */
3620 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3621 - ut_ad(buf_pool_mutex_own(buf_pool));
3622 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3623 + //ut_ad(buf_pool_mutex_own(buf_pool));
3625 ut_ad(buf_page_in_file(bpage));
3628 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3629 #endif /* UNIV_DEBUG */
3630 ut_a(buf_page_in_file(bpage));
3631 - ut_ad(buf_pool_mutex_own(buf_pool));
3632 + //ut_ad(buf_pool_mutex_own(buf_pool));
3633 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3634 ut_ad(bpage->in_LRU_list);
3636 #ifdef UNIV_LRU_DEBUG
3637 @@ -545,9 +573,10 @@
3638 ulint time_ms) /*!< in: ut_time_ms() */
3641 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3642 - ut_ad(buf_pool_mutex_own(buf_pool));
3643 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3644 + //ut_ad(buf_pool_mutex_own(buf_pool));
3646 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3647 ut_a(buf_page_in_file(bpage));
3649 if (!bpage->access_time) {
3650 @@ -761,19 +790,19 @@
3652 buf_block_t* block) /*!< in, own: block to be freed */
3654 - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3655 + //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3657 - buf_pool_mutex_enter(buf_pool);
3658 + //buf_pool_mutex_enter(buf_pool);
3660 mutex_enter(&block->mutex);
3662 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3664 - buf_LRU_block_free_non_file_page(block);
3665 + buf_LRU_block_free_non_file_page(block, FALSE);
3667 mutex_exit(&block->mutex);
3669 - buf_pool_mutex_exit(buf_pool);
3670 + //buf_pool_mutex_exit(buf_pool);
3672 #endif /* !UNIV_HOTBACKUP */
3674 @@ -821,17 +850,17 @@
3678 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
3680 - mutex_enter(block_mutex);
3681 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3683 - if (buf_page_in_file(bpage)) {
3684 + if (block_mutex && buf_page_in_file(bpage)) {
3685 lsn = bpage->newest_modification;
3690 - mutex_exit(block_mutex);
3691 + if (block_mutex) {
3692 + mutex_exit(block_mutex);
3698 #ifdef UNIV_SYNC_DEBUG
3699 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3701 - ut_ad((buf_pool_mutex_own(buf_pool)
3702 + ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3703 && (block->page.buf_fix_count == 0))
3704 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3705 #endif /* UNIV_SYNC_DEBUG */
3706 @@ -979,7 +1008,11 @@
3710 - ut_ad(buf_pool_mutex_own(buf_pool));
3711 + //ut_ad(buf_pool_mutex_own(buf_pool));
3712 +#ifdef UNIV_SYNC_DEBUG
3713 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3714 + || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3716 ut_ad(fold == buf_page_address_fold(space, offset));
3718 /* Look for the page in the hash table */
3719 @@ -1064,11 +1097,13 @@
3720 const buf_page_t* bpage;
3721 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3723 - buf_pool_mutex_enter(buf_pool);
3724 + //buf_pool_mutex_enter(buf_pool);
3725 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3727 bpage = buf_page_hash_get(buf_pool, space, offset);
3729 - buf_pool_mutex_exit(buf_pool);
3730 + //buf_pool_mutex_exit(buf_pool);
3731 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3733 return(bpage != NULL);
3735 @@ -1196,4 +1231,38 @@
3736 buf_pool_mutex_exit(buf_pool);
3740 +/********************************************************************//**
3744 +buf_pool_page_hash_x_lock_all(void)
3745 +/*===============================*/
3749 + for (i = 0; i < srv_buf_pool_instances; i++) {
3750 + buf_pool_t* buf_pool;
3752 + buf_pool = buf_pool_from_array(i);
3753 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3757 +/********************************************************************//**
3761 +buf_pool_page_hash_x_unlock_all(void)
3762 +/*=================================*/
3766 + for (i = 0; i < srv_buf_pool_instances; i++) {
3767 + buf_pool_t* buf_pool;
3769 + buf_pool = buf_pool_from_array(i);
3770 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3773 #endif /* !UNIV_HOTBACKUP */
3774 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3775 --- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3776 +++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3777 @@ -113,10 +113,11 @@
3778 buf_page_t* bpage, /*!< in: block to be freed */
3779 ibool zip, /*!< in: TRUE if should remove also the
3780 compressed page of an uncompressed page */
3781 - ibool* buf_pool_mutex_released);
3782 + ibool* buf_pool_mutex_released,
3783 /*!< in: pointer to a variable that will
3784 be assigned TRUE if buf_pool->mutex
3785 was temporarily released, or NULL */
3786 + ibool have_LRU_mutex);
3787 /******************************************************************//**
3788 Try to free a replaceable block.
3789 @return TRUE if found and freed */
3792 buf_LRU_block_free_non_file_page(
3793 /*=============================*/
3794 - buf_block_t* block); /*!< in: block, must not contain a file page */
3795 + buf_block_t* block, /*!< in: block, must not contain a file page */
3796 + ibool have_page_hash_mutex);
3797 /******************************************************************//**
3798 Adds a block to the LRU list. */
3800 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3801 --- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3802 +++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3804 extern mysql_pfs_key_t archive_lock_key;
3805 # endif /* UNIV_LOG_ARCHIVE */
3806 extern mysql_pfs_key_t btr_search_latch_key;
3807 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3808 extern mysql_pfs_key_t buf_block_lock_key;
3809 # ifdef UNIV_SYNC_DEBUG
3810 extern mysql_pfs_key_t buf_block_debug_latch_key;
3811 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3812 --- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3813 +++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3815 extern mysql_pfs_key_t buffer_block_mutex_key;
3816 extern mysql_pfs_key_t buf_pool_mutex_key;
3817 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3818 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3819 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3820 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3821 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3822 extern mysql_pfs_key_t cache_last_read_mutex_key;
3823 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3824 extern mysql_pfs_key_t dict_sys_mutex_key;
3826 #define SYNC_TRX_LOCK_HEAP 298
3827 #define SYNC_TRX_SYS_HEADER 290
3828 #define SYNC_LOG 170
3829 -#define SYNC_LOG_FLUSH_ORDER 147
3830 +#define SYNC_LOG_FLUSH_ORDER 156
3831 #define SYNC_RECV 168
3832 #define SYNC_WORK_QUEUE 162
3833 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3834 @@ -670,8 +674,13 @@
3835 SYNC_SEARCH_SYS, as memory allocation
3836 can call routines there! Otherwise
3837 the level is SYNC_MEM_HASH. */
3838 +#define SYNC_BUF_LRU_LIST 158
3839 +#define SYNC_BUF_PAGE_HASH 157
3840 +#define SYNC_BUF_BLOCK 155 /* Block mutex */
3841 +#define SYNC_BUF_FREE_LIST 153
3842 +#define SYNC_BUF_ZIP_FREE 152
3843 +#define SYNC_BUF_ZIP_HASH 151
3844 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3845 -#define SYNC_BUF_BLOCK 146 /* Block mutex */
3846 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3847 #define SYNC_DOUBLEWRITE 140
3848 #define SYNC_ANY_LATCH 135
3850 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3851 when atomic operations are not enabled */
3853 - ulint waiters; /*!< This ulint is set to 1 if there are (or
3854 + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3855 may be) threads waiting in the global wait
3856 array for this mutex to be released.
3857 Otherwise, this is 0. */
3858 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3859 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3860 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
3861 @@ -3094,7 +3094,7 @@
3862 level += log_sys->max_checkpoint_age
3863 - (lsn - oldest_modification);
3865 - bpage = UT_LIST_GET_NEXT(list, bpage);
3866 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3870 @@ -3180,7 +3180,7 @@
3874 - bpage = UT_LIST_GET_NEXT(list, bpage);
3875 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3879 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3880 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3881 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3883 mutex->lock_word = 0;
3885 mutex->event = os_event_create(NULL);
3886 - mutex_set_waiters(mutex, 0);
3887 + mutex->waiters = 0;
3889 mutex->magic_n = MUTEX_MAGIC_N;
3890 #endif /* UNIV_DEBUG */
3891 @@ -444,6 +444,15 @@
3892 mutex_t* mutex, /*!< in: mutex */
3893 ulint n) /*!< in: value to set */
3895 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3899 + os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3901 + os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3904 volatile ulint* ptr; /* declared volatile to ensure that
3905 the value is stored to memory */
3909 *ptr = n; /* Here we assume that the write of a single
3910 word in memory is atomic */
3914 /******************************************************************//**
3915 @@ -1193,7 +1203,12 @@
3919 + case SYNC_BUF_LRU_LIST:
3920 case SYNC_BUF_FLUSH_LIST:
3921 + case SYNC_BUF_PAGE_HASH:
3922 + case SYNC_BUF_FREE_LIST:
3923 + case SYNC_BUF_ZIP_FREE:
3924 + case SYNC_BUF_ZIP_HASH:
3926 /* We can have multiple mutexes of this type therefore we
3927 can only check whether the greater than condition holds. */
3928 @@ -1211,7 +1226,8 @@
3929 buffer block (block->mutex or buf_pool->zip_mutex). */
3930 if (!sync_thread_levels_g(array, level, FALSE)) {
3931 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3932 - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3933 + /* the exact rule is not fixed yet, for now */
3934 + //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));