1 # name : innodb_split_buf_pool_mutex.patch
2 # introduced : 11 or before
3 # maintainer : Yasufumi
6 # Any small change to this file in the main branch
7 # should be done or reviewed by the maintainer!
8 diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
9 --- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
10 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
15 - buf_pool_mutex_enter(buf_pool);
16 + //buf_pool_mutex_enter(buf_pool);
17 + mutex_enter(&buf_pool->LRU_list_mutex);
18 mutex_enter(&block->mutex);
20 /* Only free the block if it is still allocated to
21 @@ -4077,16 +4078,21 @@
22 && buf_block_get_space(block) == space
23 && buf_block_get_page_no(block) == page_no) {
25 - if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
26 - && all && block->page.zip.data) {
27 + if (buf_LRU_free_block(&block->page, all, TRUE) != BUF_LRU_FREED
28 + && all && block->page.zip.data
29 + /* Now, buf_LRU_free_block() may release mutex temporarily */
30 + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
31 + && buf_block_get_space(block) == space
32 + && buf_block_get_page_no(block) == page_no) {
33 /* Attempt to deallocate the uncompressed page
34 if the whole block cannot be deallocted. */
36 - buf_LRU_free_block(&block->page, FALSE);
37 + buf_LRU_free_block(&block->page, FALSE, TRUE);
41 - buf_pool_mutex_exit(buf_pool);
42 + //buf_pool_mutex_exit(buf_pool);
43 + mutex_exit(&buf_pool->LRU_list_mutex);
44 mutex_exit(&block->mutex);
47 diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
48 --- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
49 +++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
51 rec_offs_init(offsets_);
53 rw_lock_x_lock(&btr_search_latch);
54 - buf_pool_mutex_enter_all();
55 + buf_pool_page_hash_x_lock_all();
57 cell_count = hash_get_n_cells(btr_search_sys->hash_index);
59 @@ -1951,11 +1951,11 @@
60 /* We release btr_search_latch every once in a while to
61 give other queries a chance to run. */
62 if ((i != 0) && ((i % chunk_size) == 0)) {
63 - buf_pool_mutex_exit_all();
64 + buf_pool_page_hash_x_unlock_all();
65 rw_lock_x_unlock(&btr_search_latch);
67 rw_lock_x_lock(&btr_search_latch);
68 - buf_pool_mutex_enter_all();
69 + buf_pool_page_hash_x_lock_all();
72 node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
73 @@ -2066,11 +2066,11 @@
74 /* We release btr_search_latch every once in a while to
75 give other queries a chance to run. */
77 - buf_pool_mutex_exit_all();
78 + buf_pool_page_hash_x_unlock_all();
79 rw_lock_x_unlock(&btr_search_latch);
81 rw_lock_x_lock(&btr_search_latch);
82 - buf_pool_mutex_enter_all();
83 + buf_pool_page_hash_x_lock_all();
86 if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
91 - buf_pool_mutex_exit_all();
92 + buf_pool_page_hash_x_unlock_all();
93 rw_lock_x_unlock(&btr_search_latch);
94 if (UNIV_LIKELY_NULL(heap)) {
96 diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
97 --- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
98 +++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
100 if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
101 #endif /* UNIV_DEBUG_VALGRIND */
103 - ut_ad(buf_pool_mutex_own(buf_pool));
104 + //ut_ad(buf_pool_mutex_own(buf_pool));
105 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
106 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
107 ut_ad(buf_pool->zip_free[i].start != bpage);
108 - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
109 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
111 #ifdef UNIV_DEBUG_VALGRIND
112 if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
114 buf_pool->zip_free[] */
116 #ifdef UNIV_DEBUG_VALGRIND
117 - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
118 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
119 + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
120 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
122 if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
123 if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
125 ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
126 #endif /* UNIV_DEBUG_VALGRIND */
128 - ut_ad(buf_pool_mutex_own(buf_pool));
129 + //ut_ad(buf_pool_mutex_own(buf_pool));
130 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
131 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
132 - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
133 + UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
135 #ifdef UNIV_DEBUG_VALGRIND
136 if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
137 @@ -128,12 +130,13 @@
141 - ut_ad(buf_pool_mutex_own(buf_pool));
142 + //ut_ad(buf_pool_mutex_own(buf_pool));
143 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
144 ut_a(i < BUF_BUDDY_SIZES);
146 #ifndef UNIV_DEBUG_VALGRIND
147 /* Valgrind would complain about accessing free memory. */
148 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
149 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
150 ut_ad(buf_page_get_state(ut_list_node_313)
151 == BUF_BLOCK_ZIP_FREE)));
152 #endif /* !UNIV_DEBUG_VALGRIND */
153 @@ -177,16 +180,19 @@
154 buf_buddy_block_free(
155 /*=================*/
156 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
157 - void* buf) /*!< in: buffer frame to deallocate */
158 + void* buf, /*!< in: buffer frame to deallocate */
159 + ibool have_page_hash_mutex)
161 const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
165 - ut_ad(buf_pool_mutex_own(buf_pool));
166 + //ut_ad(buf_pool_mutex_own(buf_pool));
167 ut_ad(!mutex_own(&buf_pool->zip_mutex));
168 ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
170 + mutex_enter(&buf_pool->zip_hash_mutex);
172 HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
173 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
174 && bpage->in_zip_hash && !bpage->in_page_hash),
175 @@ -198,12 +204,14 @@
176 ut_d(bpage->in_zip_hash = FALSE);
177 HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
179 + mutex_exit(&buf_pool->zip_hash_mutex);
181 ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
182 UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
184 block = (buf_block_t*) bpage;
185 mutex_enter(&block->mutex);
186 - buf_LRU_block_free_non_file_page(block);
187 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
188 mutex_exit(&block->mutex);
190 ut_ad(buf_pool->buddy_n_frames > 0);
193 buf_pool_t* buf_pool = buf_pool_from_block(block);
194 const ulint fold = BUF_POOL_ZIP_FOLD(block);
195 - ut_ad(buf_pool_mutex_own(buf_pool));
196 + //ut_ad(buf_pool_mutex_own(buf_pool));
197 ut_ad(!mutex_own(&buf_pool->zip_mutex));
198 ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
201 ut_ad(!block->page.in_page_hash);
202 ut_ad(!block->page.in_zip_hash);
203 ut_d(block->page.in_zip_hash = TRUE);
205 + mutex_enter(&buf_pool->zip_hash_mutex);
206 HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
207 + mutex_exit(&buf_pool->zip_hash_mutex);
209 ut_d(buf_pool->buddy_n_frames++);
212 bpage->state = BUF_BLOCK_ZIP_FREE;
213 #ifndef UNIV_DEBUG_VALGRIND
214 /* Valgrind would complain about accessing free memory. */
215 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
216 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
217 ut_ad(buf_page_get_state(
219 == BUF_BLOCK_ZIP_FREE)));
220 @@ -291,25 +302,29 @@
221 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
222 ulint i, /*!< in: index of buf_pool->zip_free[],
223 or BUF_BUDDY_SIZES */
224 - ibool* lru) /*!< in: pointer to a variable that
225 + ibool* lru, /*!< in: pointer to a variable that
226 will be assigned TRUE if storage was
227 allocated from the LRU list and
228 buf_pool->mutex was temporarily
229 released, or NULL if the LRU list
230 should not be used */
231 + ibool have_page_hash_mutex)
235 - ut_ad(buf_pool_mutex_own(buf_pool));
236 + //ut_ad(buf_pool_mutex_own(buf_pool));
237 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
238 ut_ad(!mutex_own(&buf_pool->zip_mutex));
240 if (i < BUF_BUDDY_SIZES) {
241 /* Try to allocate from the buddy system. */
242 + mutex_enter(&buf_pool->zip_free_mutex);
243 block = buf_buddy_alloc_zip(buf_pool, i);
248 + mutex_exit(&buf_pool->zip_free_mutex);
251 /* Try allocating from the buf_pool->free list. */
252 @@ -326,19 +341,30 @@
255 /* Try replacing an uncompressed page in the buffer pool. */
256 - buf_pool_mutex_exit(buf_pool);
257 + //buf_pool_mutex_exit(buf_pool);
258 + mutex_exit(&buf_pool->LRU_list_mutex);
259 + if (have_page_hash_mutex) {
260 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
262 block = buf_LRU_get_free_block(buf_pool);
264 - buf_pool_mutex_enter(buf_pool);
265 + //buf_pool_mutex_enter(buf_pool);
266 + mutex_enter(&buf_pool->LRU_list_mutex);
267 + if (have_page_hash_mutex) {
268 + rw_lock_x_lock(&buf_pool->page_hash_latch);
272 buf_buddy_block_register(block);
274 + mutex_enter(&buf_pool->zip_free_mutex);
275 block = buf_buddy_alloc_from(
276 buf_pool, block->frame, i, BUF_BUDDY_SIZES);
279 buf_pool->buddy_stat[i].used++;
280 + mutex_exit(&buf_pool->zip_free_mutex);
287 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
289 - ut_ad(buf_pool_mutex_own(buf_pool));
290 + //ut_ad(buf_pool_mutex_own(buf_pool));
291 +#ifdef UNIV_SYNC_DEBUG
292 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
295 switch (buf_page_get_state(bpage)) {
296 case BUF_BLOCK_ZIP_FREE:
298 case BUF_BLOCK_FILE_PAGE:
299 case BUF_BLOCK_MEMORY:
300 case BUF_BLOCK_REMOVE_HASH:
302 + /* ut_error; */ /* optimistic */
303 case BUF_BLOCK_ZIP_DIRTY:
304 /* Cannot relocate dirty pages. */
309 mutex_enter(&buf_pool->zip_mutex);
310 + mutex_enter(&buf_pool->zip_free_mutex);
312 if (!buf_page_can_relocate(bpage)) {
313 mutex_exit(&buf_pool->zip_mutex);
314 + mutex_exit(&buf_pool->zip_free_mutex);
318 + if (bpage != buf_page_hash_get(buf_pool,
319 + bpage->space, bpage->offset)) {
320 + mutex_exit(&buf_pool->zip_mutex);
321 + mutex_exit(&buf_pool->zip_free_mutex);
325 @@ -384,18 +422,19 @@
326 ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
328 /* relocate buf_pool->zip_clean */
329 - b = UT_LIST_GET_PREV(list, dpage);
330 - UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
331 + b = UT_LIST_GET_PREV(zip_list, dpage);
332 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
335 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
336 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
338 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
339 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
342 UNIV_MEM_INVALID(bpage, sizeof *bpage);
344 mutex_exit(&buf_pool->zip_mutex);
345 + mutex_exit(&buf_pool->zip_free_mutex);
349 @@ -409,14 +448,16 @@
350 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
351 void* src, /*!< in: block to relocate */
352 void* dst, /*!< in: free block to relocate to */
353 - ulint i) /*!< in: index of
354 + ulint i, /*!< in: index of
355 buf_pool->zip_free[] */
356 + ibool have_page_hash_mutex)
359 const ulint size = BUF_BUDDY_LOW << i;
360 ullint usec = ut_time_us(NULL);
362 - ut_ad(buf_pool_mutex_own(buf_pool));
363 + //ut_ad(buf_pool_mutex_own(buf_pool));
364 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
365 ut_ad(!mutex_own(&buf_pool->zip_mutex));
366 ut_ad(!ut_align_offset(src, size));
367 ut_ad(!ut_align_offset(dst, size));
369 if (size >= PAGE_ZIP_MIN_SIZE) {
370 /* This is a compressed page. */
372 + ulint space, page_no;
374 + if (!have_page_hash_mutex) {
375 + mutex_exit(&buf_pool->zip_free_mutex);
376 + mutex_enter(&buf_pool->LRU_list_mutex);
377 + rw_lock_x_lock(&buf_pool->page_hash_latch);
380 /* The src block may be split into smaller blocks,
381 some of which may be free. Thus, the
383 pool), so there is nothing wrong about this. The
384 mach_read_from_4() calls here will only trigger bogus
385 Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
386 - ulint space = mach_read_from_4(
387 + space = mach_read_from_4(
388 (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
389 - ulint page_no = mach_read_from_4(
390 + page_no = mach_read_from_4(
391 (const byte*) src + FIL_PAGE_OFFSET);
392 /* Suppress Valgrind warnings about conditional jump
393 on uninitialized value. */
395 added to buf_pool->page_hash yet. Obviously,
396 it cannot be relocated. */
398 + if (!have_page_hash_mutex) {
399 + mutex_enter(&buf_pool->zip_free_mutex);
400 + mutex_exit(&buf_pool->LRU_list_mutex);
401 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
406 @@ -473,18 +526,27 @@
407 For the sake of simplicity, give up. */
408 ut_ad(page_zip_get_size(&bpage->zip) < size);
410 + if (!have_page_hash_mutex) {
411 + mutex_enter(&buf_pool->zip_free_mutex);
412 + mutex_exit(&buf_pool->LRU_list_mutex);
413 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
418 + /* To keep latch order */
419 + if (have_page_hash_mutex)
420 + mutex_exit(&buf_pool->zip_free_mutex);
422 /* The block must have been allocated, but it may
423 contain uninitialized data. */
424 UNIV_MEM_ASSERT_W(src, size);
426 - mutex = buf_page_get_mutex(bpage);
427 + mutex = buf_page_get_mutex_enter(bpage);
429 - mutex_enter(mutex);
430 + mutex_enter(&buf_pool->zip_free_mutex);
432 - if (buf_page_can_relocate(bpage)) {
433 + if (mutex && buf_page_can_relocate(bpage)) {
434 /* Relocate the compressed page. */
435 ut_a(bpage->zip.data == src);
436 memcpy(dst, src, size);
437 @@ -499,10 +561,22 @@
438 buddy_stat->relocated_usec
439 += ut_time_us(NULL) - usec;
442 + if (!have_page_hash_mutex) {
443 + mutex_exit(&buf_pool->LRU_list_mutex);
444 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
450 + if (!have_page_hash_mutex) {
451 + mutex_exit(&buf_pool->LRU_list_mutex);
452 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
458 } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
459 /* This must be a buf_page_t object. */
460 #if UNIV_WORD_SIZE == 4
461 @@ -511,10 +585,31 @@
462 about uninitialized pad bytes. */
463 UNIV_MEM_ASSERT_RW(src, size);
466 + mutex_exit(&buf_pool->zip_free_mutex);
468 + if (!have_page_hash_mutex) {
469 + mutex_enter(&buf_pool->LRU_list_mutex);
470 + rw_lock_x_lock(&buf_pool->page_hash_latch);
473 if (buf_buddy_relocate_block(src, dst)) {
474 + mutex_enter(&buf_pool->zip_free_mutex);
476 + if (!have_page_hash_mutex) {
477 + mutex_exit(&buf_pool->LRU_list_mutex);
478 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
484 + mutex_enter(&buf_pool->zip_free_mutex);
486 + if (!have_page_hash_mutex) {
487 + mutex_exit(&buf_pool->LRU_list_mutex);
488 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
493 @@ -529,13 +624,15 @@
494 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
495 void* buf, /*!< in: block to be freed, must not be
496 pointed to by the buffer pool */
497 - ulint i) /*!< in: index of buf_pool->zip_free[],
498 + ulint i, /*!< in: index of buf_pool->zip_free[],
499 or BUF_BUDDY_SIZES */
500 + ibool have_page_hash_mutex)
505 - ut_ad(buf_pool_mutex_own(buf_pool));
506 + //ut_ad(buf_pool_mutex_own(buf_pool));
507 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
508 ut_ad(!mutex_own(&buf_pool->zip_mutex));
509 ut_ad(i <= BUF_BUDDY_SIZES);
510 ut_ad(buf_pool->buddy_stat[i].used > 0);
512 ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
514 if (i == BUF_BUDDY_SIZES) {
515 - buf_buddy_block_free(buf_pool, buf);
516 + mutex_exit(&buf_pool->zip_free_mutex);
517 + buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
518 + mutex_enter(&buf_pool->zip_free_mutex);
526 - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
527 + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
528 UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
531 @@ -600,13 +699,13 @@
532 #ifndef UNIV_DEBUG_VALGRIND
534 /* Valgrind would complain about accessing free memory. */
535 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
536 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
537 ut_ad(buf_page_get_state(ut_list_node_313)
538 == BUF_BLOCK_ZIP_FREE)));
539 #endif /* UNIV_DEBUG_VALGRIND */
541 /* The buddy is not free. Is there a free block of this size? */
542 - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
543 + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
546 /* Remove the block from the free list, because a successful
548 buf_buddy_remove_from_free(buf_pool, bpage, i);
550 /* Try to relocate the buddy of buf to the free block. */
551 - if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
552 + if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
554 ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
556 @@ -636,14 +735,14 @@
558 (Parts of the buddy can be free in
559 buf_pool->zip_free[j] with j < i.) */
560 - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
561 + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
562 ut_ad(buf_page_get_state(
564 == BUF_BLOCK_ZIP_FREE
565 && ut_list_node_313 != buddy)));
566 #endif /* !UNIV_DEBUG_VALGRIND */
568 - if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
569 + if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
572 UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
573 diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
574 --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
575 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
577 #ifdef UNIV_PFS_RWLOCK
578 /* Keys to register buffer block related rwlocks and mutexes with
579 performance schema */
580 +UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
581 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
582 # ifdef UNIV_SYNC_DEBUG
583 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
585 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
586 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
587 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
588 +UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
589 +UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
590 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
591 +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
592 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
593 #endif /* UNIV_PFS_MUTEX */
596 block->page.in_zip_hash = FALSE;
597 block->page.in_flush_list = FALSE;
598 block->page.in_free_list = FALSE;
599 - block->in_unzip_LRU_list = FALSE;
600 #endif /* UNIV_DEBUG */
601 block->page.in_LRU_list = FALSE;
602 + block->in_unzip_LRU_list = FALSE;
603 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
604 block->n_pointers = 0;
605 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
607 memset(block->frame, '\0', UNIV_PAGE_SIZE);
609 /* Add the block to the free list */
610 - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
611 + mutex_enter(&buf_pool->free_list_mutex);
612 + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
614 ut_d(block->page.in_free_list = TRUE);
615 + mutex_exit(&buf_pool->free_list_mutex);
616 ut_ad(buf_pool_from_block(block) == buf_pool);
619 @@ -1038,7 +1045,8 @@
620 buf_chunk_t* chunk = buf_pool->chunks;
623 - ut_ad(buf_pool_mutex_own(buf_pool));
624 + //ut_ad(buf_pool_mutex_own(buf_pool));
625 + ut_ad(mutex_own(&buf_pool->zip_free_mutex));
626 for (n = buf_pool->n_chunks; n--; chunk++) {
628 buf_block_t* block = buf_chunk_contains_zip(chunk, data);
629 @@ -1138,7 +1146,7 @@
631 const buf_block_t* block_end;
633 - ut_ad(buf_pool_mutex_own(buf_pool));
634 + //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
636 block_end = chunk->blocks + chunk->size;
638 @@ -1150,8 +1158,10 @@
639 ut_ad(!block->in_unzip_LRU_list);
640 ut_ad(!block->page.in_flush_list);
641 /* Remove the block from the free list. */
642 + mutex_enter(&buf_pool->free_list_mutex);
643 ut_ad(block->page.in_free_list);
644 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
645 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
646 + mutex_exit(&buf_pool->free_list_mutex);
648 /* Free the latches. */
649 mutex_free(&block->mutex);
650 @@ -1208,9 +1218,21 @@
651 ------------------------------- */
652 mutex_create(buf_pool_mutex_key,
653 &buf_pool->mutex, SYNC_BUF_POOL);
654 + mutex_create(buf_pool_LRU_list_mutex_key,
655 + &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
656 + rw_lock_create(buf_pool_page_hash_key,
657 + &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
658 + mutex_create(buf_pool_free_list_mutex_key,
659 + &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
660 + mutex_create(buf_pool_zip_free_mutex_key,
661 + &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
662 + mutex_create(buf_pool_zip_hash_mutex_key,
663 + &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
664 mutex_create(buf_pool_zip_mutex_key,
665 &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
667 + mutex_enter(&buf_pool->LRU_list_mutex);
668 + rw_lock_x_lock(&buf_pool->page_hash_latch);
669 buf_pool_mutex_enter(buf_pool);
671 if (buf_pool_size > 0) {
672 @@ -1223,6 +1245,8 @@
676 + mutex_exit(&buf_pool->LRU_list_mutex);
677 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
678 buf_pool_mutex_exit(buf_pool);
681 @@ -1253,6 +1277,8 @@
683 /* All fields are initialized by mem_zalloc(). */
685 + mutex_exit(&buf_pool->LRU_list_mutex);
686 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
687 buf_pool_mutex_exit(buf_pool);
690 @@ -1467,7 +1493,11 @@
692 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
694 - ut_ad(buf_pool_mutex_own(buf_pool));
695 + //ut_ad(buf_pool_mutex_own(buf_pool));
696 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
697 +#ifdef UNIV_SYNC_DEBUG
698 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
700 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
701 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
702 ut_a(bpage->buf_fix_count == 0);
703 @@ -1554,7 +1584,8 @@
706 btr_search_disable(); /* Empty the adaptive hash index again */
707 - buf_pool_mutex_enter(buf_pool);
708 + //buf_pool_mutex_enter(buf_pool);
709 + mutex_enter(&buf_pool->LRU_list_mutex);
712 if (buf_pool->n_chunks <= 1) {
713 @@ -1625,7 +1656,7 @@
715 buf_LRU_make_block_old(&block->page);
717 - } else if (buf_LRU_free_block(&block->page, TRUE)
718 + } else if (buf_LRU_free_block(&block->page, TRUE, TRUE)
722 @@ -1633,7 +1664,8 @@
723 mutex_exit(&block->mutex);
726 - buf_pool_mutex_exit(buf_pool);
727 + //buf_pool_mutex_exit(buf_pool);
728 + mutex_exit(&buf_pool->LRU_list_mutex);
730 /* Request for a flush of the chunk if it helps.
731 Do not flush if there are non-free blocks, since
732 @@ -1683,7 +1715,8 @@
734 buf_pool->old_pool_size = buf_pool->curr_pool_size;
736 - buf_pool_mutex_exit(buf_pool);
737 + //buf_pool_mutex_exit(buf_pool);
738 + mutex_exit(&buf_pool->LRU_list_mutex);
742 @@ -1724,7 +1757,9 @@
743 hash_table_t* zip_hash;
744 hash_table_t* page_hash;
746 - buf_pool_mutex_enter(buf_pool);
747 + //buf_pool_mutex_enter(buf_pool);
748 + mutex_enter(&buf_pool->LRU_list_mutex);
749 + rw_lock_x_lock(&buf_pool->page_hash_latch);
751 /* Free, create, and populate the hash table. */
752 hash_table_free(buf_pool->page_hash);
753 @@ -1765,8 +1800,9 @@
754 All such blocks are either in buf_pool->zip_clean or
755 in buf_pool->flush_list. */
757 + mutex_enter(&buf_pool->zip_mutex);
758 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
759 - b = UT_LIST_GET_NEXT(list, b)) {
760 + b = UT_LIST_GET_NEXT(zip_list, b)) {
761 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
762 ut_ad(!b->in_flush_list);
763 ut_ad(b->in_LRU_list);
764 @@ -1776,10 +1812,11 @@
765 HASH_INSERT(buf_page_t, hash, page_hash,
766 buf_page_address_fold(b->space, b->offset), b);
768 + mutex_exit(&buf_pool->zip_mutex);
770 buf_flush_list_mutex_enter(buf_pool);
771 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
772 - b = UT_LIST_GET_NEXT(list, b)) {
773 + b = UT_LIST_GET_NEXT(flush_list, b)) {
774 ut_ad(b->in_flush_list);
775 ut_ad(b->in_LRU_list);
776 ut_ad(b->in_page_hash);
777 @@ -1806,7 +1843,9 @@
780 buf_flush_list_mutex_exit(buf_pool);
781 - buf_pool_mutex_exit(buf_pool);
782 + //buf_pool_mutex_exit(buf_pool);
783 + mutex_exit(&buf_pool->LRU_list_mutex);
784 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
787 /********************************************************************
788 @@ -1853,21 +1892,32 @@
791 buf_pool_t* buf_pool = buf_pool_get(space, offset);
792 + mutex_t* block_mutex;
794 - ut_ad(buf_pool_mutex_own(buf_pool));
795 + //ut_ad(buf_pool_mutex_own(buf_pool));
797 + rw_lock_x_lock(&buf_pool->page_hash_latch);
798 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
800 + block_mutex = buf_page_get_mutex_enter(bpage);
804 if (UNIV_LIKELY_NULL(bpage)) {
805 if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
806 /* The page was loaded meanwhile. */
807 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
810 /* Add to an existing watch. */
811 bpage->buf_fix_count++;
812 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
813 + mutex_exit(block_mutex);
817 + /* buf_pool->watch is protected by zip_mutex for now */
818 + mutex_enter(&buf_pool->zip_mutex);
819 for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
820 bpage = &buf_pool->watch[i];
822 @@ -1891,10 +1941,12 @@
823 bpage->space = space;
824 bpage->offset = offset;
825 bpage->buf_fix_count = 1;
827 + bpage->buf_pool_index = buf_pool_index(buf_pool);
828 ut_d(bpage->in_page_hash = TRUE);
829 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
831 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
832 + mutex_exit(&buf_pool->zip_mutex);
834 case BUF_BLOCK_ZIP_PAGE:
835 ut_ad(bpage->in_page_hash);
836 @@ -1912,6 +1964,8 @@
839 /* Fix compiler warning */
840 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
841 + mutex_exit(&buf_pool->zip_mutex);
845 @@ -1941,6 +1995,8 @@
849 + mutex_enter(&buf_pool->LRU_list_mutex);
850 + rw_lock_x_lock(&buf_pool->page_hash_latch);
851 buf_pool_mutex_enter(buf_pool);
852 chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
854 @@ -1959,6 +2015,8 @@
855 buf_pool->n_chunks++;
858 + mutex_exit(&buf_pool->LRU_list_mutex);
859 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
860 buf_pool_mutex_exit(buf_pool);
863 @@ -2046,7 +2104,11 @@
865 buf_page_t* watch) /*!< in/out: sentinel for watch */
867 - ut_ad(buf_pool_mutex_own(buf_pool));
868 + //ut_ad(buf_pool_mutex_own(buf_pool));
869 +#ifdef UNIV_SYNC_DEBUG
870 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
872 + ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
874 HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
875 ut_d(watch->in_page_hash = FALSE);
876 @@ -2068,28 +2130,31 @@
877 buf_pool_t* buf_pool = buf_pool_get(space, offset);
878 ulint fold = buf_page_address_fold(space, offset);
880 - buf_pool_mutex_enter(buf_pool);
881 + //buf_pool_mutex_enter(buf_pool);
882 + rw_lock_x_lock(&buf_pool->page_hash_latch);
883 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
884 /* The page must exist because buf_pool_watch_set()
885 increments buf_fix_count. */
888 if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
889 - mutex_t* mutex = buf_page_get_mutex(bpage);
890 + mutex_t* mutex = buf_page_get_mutex_enter(bpage);
892 - mutex_enter(mutex);
893 ut_a(bpage->buf_fix_count > 0);
894 bpage->buf_fix_count--;
897 + mutex_enter(&buf_pool->zip_mutex);
898 ut_a(bpage->buf_fix_count > 0);
900 if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
901 buf_pool_watch_remove(buf_pool, fold, bpage);
903 + mutex_exit(&buf_pool->zip_mutex);
906 - buf_pool_mutex_exit(buf_pool);
907 + //buf_pool_mutex_exit(buf_pool);
908 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
911 /****************************************************************//**
912 @@ -2109,14 +2174,16 @@
913 buf_pool_t* buf_pool = buf_pool_get(space, offset);
914 ulint fold = buf_page_address_fold(space, offset);
916 - buf_pool_mutex_enter(buf_pool);
917 + //buf_pool_mutex_enter(buf_pool);
918 + rw_lock_s_lock(&buf_pool->page_hash_latch);
920 bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
921 /* The page must exist because buf_pool_watch_set()
922 increments buf_fix_count. */
924 ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
925 - buf_pool_mutex_exit(buf_pool);
926 + //buf_pool_mutex_exit(buf_pool);
927 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
931 @@ -2133,13 +2200,15 @@
933 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
935 - buf_pool_mutex_enter(buf_pool);
936 + //buf_pool_mutex_enter(buf_pool);
937 + mutex_enter(&buf_pool->LRU_list_mutex);
939 ut_a(buf_page_in_file(bpage));
941 buf_LRU_make_block_young(bpage);
943 - buf_pool_mutex_exit(buf_pool);
944 + //buf_pool_mutex_exit(buf_pool);
945 + mutex_exit(&buf_pool->LRU_list_mutex);
948 /********************************************************************//**
949 @@ -2163,14 +2232,20 @@
950 ut_a(buf_page_in_file(bpage));
952 if (buf_page_peek_if_too_old(bpage)) {
953 - buf_pool_mutex_enter(buf_pool);
954 + //buf_pool_mutex_enter(buf_pool);
955 + mutex_enter(&buf_pool->LRU_list_mutex);
956 buf_LRU_make_block_young(bpage);
957 - buf_pool_mutex_exit(buf_pool);
958 + //buf_pool_mutex_exit(buf_pool);
959 + mutex_exit(&buf_pool->LRU_list_mutex);
960 } else if (!access_time) {
961 ulint time_ms = ut_time_ms();
962 - buf_pool_mutex_enter(buf_pool);
963 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
964 + //buf_pool_mutex_enter(buf_pool);
966 buf_page_set_accessed(bpage, time_ms);
967 - buf_pool_mutex_exit(buf_pool);
968 + mutex_exit(block_mutex);
970 + //buf_pool_mutex_exit(buf_pool);
974 @@ -2187,7 +2262,8 @@
976 buf_pool_t* buf_pool = buf_pool_get(space, offset);
978 - buf_pool_mutex_enter(buf_pool);
979 + //buf_pool_mutex_enter(buf_pool);
980 + rw_lock_s_lock(&buf_pool->page_hash_latch);
982 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
984 @@ -2196,7 +2272,8 @@
985 block->check_index_page_at_flush = FALSE;
988 - buf_pool_mutex_exit(buf_pool);
989 + //buf_pool_mutex_exit(buf_pool);
990 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
993 /********************************************************************//**
994 @@ -2215,7 +2292,8 @@
996 buf_pool_t* buf_pool = buf_pool_get(space, offset);
998 - buf_pool_mutex_enter(buf_pool);
999 + //buf_pool_mutex_enter(buf_pool);
1000 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1002 block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
1004 @@ -2226,7 +2304,8 @@
1005 is_hashed = block->is_hashed;
1008 - buf_pool_mutex_exit(buf_pool);
1009 + //buf_pool_mutex_exit(buf_pool);
1010 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1014 @@ -2248,7 +2327,8 @@
1016 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1018 - buf_pool_mutex_enter(buf_pool);
1019 + //buf_pool_mutex_enter(buf_pool);
1020 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1022 bpage = buf_page_hash_get(buf_pool, space, offset);
1024 @@ -2259,7 +2339,8 @@
1025 bpage->file_page_was_freed = TRUE;
1028 - buf_pool_mutex_exit(buf_pool);
1029 + //buf_pool_mutex_exit(buf_pool);
1030 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1034 @@ -2280,7 +2361,8 @@
1036 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1038 - buf_pool_mutex_enter(buf_pool);
1039 + //buf_pool_mutex_enter(buf_pool);
1040 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1042 bpage = buf_page_hash_get(buf_pool, space, offset);
1044 @@ -2289,7 +2371,8 @@
1045 bpage->file_page_was_freed = FALSE;
1048 - buf_pool_mutex_exit(buf_pool);
1049 + //buf_pool_mutex_exit(buf_pool);
1050 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1054 @@ -2324,8 +2407,9 @@
1055 buf_pool->stat.n_page_gets++;
1058 - buf_pool_mutex_enter(buf_pool);
1059 + //buf_pool_mutex_enter(buf_pool);
1061 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1062 bpage = buf_page_hash_get(buf_pool, space, offset);
1064 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1065 @@ -2334,7 +2418,8 @@
1067 /* Page not in buf_pool: needs to be read from file */
1069 - buf_pool_mutex_exit(buf_pool);
1070 + //buf_pool_mutex_exit(buf_pool);
1071 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1073 buf_read_page(space, zip_size, offset);
1075 @@ -2346,10 +2431,15 @@
1076 if (UNIV_UNLIKELY(!bpage->zip.data)) {
1077 /* There is no compressed page. */
1079 - buf_pool_mutex_exit(buf_pool);
1080 + //buf_pool_mutex_exit(buf_pool);
1081 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1085 + block_mutex = buf_page_get_mutex_enter(bpage);
1087 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1089 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1091 switch (buf_page_get_state(bpage)) {
1092 @@ -2358,19 +2448,19 @@
1093 case BUF_BLOCK_MEMORY:
1094 case BUF_BLOCK_REMOVE_HASH:
1095 case BUF_BLOCK_ZIP_FREE:
1097 + mutex_exit(block_mutex);
1099 case BUF_BLOCK_ZIP_PAGE:
1100 case BUF_BLOCK_ZIP_DIRTY:
1101 - block_mutex = &buf_pool->zip_mutex;
1102 - mutex_enter(block_mutex);
1103 + ut_a(block_mutex == &buf_pool->zip_mutex);
1104 bpage->buf_fix_count++;
1106 case BUF_BLOCK_FILE_PAGE:
1107 - block_mutex = &((buf_block_t*) bpage)->mutex;
1108 - mutex_enter(block_mutex);
1109 + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
1111 /* Discard the uncompressed page frame if possible. */
1112 - if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
1113 + if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
1115 mutex_exit(block_mutex);
1117 @@ -2388,7 +2478,7 @@
1118 must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
1119 access_time = buf_page_is_accessed(bpage);
1121 - buf_pool_mutex_exit(buf_pool);
1122 + //buf_pool_mutex_exit(buf_pool);
1124 mutex_exit(block_mutex);
1126 @@ -2697,7 +2787,7 @@
1127 const buf_block_t* block) /*!< in: pointer to block,
1130 - ut_ad(buf_pool_mutex_own(buf_pool));
1131 + //ut_ad(buf_pool_mutex_own(buf_pool));
1133 if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
1134 /* The pointer should be aligned. */
1135 @@ -2733,6 +2823,7 @@
1139 + mutex_t* block_mutex = NULL;
1140 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1143 @@ -2755,9 +2846,11 @@
1144 fold = buf_page_address_fold(space, offset);
1147 - buf_pool_mutex_enter(buf_pool);
1148 + //buf_pool_mutex_enter(buf_pool);
1151 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1153 /* If the guess is a compressed page descriptor that
1154 has been allocated by buf_buddy_alloc(), it may have
1155 been invalidated by buf_buddy_relocate(). In that
1156 @@ -2766,11 +2859,15 @@
1157 the guess may be pointing to a buffer pool chunk that
1158 has been released when resizing the buffer pool. */
1160 - if (!buf_block_is_uncompressed(buf_pool, block)
1161 + if (!block_mutex) {
1162 + block = guess = NULL;
1163 + } else if (!buf_block_is_uncompressed(buf_pool, block)
1164 || offset != block->page.offset
1165 || space != block->page.space
1166 || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1168 + mutex_exit(block_mutex);
1170 block = guess = NULL;
1172 ut_ad(!block->page.in_zip_hash);
1173 @@ -2779,12 +2876,19 @@
1176 if (block == NULL) {
1177 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1178 block = (buf_block_t*) buf_page_hash_get_low(
1179 buf_pool, space, offset, fold);
1181 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1182 + ut_a(block_mutex);
1184 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1188 if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
1189 + mutex_exit(block_mutex);
1193 @@ -2796,12 +2900,14 @@
1194 space, offset, fold);
1196 if (UNIV_LIKELY_NULL(block)) {
1198 + block_mutex = buf_page_get_mutex((buf_page_t*)block);
1199 + ut_a(block_mutex);
1200 + ut_ad(mutex_own(block_mutex));
1205 - buf_pool_mutex_exit(buf_pool);
1206 + //buf_pool_mutex_exit(buf_pool);
1208 if (mode == BUF_GET_IF_IN_POOL
1209 || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1210 @@ -2849,7 +2955,8 @@
1211 /* The page is being read to buffer pool,
1212 but we cannot wait around for the read to
1214 - buf_pool_mutex_exit(buf_pool);
1215 + //buf_pool_mutex_exit(buf_pool);
1216 + mutex_exit(block_mutex);
1220 @@ -2859,38 +2966,49 @@
1223 case BUF_BLOCK_FILE_PAGE:
1224 + if (block_mutex == &buf_pool->zip_mutex) {
1225 + /* this is the wrong mutex... */
1226 + mutex_exit(block_mutex);
1231 case BUF_BLOCK_ZIP_PAGE:
1232 case BUF_BLOCK_ZIP_DIRTY:
1233 + ut_ad(block_mutex == &buf_pool->zip_mutex);
1234 bpage = &block->page;
1235 /* Protect bpage->buf_fix_count. */
1236 - mutex_enter(&buf_pool->zip_mutex);
1237 + //mutex_enter(&buf_pool->zip_mutex);
1239 if (bpage->buf_fix_count
1240 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
1241 /* This condition often occurs when the buffer
1242 is not buffer-fixed, but I/O-fixed by
1243 buf_page_init_for_read(). */
1244 - mutex_exit(&buf_pool->zip_mutex);
1245 + //mutex_exit(&buf_pool->zip_mutex);
1247 /* The block is buffer-fixed or I/O-fixed.
1249 - buf_pool_mutex_exit(buf_pool);
1250 + //buf_pool_mutex_exit(buf_pool);
1251 + mutex_exit(block_mutex);
1252 os_thread_sleep(WAIT_FOR_READ);
1257 /* Allocate an uncompressed page. */
1258 - buf_pool_mutex_exit(buf_pool);
1259 - mutex_exit(&buf_pool->zip_mutex);
1260 + //buf_pool_mutex_exit(buf_pool);
1261 + //mutex_exit(&buf_pool->zip_mutex);
1262 + mutex_exit(block_mutex);
1264 block = buf_LRU_get_free_block(buf_pool);
1266 + block_mutex = &block->mutex;
1268 - buf_pool_mutex_enter(buf_pool);
1269 - mutex_enter(&block->mutex);
1270 + //buf_pool_mutex_enter(buf_pool);
1271 + mutex_enter(&buf_pool->LRU_list_mutex);
1272 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1273 + mutex_enter(block_mutex);
1276 buf_page_t* hash_bpage;
1277 @@ -2903,35 +3021,47 @@
1278 while buf_pool->mutex was released.
1279 Free the block that was allocated. */
1281 - buf_LRU_block_free_non_file_page(block);
1282 - mutex_exit(&block->mutex);
1283 + buf_LRU_block_free_non_file_page(block, TRUE);
1284 + mutex_exit(block_mutex);
1286 block = (buf_block_t*) hash_bpage;
1288 + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
1289 + ut_a(block_mutex);
1291 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1292 + mutex_exit(&buf_pool->LRU_list_mutex);
1297 + mutex_enter(&buf_pool->zip_mutex);
1300 (bpage->buf_fix_count
1301 || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
1303 + mutex_exit(&buf_pool->zip_mutex);
1304 /* The block was buffer-fixed or I/O-fixed
1305 while buf_pool->mutex was not held by this thread.
1306 Free the block that was allocated and try again.
1307 This should be extremely unlikely. */
1309 - buf_LRU_block_free_non_file_page(block);
1310 - mutex_exit(&block->mutex);
1311 + buf_LRU_block_free_non_file_page(block, TRUE);
1312 + //mutex_exit(&block->mutex);
1314 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1315 + mutex_exit(&buf_pool->LRU_list_mutex);
1316 goto wait_until_unfixed;
1319 /* Move the compressed page from bpage to block,
1320 and uncompress it. */
1322 - mutex_enter(&buf_pool->zip_mutex);
1324 buf_relocate(bpage, &block->page);
1326 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1328 buf_block_init_low(block);
1329 block->lock_hash_val = lock_rec_hash(space, offset);
1331 @@ -2940,7 +3070,7 @@
1333 if (buf_page_get_state(&block->page)
1334 == BUF_BLOCK_ZIP_PAGE) {
1335 - UT_LIST_REMOVE(list, buf_pool->zip_clean,
1336 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
1338 ut_ad(!block->page.in_flush_list);
1340 @@ -2957,19 +3087,24 @@
1341 /* Insert at the front of unzip_LRU list */
1342 buf_unzip_LRU_add_block(block, FALSE);
1344 + mutex_exit(&buf_pool->LRU_list_mutex);
1346 block->page.buf_fix_count = 1;
1347 buf_block_set_io_fix(block, BUF_IO_READ);
1348 rw_lock_x_lock_func(&block->lock, 0, file, line);
1350 UNIV_MEM_INVALID(bpage, sizeof *bpage);
1352 - mutex_exit(&block->mutex);
1353 + mutex_exit(block_mutex);
1354 mutex_exit(&buf_pool->zip_mutex);
1356 + buf_pool_mutex_enter(buf_pool);
1357 buf_pool->n_pend_unzip++;
1358 + buf_pool_mutex_exit(buf_pool);
1360 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1361 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
1363 - buf_pool_mutex_exit(buf_pool);
1364 + //buf_pool_mutex_exit(buf_pool);
1366 /* Decompress the page and apply buffered operations
1367 while not holding buf_pool->mutex or block->mutex. */
1368 @@ -2982,12 +3117,15 @@
1371 /* Unfix and unlatch the block. */
1372 - buf_pool_mutex_enter(buf_pool);
1373 - mutex_enter(&block->mutex);
1374 + //buf_pool_mutex_enter(buf_pool);
1375 + block_mutex = &block->mutex;
1376 + mutex_enter(block_mutex);
1377 block->page.buf_fix_count--;
1378 buf_block_set_io_fix(block, BUF_IO_NONE);
1379 - mutex_exit(&block->mutex);
1381 + buf_pool_mutex_enter(buf_pool);
1382 buf_pool->n_pend_unzip--;
1383 + buf_pool_mutex_exit(buf_pool);
1384 rw_lock_x_unlock(&block->lock);
1387 @@ -3003,7 +3141,7 @@
1389 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1391 - mutex_enter(&block->mutex);
1392 + //mutex_enter(&block->mutex);
1393 #if UNIV_WORD_SIZE == 4
1394 /* On 32-bit systems, there is no padding in buf_page_t. On
1395 other systems, Valgrind could complain about uninitialized pad
1396 @@ -3016,7 +3154,7 @@
1397 /* Try to evict the block from the buffer pool, to use the
1398 insert buffer (change buffer) as much as possible. */
1400 - if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
1401 + if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
1402 mutex_exit(&block->mutex);
1403 if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
1404 /* Set the watch, as it would have
1405 @@ -3052,13 +3190,14 @@
1407 buf_block_buf_fix_inc(block, file, line);
1409 - mutex_exit(&block->mutex);
1410 + //mutex_exit(&block->mutex);
1412 /* Check if this is the first access to the page */
1414 access_time = buf_page_is_accessed(&block->page);
1416 - buf_pool_mutex_exit(buf_pool);
1417 + //buf_pool_mutex_exit(buf_pool);
1418 + mutex_exit(block_mutex);
1420 buf_page_set_accessed_make_young(&block->page, access_time);
1422 @@ -3291,9 +3430,11 @@
1423 buf_pool = buf_pool_from_block(block);
1425 if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
1426 - buf_pool_mutex_enter(buf_pool);
1427 + //buf_pool_mutex_enter(buf_pool);
1428 + mutex_enter(&buf_pool->LRU_list_mutex);
1429 buf_LRU_make_block_young(&block->page);
1430 - buf_pool_mutex_exit(buf_pool);
1431 + //buf_pool_mutex_exit(buf_pool);
1432 + mutex_exit(&buf_pool->LRU_list_mutex);
1433 } else if (!buf_page_is_accessed(&block->page)) {
1434 /* Above, we do a dirty read on purpose, to avoid
1435 mutex contention. The field buf_page_t::access_time
1436 @@ -3301,9 +3442,11 @@
1437 field must be protected by mutex, however. */
1438 ulint time_ms = ut_time_ms();
1440 - buf_pool_mutex_enter(buf_pool);
1441 + //buf_pool_mutex_enter(buf_pool);
1442 + mutex_enter(&block->mutex);
1443 buf_page_set_accessed(&block->page, time_ms);
1444 - buf_pool_mutex_exit(buf_pool);
1445 + //buf_pool_mutex_exit(buf_pool);
1446 + mutex_exit(&block->mutex);
1449 ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
1450 @@ -3370,18 +3513,21 @@
1452 ut_ad(mtr->state == MTR_ACTIVE);
1454 - buf_pool_mutex_enter(buf_pool);
1455 + //buf_pool_mutex_enter(buf_pool);
1456 + rw_lock_s_lock(&buf_pool->page_hash_latch);
1457 block = buf_block_hash_get(buf_pool, space_id, page_no);
1459 if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
1460 - buf_pool_mutex_exit(buf_pool);
1461 + //buf_pool_mutex_exit(buf_pool);
1462 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1466 ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
1468 mutex_enter(&block->mutex);
1469 - buf_pool_mutex_exit(buf_pool);
1470 + //buf_pool_mutex_exit(buf_pool);
1471 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
1473 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1474 ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1475 @@ -3470,7 +3616,10 @@
1476 buf_page_t* hash_page;
1477 buf_pool_t* buf_pool = buf_pool_get(space, offset);
1479 - ut_ad(buf_pool_mutex_own(buf_pool));
1480 + //ut_ad(buf_pool_mutex_own(buf_pool));
1481 +#ifdef UNIV_SYNC_DEBUG
1482 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
1484 ut_ad(mutex_own(&(block->mutex)));
1485 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
1487 @@ -3499,11 +3648,14 @@
1488 if (UNIV_LIKELY(!hash_page)) {
1489 } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
1490 /* Preserve the reference count. */
1491 - ulint buf_fix_count = hash_page->buf_fix_count;
1492 + ulint buf_fix_count;
1494 + mutex_enter(&buf_pool->zip_mutex);
1495 + buf_fix_count = hash_page->buf_fix_count;
1496 ut_a(buf_fix_count > 0);
1497 block->page.buf_fix_count += buf_fix_count;
1498 buf_pool_watch_remove(buf_pool, fold, hash_page);
1499 + mutex_exit(&buf_pool->zip_mutex);
1502 "InnoDB: Error: page %lu %lu already found"
1503 @@ -3513,7 +3665,8 @@
1504 (const void*) hash_page, (const void*) block);
1505 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1506 mutex_exit(&block->mutex);
1507 - buf_pool_mutex_exit(buf_pool);
1508 + //buf_pool_mutex_exit(buf_pool);
1509 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1513 @@ -3597,7 +3750,9 @@
1515 fold = buf_page_address_fold(space, offset);
1517 - buf_pool_mutex_enter(buf_pool);
1518 + //buf_pool_mutex_enter(buf_pool);
1519 + mutex_enter(&buf_pool->LRU_list_mutex);
1520 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1522 watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
1523 if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
1524 @@ -3606,9 +3761,15 @@
1527 mutex_enter(&block->mutex);
1528 - buf_LRU_block_free_non_file_page(block);
1529 + mutex_exit(&buf_pool->LRU_list_mutex);
1530 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1531 + buf_LRU_block_free_non_file_page(block, FALSE);
1532 mutex_exit(&block->mutex);
1535 + mutex_exit(&buf_pool->LRU_list_mutex);
1536 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1541 @@ -3631,6 +3792,8 @@
1543 buf_page_init(space, offset, fold, block);
1545 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1547 /* The block must be put to the LRU list, to the old blocks */
1548 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1550 @@ -3658,7 +3821,7 @@
1551 been added to buf_pool->LRU and
1552 buf_pool->page_hash. */
1553 mutex_exit(&block->mutex);
1554 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1555 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1556 mutex_enter(&block->mutex);
1557 block->page.zip.data = data;
1559 @@ -3671,6 +3834,7 @@
1560 buf_unzip_LRU_add_block(block, TRUE);
1563 + mutex_exit(&buf_pool->LRU_list_mutex);
1564 mutex_exit(&block->mutex);
1566 /* Defer buf_buddy_alloc() until after the block has
1567 @@ -3682,8 +3846,8 @@
1568 control block (bpage), in order to avoid the
1569 invocation of buf_buddy_relocate_block() on
1570 uninitialized data. */
1571 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1572 - bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
1573 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
1574 + bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
1576 /* Initialize the buf_pool pointer. */
1577 bpage->buf_pool_index = buf_pool_index(buf_pool);
1578 @@ -3702,8 +3866,11 @@
1580 /* The block was added by some other thread. */
1582 - buf_buddy_free(buf_pool, bpage, sizeof *bpage);
1583 - buf_buddy_free(buf_pool, data, zip_size);
1584 + buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
1585 + buf_buddy_free(buf_pool, data, zip_size, TRUE);
1587 + mutex_exit(&buf_pool->LRU_list_mutex);
1588 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1592 @@ -3747,18 +3914,24 @@
1593 HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
1596 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1598 /* The block must be put to the LRU list, to the old blocks */
1599 buf_LRU_add_block(bpage, TRUE/* to old blocks */);
1600 buf_LRU_insert_zip_clean(bpage);
1602 + mutex_exit(&buf_pool->LRU_list_mutex);
1604 buf_page_set_io_fix(bpage, BUF_IO_READ);
1606 mutex_exit(&buf_pool->zip_mutex);
1609 + buf_pool_mutex_enter(buf_pool);
1610 buf_pool->n_pend_reads++;
1612 buf_pool_mutex_exit(buf_pool);
1614 + //buf_pool_mutex_exit(buf_pool);
1616 if (mode == BUF_READ_IBUF_PAGES_ONLY) {
1618 @@ -3800,7 +3973,9 @@
1620 fold = buf_page_address_fold(space, offset);
1622 - buf_pool_mutex_enter(buf_pool);
1623 + //buf_pool_mutex_enter(buf_pool);
1624 + mutex_enter(&buf_pool->LRU_list_mutex);
1625 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1627 block = (buf_block_t*) buf_page_hash_get_low(
1628 buf_pool, space, offset, fold);
1629 @@ -3816,7 +3991,9 @@
1630 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1632 /* Page can be found in buf_pool */
1633 - buf_pool_mutex_exit(buf_pool);
1634 + //buf_pool_mutex_exit(buf_pool);
1635 + mutex_exit(&buf_pool->LRU_list_mutex);
1636 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1638 buf_block_free(free_block);
1640 @@ -3838,6 +4015,7 @@
1641 mutex_enter(&block->mutex);
1643 buf_page_init(space, offset, fold, block);
1644 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1646 /* The block must be put to the LRU list */
1647 buf_LRU_add_block(&block->page, FALSE);
1648 @@ -3864,7 +4042,7 @@
1649 the reacquisition of buf_pool->mutex. We also must
1650 defer this operation until after the block descriptor
1651 has been added to buf_pool->LRU and buf_pool->page_hash. */
1652 - data = buf_buddy_alloc(buf_pool, zip_size, &lru);
1653 + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
1654 mutex_enter(&block->mutex);
1655 block->page.zip.data = data;
1657 @@ -3882,7 +4060,8 @@
1659 buf_page_set_accessed(&block->page, time_ms);
1661 - buf_pool_mutex_exit(buf_pool);
1662 + //buf_pool_mutex_exit(buf_pool);
1663 + mutex_exit(&buf_pool->LRU_list_mutex);
1665 mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
1667 @@ -3933,6 +4112,8 @@
1668 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1669 const ibool uncompressed = (buf_page_get_state(bpage)
1670 == BUF_BLOCK_FILE_PAGE);
1671 + ibool have_LRU_mutex = FALSE;
1672 + mutex_t* block_mutex;
1674 ut_a(buf_page_in_file(bpage));
1676 @@ -4066,8 +4247,26 @@
1680 + if (io_type == BUF_IO_WRITE
1681 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1682 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
1683 + /* to keep consistency at buf_LRU_insert_zip_clean() */
1684 + have_LRU_mutex = TRUE; /* optimistic */
1687 + if (have_LRU_mutex)
1688 + mutex_enter(&buf_pool->LRU_list_mutex);
1689 + block_mutex = buf_page_get_mutex_enter(bpage);
1690 + ut_a(block_mutex);
1691 + if (io_type == BUF_IO_WRITE
1692 + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
1693 + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
1694 + && !have_LRU_mutex) {
1695 + mutex_exit(block_mutex);
1696 + have_LRU_mutex = TRUE;
1699 buf_pool_mutex_enter(buf_pool);
1700 - mutex_enter(buf_page_get_mutex(bpage));
1702 #ifdef UNIV_IBUF_COUNT_DEBUG
1703 if (io_type == BUF_IO_WRITE || uncompressed) {
1704 @@ -4090,6 +4289,7 @@
1705 the x-latch to this OS thread: do not let this confuse you in
1708 + ut_a(!have_LRU_mutex);
1709 ut_ad(buf_pool->n_pend_reads > 0);
1710 buf_pool->n_pend_reads--;
1711 buf_pool->stat.n_pages_read++;
1712 @@ -4107,6 +4307,9 @@
1714 buf_flush_write_complete(bpage);
1716 + if (have_LRU_mutex)
1717 + mutex_exit(&buf_pool->LRU_list_mutex);
1720 rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
1722 @@ -4129,8 +4332,8 @@
1724 #endif /* UNIV_DEBUG */
1726 - mutex_exit(buf_page_get_mutex(bpage));
1727 buf_pool_mutex_exit(buf_pool);
1728 + mutex_exit(block_mutex);
1731 /*********************************************************************//**
1732 @@ -4147,7 +4350,9 @@
1736 - buf_pool_mutex_enter(buf_pool);
1737 + //buf_pool_mutex_enter(buf_pool);
1738 + mutex_enter(&buf_pool->LRU_list_mutex);
1739 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1741 chunk = buf_pool->chunks;
1743 @@ -4164,7 +4369,9 @@
1747 - buf_pool_mutex_exit(buf_pool);
1748 + //buf_pool_mutex_exit(buf_pool);
1749 + mutex_exit(&buf_pool->LRU_list_mutex);
1750 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1754 @@ -4212,7 +4419,8 @@
1755 freed = buf_LRU_search_and_free_block(buf_pool, 100);
1758 - buf_pool_mutex_enter(buf_pool);
1759 + //buf_pool_mutex_enter(buf_pool);
1760 + mutex_enter(&buf_pool->LRU_list_mutex);
1762 ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
1763 ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
1764 @@ -4225,7 +4433,8 @@
1765 memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
1766 buf_refresh_io_stats(buf_pool);
1768 - buf_pool_mutex_exit(buf_pool);
1769 + //buf_pool_mutex_exit(buf_pool);
1770 + mutex_exit(&buf_pool->LRU_list_mutex);
1773 /*********************************************************************//**
1774 @@ -4267,7 +4476,10 @@
1778 - buf_pool_mutex_enter(buf_pool);
1779 + //buf_pool_mutex_enter(buf_pool);
1780 + mutex_enter(&buf_pool->LRU_list_mutex);
1781 + rw_lock_x_lock(&buf_pool->page_hash_latch);
1782 + /* to keep the new latch order, it cannot be validated correctly... */
1784 chunk = buf_pool->chunks;
1786 @@ -4362,7 +4574,7 @@
1787 /* Check clean compressed-only blocks. */
1789 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1790 - b = UT_LIST_GET_NEXT(list, b)) {
1791 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1792 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1793 switch (buf_page_get_io_fix(b)) {
1795 @@ -4393,7 +4605,7 @@
1797 buf_flush_list_mutex_enter(buf_pool);
1798 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1799 - b = UT_LIST_GET_NEXT(list, b)) {
1800 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1801 ut_ad(b->in_flush_list);
1802 ut_a(b->oldest_modification);
1804 @@ -4452,6 +4664,8 @@
1807 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
1808 + /* because of the latching order with block->mutex, we cannot acquire the needed mutexes before this point */
1810 if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
1811 fprintf(stderr, "Free list len %lu, free blocks %lu\n",
1812 (ulong) UT_LIST_GET_LEN(buf_pool->free),
1813 @@ -4462,8 +4676,11 @@
1814 ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
1815 ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
1816 ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
1819 - buf_pool_mutex_exit(buf_pool);
1820 + //buf_pool_mutex_exit(buf_pool);
1821 + mutex_exit(&buf_pool->LRU_list_mutex);
1822 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
1824 ut_a(buf_LRU_validate());
1825 ut_a(buf_flush_validate(buf_pool));
1826 @@ -4519,7 +4736,9 @@
1827 index_ids = mem_alloc(size * sizeof *index_ids);
1828 counts = mem_alloc(sizeof(ulint) * size);
1830 - buf_pool_mutex_enter(buf_pool);
1831 + //buf_pool_mutex_enter(buf_pool);
1832 + mutex_enter(&buf_pool->LRU_list_mutex);
1833 + mutex_enter(&buf_pool->free_list_mutex);
1834 buf_flush_list_mutex_enter(buf_pool);
1837 @@ -4588,7 +4807,9 @@
1841 - buf_pool_mutex_exit(buf_pool);
1842 + //buf_pool_mutex_exit(buf_pool);
1843 + mutex_exit(&buf_pool->LRU_list_mutex);
1844 + mutex_exit(&buf_pool->free_list_mutex);
1846 for (i = 0; i < n_found; i++) {
1847 index = dict_index_get_if_in_cache(index_ids[i]);
1848 @@ -4645,7 +4866,7 @@
1850 ulint fixed_pages_number = 0;
1852 - buf_pool_mutex_enter(buf_pool);
1853 + //buf_pool_mutex_enter(buf_pool);
1855 chunk = buf_pool->chunks;
1857 @@ -4679,7 +4900,7 @@
1858 /* Traverse the lists of clean and dirty compressed-only blocks. */
1860 for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1861 - b = UT_LIST_GET_NEXT(list, b)) {
1862 + b = UT_LIST_GET_NEXT(zip_list, b)) {
1863 ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1864 ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
1866 @@ -4691,7 +4912,7 @@
1868 buf_flush_list_mutex_enter(buf_pool);
1869 for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1870 - b = UT_LIST_GET_NEXT(list, b)) {
1871 + b = UT_LIST_GET_NEXT(flush_list, b)) {
1872 ut_ad(b->in_flush_list);
1874 switch (buf_page_get_state(b)) {
1875 @@ -4717,7 +4938,7 @@
1877 buf_flush_list_mutex_exit(buf_pool);
1878 mutex_exit(&buf_pool->zip_mutex);
1879 - buf_pool_mutex_exit(buf_pool);
1880 + //buf_pool_mutex_exit(buf_pool);
1882 return(fixed_pages_number);
1884 @@ -4873,6 +5094,8 @@
1885 /* Find appropriate pool_info to store stats for this buffer pool */
1886 pool_info = &all_pool_info[pool_id];
1888 + mutex_enter(&buf_pool->LRU_list_mutex);
1889 + mutex_enter(&buf_pool->free_list_mutex);
1890 buf_pool_mutex_enter(buf_pool);
1891 buf_flush_list_mutex_enter(buf_pool);
1893 @@ -4983,6 +5206,8 @@
1894 pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
1896 buf_refresh_io_stats(buf_pool);
1897 + mutex_exit(&buf_pool->LRU_list_mutex);
1898 + mutex_exit(&buf_pool->free_list_mutex);
1899 buf_pool_mutex_exit(buf_pool);
1902 @@ -5224,11 +5449,13 @@
1906 - buf_pool_mutex_enter(buf_pool);
1907 + //buf_pool_mutex_enter(buf_pool);
1908 + mutex_enter(&buf_pool->free_list_mutex);
1910 len = UT_LIST_GET_LEN(buf_pool->free);
1912 - buf_pool_mutex_exit(buf_pool);
1913 + //buf_pool_mutex_exit(buf_pool);
1914 + mutex_exit(&buf_pool->free_list_mutex);
1918 diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
1919 --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
1920 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
1923 ut_d(block->page.in_flush_list = TRUE);
1924 block->page.oldest_modification = lsn;
1925 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1926 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1928 #ifdef UNIV_DEBUG_VALGRIND
1930 @@ -401,14 +401,14 @@
1931 > block->page.oldest_modification) {
1932 ut_ad(b->in_flush_list);
1934 - b = UT_LIST_GET_NEXT(list, b);
1935 + b = UT_LIST_GET_NEXT(flush_list, b);
1939 if (prev_b == NULL) {
1940 - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
1941 + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
1943 - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
1944 + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
1945 prev_b, &block->page);
1949 //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1950 //ut_ad(buf_pool_mutex_own(buf_pool));
1952 - //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1953 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1954 //ut_ad(bpage->in_LRU_list);
1956 if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
1957 @@ -470,14 +470,14 @@
1958 enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
1961 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1962 - ut_ad(buf_pool_mutex_own(buf_pool));
1963 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1964 + //ut_ad(buf_pool_mutex_own(buf_pool));
1966 - ut_a(buf_page_in_file(bpage));
1967 + //ut_a(buf_page_in_file(bpage));
1968 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1969 ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
1971 - if (bpage->oldest_modification != 0
1972 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
1973 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
1974 ut_ad(bpage->in_flush_list);
1978 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1980 - ut_ad(buf_pool_mutex_own(buf_pool));
1981 + //ut_ad(buf_pool_mutex_own(buf_pool));
1982 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1983 ut_ad(bpage->in_flush_list);
1985 @@ -526,11 +526,11 @@
1987 case BUF_BLOCK_ZIP_DIRTY:
1988 buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
1989 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1990 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
1991 buf_LRU_insert_zip_clean(bpage);
1993 case BUF_BLOCK_FILE_PAGE:
1994 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
1995 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2000 buf_page_t* prev_b = NULL;
2001 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2003 - ut_ad(buf_pool_mutex_own(buf_pool));
2004 + //ut_ad(buf_pool_mutex_own(buf_pool));
2005 /* Must reside in the same buffer pool. */
2006 ut_ad(buf_pool == buf_pool_from_bpage(dpage));
2008 @@ -603,18 +603,18 @@
2009 because we assert on in_flush_list in comparison function. */
2010 ut_d(bpage->in_flush_list = FALSE);
2012 - prev = UT_LIST_GET_PREV(list, bpage);
2013 - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
2014 + prev = UT_LIST_GET_PREV(flush_list, bpage);
2015 + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
2018 ut_ad(prev->in_flush_list);
2019 UT_LIST_INSERT_AFTER(
2022 buf_pool->flush_list,
2028 buf_pool->flush_list,
2031 @@ -1083,7 +1083,7 @@
2034 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2035 - ut_ad(!buf_pool_mutex_own(buf_pool));
2036 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2039 #ifdef UNIV_LOG_DEBUG
2040 @@ -1097,7 +1097,8 @@
2041 io_fixed and oldest_modification != 0. Thus, it cannot be
2042 relocated in the buffer pool or removed from flush_list or
2044 - ut_ad(!buf_pool_mutex_own(buf_pool));
2045 + //ut_ad(!buf_pool_mutex_own(buf_pool));
2046 + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
2047 ut_ad(!buf_flush_list_mutex_own(buf_pool));
2048 ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
2049 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
2050 @@ -1260,12 +1261,18 @@
2051 ibool is_uncompressed;
2053 ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
2054 - ut_ad(buf_pool_mutex_own(buf_pool));
2055 + //ut_ad(buf_pool_mutex_own(buf_pool));
2056 +#ifdef UNIV_SYNC_DEBUG
2057 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
2059 ut_ad(buf_page_in_file(bpage));
2061 block_mutex = buf_page_get_mutex(bpage);
2062 ut_ad(mutex_own(block_mutex));
2064 + buf_pool_mutex_enter(buf_pool);
2065 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2067 ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
2069 buf_page_set_io_fix(bpage, BUF_IO_WRITE);
2070 @@ -1427,14 +1434,16 @@
2072 buf_pool = buf_pool_get(space, i);
2074 - buf_pool_mutex_enter(buf_pool);
2075 + //buf_pool_mutex_enter(buf_pool);
2076 + rw_lock_s_lock(&buf_pool->page_hash_latch);
2078 /* We only want to flush pages from this buffer pool. */
2079 bpage = buf_page_hash_get(buf_pool, space, i);
2083 - buf_pool_mutex_exit(buf_pool);
2084 + //buf_pool_mutex_exit(buf_pool);
2085 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2089 @@ -1446,11 +1455,9 @@
2090 if (flush_type != BUF_FLUSH_LRU
2092 || buf_page_is_old(bpage)) {
2093 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2095 - mutex_enter(block_mutex);
2096 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2098 - if (buf_flush_ready_for_flush(bpage, flush_type)
2099 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
2100 && (i == offset || !bpage->buf_fix_count)) {
2101 /* We only try to flush those
2102 neighbors != offset where the buf fix
2103 @@ -1466,11 +1473,12 @@
2104 ut_ad(!buf_pool_mutex_own(buf_pool));
2108 + } else if (block_mutex) {
2109 mutex_exit(block_mutex);
2112 - buf_pool_mutex_exit(buf_pool);
2113 + //buf_pool_mutex_exit(buf_pool);
2114 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
2118 @@ -1503,21 +1511,25 @@
2119 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2120 #endif /* UNIV_DEBUG */
2122 - ut_ad(buf_pool_mutex_own(buf_pool));
2123 + //ut_ad(buf_pool_mutex_own(buf_pool));
2124 + ut_ad(flush_type != BUF_FLUSH_LRU
2125 + || mutex_own(&buf_pool->LRU_list_mutex));
2127 - block_mutex = buf_page_get_mutex(bpage);
2128 - mutex_enter(block_mutex);
2129 + block_mutex = buf_page_get_mutex_enter(bpage);
2131 - ut_a(buf_page_in_file(bpage));
2132 + //ut_a(buf_page_in_file(bpage));
2134 - if (buf_flush_ready_for_flush(bpage, flush_type)) {
2135 + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
2138 buf_pool_t* buf_pool;
2140 buf_pool = buf_pool_from_bpage(bpage);
2142 - buf_pool_mutex_exit(buf_pool);
2143 + //buf_pool_mutex_exit(buf_pool);
2144 + if (flush_type == BUF_FLUSH_LRU) {
2145 + mutex_exit(&buf_pool->LRU_list_mutex);
2148 /* These fields are protected by both the
2149 buffer pool mutex and block mutex. */
2150 @@ -1533,13 +1545,18 @@
2154 - buf_pool_mutex_enter(buf_pool);
2155 + //buf_pool_mutex_enter(buf_pool);
2156 + if (flush_type == BUF_FLUSH_LRU) {
2157 + mutex_enter(&buf_pool->LRU_list_mutex);
2161 + } else if (block_mutex) {
2162 mutex_exit(block_mutex);
2165 - ut_ad(buf_pool_mutex_own(buf_pool));
2166 + //ut_ad(buf_pool_mutex_own(buf_pool));
2167 + ut_ad(flush_type != BUF_FLUSH_LRU
2168 + || mutex_own(&buf_pool->LRU_list_mutex));
2172 @@ -1560,7 +1577,8 @@
2176 - ut_ad(buf_pool_mutex_own(buf_pool));
2177 + //ut_ad(buf_pool_mutex_own(buf_pool));
2178 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2181 /* Start from the end of the list looking for a
2182 @@ -1582,7 +1600,8 @@
2183 should be flushed, we factor in this value. */
2184 buf_lru_flush_page_count += count;
2186 - ut_ad(buf_pool_mutex_own(buf_pool));
2187 + //ut_ad(buf_pool_mutex_own(buf_pool));
2188 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2192 @@ -1610,9 +1629,10 @@
2196 + buf_page_t* prev_bpage = NULL;
2199 - ut_ad(buf_pool_mutex_own(buf_pool));
2200 + //ut_ad(buf_pool_mutex_own(buf_pool));
2202 /* If we have flushed enough, leave the loop */
2204 @@ -1631,6 +1651,7 @@
2207 ut_a(bpage->oldest_modification > 0);
2208 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2211 if (!bpage || bpage->oldest_modification >= lsn_limit) {
2212 @@ -1672,9 +1693,17 @@
2216 - bpage = UT_LIST_GET_PREV(list, bpage);
2217 + bpage = UT_LIST_GET_PREV(flush_list, bpage);
2219 - ut_ad(!bpage || bpage->in_flush_list);
2220 + //ut_ad(!bpage || bpage->in_flush_list);
2221 + if (bpage != prev_bpage) {
2222 + /* the search might wrap around; retrying */
2223 + buf_flush_list_mutex_exit(buf_pool);
2227 + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
2230 buf_flush_list_mutex_exit(buf_pool);
2232 @@ -1683,7 +1712,7 @@
2234 } while (count < min_n && bpage != NULL && len > 0);
2236 - ut_ad(buf_pool_mutex_own(buf_pool));
2237 + //ut_ad(buf_pool_mutex_own(buf_pool));
2241 @@ -1722,13 +1751,15 @@
2242 || sync_thread_levels_empty_gen(TRUE));
2243 #endif /* UNIV_SYNC_DEBUG */
2245 - buf_pool_mutex_enter(buf_pool);
2246 + //buf_pool_mutex_enter(buf_pool);
2248 /* Note: The buffer pool mutex is released and reacquired within
2249 the flush functions. */
2250 switch(flush_type) {
2252 + mutex_enter(&buf_pool->LRU_list_mutex);
2253 count = buf_flush_LRU_list_batch(buf_pool, min_n);
2254 + mutex_exit(&buf_pool->LRU_list_mutex);
2256 case BUF_FLUSH_LIST:
2257 count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
2258 @@ -1737,7 +1768,7 @@
2262 - buf_pool_mutex_exit(buf_pool);
2263 + //buf_pool_mutex_exit(buf_pool);
2265 buf_flush_buffered_writes();
2267 @@ -1993,7 +2024,7 @@
2269 //buf_pool_mutex_enter(buf_pool);
2271 - buf_pool_mutex_enter(buf_pool);
2272 + mutex_enter(&buf_pool->LRU_list_mutex);
2274 n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
2276 @@ -2010,15 +2041,15 @@
2277 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2280 - block_mutex = buf_page_get_mutex(bpage);
2282 - mutex_enter(block_mutex);
2283 + block_mutex = buf_page_get_mutex_enter(bpage);
2285 - if (buf_flush_ready_for_replace(bpage)) {
2286 + if (block_mutex && buf_flush_ready_for_replace(bpage)) {
2290 - mutex_exit(block_mutex);
2291 + if (block_mutex) {
2292 + mutex_exit(block_mutex);
2297 @@ -2027,7 +2058,7 @@
2299 //buf_pool_mutex_exit(buf_pool);
2301 - buf_pool_mutex_exit(buf_pool);
2302 + mutex_exit(&buf_pool->LRU_list_mutex);
2304 if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
2306 @@ -2226,7 +2257,7 @@
2308 ut_ad(buf_flush_list_mutex_own(buf_pool));
2310 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
2311 + UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
2312 ut_ad(ut_list_node_313->in_flush_list));
2314 bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2315 @@ -2266,7 +2297,7 @@
2316 rnode = rbt_next(buf_pool->flush_rbt, rnode);
2319 - bpage = UT_LIST_GET_NEXT(list, bpage);
2320 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
2322 ut_a(!bpage || om >= bpage->oldest_modification);
2324 diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
2325 --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
2326 +++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
2329 buf_LRU_block_free_hashed_page(
2330 /*===========================*/
2331 - buf_block_t* block); /*!< in: block, must contain a file page and
2332 + buf_block_t* block, /*!< in: block, must contain a file page and
2333 be in a state where it can be freed */
2334 + ibool have_page_hash_mutex);
2336 /******************************************************************//**
2337 Determines if the unzip_LRU list should be used for evicting a victim
2338 @@ -154,15 +155,20 @@
2340 buf_LRU_evict_from_unzip_LRU(
2341 /*=========================*/
2342 - buf_pool_t* buf_pool)
2343 + buf_pool_t* buf_pool,
2344 + ibool have_LRU_mutex)
2349 - ut_ad(buf_pool_mutex_own(buf_pool));
2350 + //ut_ad(buf_pool_mutex_own(buf_pool));
2352 + if (!have_LRU_mutex)
2353 + mutex_enter(&buf_pool->LRU_list_mutex);
2354 /* If the unzip_LRU list is empty, we can only use the LRU. */
2355 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
2356 + if (!have_LRU_mutex)
2357 + mutex_exit(&buf_pool->LRU_list_mutex);
2361 @@ -171,14 +177,20 @@
2362 decompressed pages in the buffer pool. */
2363 if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
2364 <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
2365 + if (!have_LRU_mutex)
2366 + mutex_exit(&buf_pool->LRU_list_mutex);
2370 /* If eviction hasn't started yet, we assume by default
2371 that a workload is disk bound. */
2372 if (buf_pool->freed_page_clock == 0) {
2373 + if (!have_LRU_mutex)
2374 + mutex_exit(&buf_pool->LRU_list_mutex);
2377 + if (!have_LRU_mutex)
2378 + mutex_exit(&buf_pool->LRU_list_mutex);
2380 /* Calculate the average over past intervals, and add the values
2381 of the current interval. */
2382 @@ -246,19 +258,23 @@
2383 page_arr = ut_malloc(
2384 sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
2386 - buf_pool_mutex_enter(buf_pool);
2387 + //buf_pool_mutex_enter(buf_pool);
2388 + mutex_enter(&buf_pool->LRU_list_mutex);
2392 bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2394 while (bpage != NULL) {
2395 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2396 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2397 buf_page_t* prev_bpage;
2399 - mutex_enter(block_mutex);
2400 prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
2402 + if (!block_mutex) {
2406 ut_a(buf_page_in_file(bpage));
2408 if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
2409 @@ -287,14 +303,16 @@
2411 /* Array full. We release the buf_pool->mutex to
2412 obey the latching order. */
2413 - buf_pool_mutex_exit(buf_pool);
2414 + //buf_pool_mutex_exit(buf_pool);
2415 + mutex_exit(&buf_pool->LRU_list_mutex);
2417 buf_LRU_drop_page_hash_batch(
2418 id, zip_size, page_arr, num_entries);
2422 - buf_pool_mutex_enter(buf_pool);
2423 + //buf_pool_mutex_enter(buf_pool);
2424 + mutex_enter(&buf_pool->LRU_list_mutex);
2426 mutex_exit(block_mutex);
2432 - buf_pool_mutex_exit(buf_pool);
2433 + //buf_pool_mutex_exit(buf_pool);
2434 + mutex_exit(&buf_pool->LRU_list_mutex);
2436 /* Drop any remaining batch of search hashed pages. */
2437 buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
2442 - buf_pool_mutex_enter(buf_pool);
2443 + //buf_pool_mutex_enter(buf_pool);
2444 + mutex_enter(&buf_pool->LRU_list_mutex);
2445 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2449 @@ -369,8 +390,16 @@
2453 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
2454 - mutex_enter(block_mutex);
2455 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
2457 + if (!block_mutex) {
2458 + /* This should not be possible; something is
2459 + wrong, so fall back to scan_again */
2461 + all_freed = FALSE;
2463 + goto next_page_no_mutex;
2466 if (bpage->buf_fix_count > 0) {
2472 - buf_pool_mutex_exit(buf_pool);
2473 + //buf_pool_mutex_exit(buf_pool);
2474 + mutex_exit(&buf_pool->LRU_list_mutex);
2475 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2477 zip_size = buf_page_get_zip_size(bpage);
2478 page_no = buf_page_get_page_no(bpage);
2480 if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
2481 != BUF_BLOCK_ZIP_FREE) {
2482 buf_LRU_block_free_hashed_page((buf_block_t*)
2486 /* The block_mutex should have been
2487 released by buf_LRU_block_remove_hashed_page()
2492 - buf_pool_mutex_exit(buf_pool);
2493 + //buf_pool_mutex_exit(buf_pool);
2494 + mutex_exit(&buf_pool->LRU_list_mutex);
2495 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2498 os_thread_sleep(20000);
2501 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2503 - ut_ad(buf_pool_mutex_own(buf_pool));
2504 + //ut_ad(buf_pool_mutex_own(buf_pool));
2505 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2506 + ut_ad(mutex_own(&buf_pool->flush_list_mutex));
2507 ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
2509 /* Find the first successor of bpage in the LRU list
2510 @@ -540,17 +575,17 @@
2513 b = UT_LIST_GET_NEXT(LRU, b);
2514 - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
2515 + } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
2517 /* Insert bpage before b, i.e., after the predecessor of b. */
2519 - b = UT_LIST_GET_PREV(list, b);
2520 + b = UT_LIST_GET_PREV(zip_list, b);
2524 - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
2525 + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
2527 - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
2528 + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
2532 @@ -563,18 +598,19 @@
2533 buf_LRU_free_from_unzip_LRU_list(
2534 /*=============================*/
2535 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
2536 - ulint n_iterations) /*!< in: how many times this has
2537 + ulint n_iterations, /*!< in: how many times this has
2538 been called repeatedly without
2539 result: a high value means that
2540 we should search farther; we will
2541 search n_iterations / 5 of the
2542 unzip_LRU list, or nothing if
2543 n_iterations >= 5 */
2544 + ibool have_LRU_mutex)
2549 - ut_ad(buf_pool_mutex_own(buf_pool));
2550 + //ut_ad(buf_pool_mutex_own(buf_pool));
2552 /* Theoratically it should be much easier to find a victim
2553 from unzip_LRU as we can choose even a dirty block (as we'll
2555 if we have done five iterations so far. */
2557 if (UNIV_UNLIKELY(n_iterations >= 5)
2558 - || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
2559 + || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
2563 @@ -592,18 +628,25 @@
2564 distance = 100 + (n_iterations
2565 * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
2568 for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
2569 UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
2570 block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
2572 enum buf_lru_free_block_status freed;
2574 + mutex_enter(&block->mutex);
2575 + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
2576 + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2577 + mutex_exit(&block->mutex);
2581 ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
2582 ut_ad(block->in_unzip_LRU_list);
2583 ut_ad(block->page.in_LRU_list);
2585 - mutex_enter(&block->mutex);
2586 - freed = buf_LRU_free_block(&block->page, FALSE);
2587 + freed = buf_LRU_free_block(&block->page, FALSE, have_LRU_mutex);
2588 mutex_exit(&block->mutex);
2591 @@ -637,21 +680,23 @@
2592 buf_LRU_free_from_common_LRU_list(
2593 /*==============================*/
2594 buf_pool_t* buf_pool,
2595 - ulint n_iterations)
2596 + ulint n_iterations,
2597 /*!< in: how many times this has been called
2598 repeatedly without result: a high value means
2599 that we should search farther; if
2600 n_iterations < 10, then we search
2601 n_iterations / 10 * buf_pool->curr_size
2602 pages from the end of the LRU list */
2603 + ibool have_LRU_mutex)
2608 - ut_ad(buf_pool_mutex_own(buf_pool));
2609 + //ut_ad(buf_pool_mutex_own(buf_pool));
2611 distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
2614 for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2615 UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
2616 bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
2617 @@ -659,14 +704,23 @@
2618 enum buf_lru_free_block_status freed;
2620 mutex_t* block_mutex
2621 - = buf_page_get_mutex(bpage);
2622 + = buf_page_get_mutex_enter(bpage);
2624 + if (!block_mutex) {
2628 + if (!bpage->in_LRU_list
2629 + || !buf_page_in_file(bpage)) {
2630 + mutex_exit(block_mutex);
2634 ut_ad(buf_page_in_file(bpage));
2635 ut_ad(bpage->in_LRU_list);
2637 - mutex_enter(block_mutex);
2638 accessed = buf_page_is_accessed(bpage);
2639 - freed = buf_LRU_free_block(bpage, TRUE);
2640 + freed = buf_LRU_free_block(bpage, TRUE, have_LRU_mutex);
2641 mutex_exit(block_mutex);
2644 @@ -718,16 +772,23 @@
2645 n_iterations / 5 of the unzip_LRU list. */
2647 ibool freed = FALSE;
2648 + ibool have_LRU_mutex = FALSE;
2650 - buf_pool_mutex_enter(buf_pool);
2651 + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
2652 + have_LRU_mutex = TRUE;
2654 + //buf_pool_mutex_enter(buf_pool);
2655 + if (have_LRU_mutex)
2656 + mutex_enter(&buf_pool->LRU_list_mutex);
2658 - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
2659 + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
2662 freed = buf_LRU_free_from_common_LRU_list(
2663 - buf_pool, n_iterations);
2664 + buf_pool, n_iterations, have_LRU_mutex);
2667 + buf_pool_mutex_enter(buf_pool);
2669 buf_pool->LRU_flush_ended = 0;
2670 } else if (buf_pool->LRU_flush_ended > 0) {
2674 buf_pool_mutex_exit(buf_pool);
2675 + if (have_LRU_mutex)
2676 + mutex_exit(&buf_pool->LRU_list_mutex);
2682 buf_pool = buf_pool_from_array(i);
2684 - buf_pool_mutex_enter(buf_pool);
2685 + //buf_pool_mutex_enter(buf_pool);
2686 + mutex_enter(&buf_pool->LRU_list_mutex);
2687 + mutex_enter(&buf_pool->free_list_mutex);
2689 if (!recv_recovery_on
2690 && UT_LIST_GET_LEN(buf_pool->free)
2695 - buf_pool_mutex_exit(buf_pool);
2696 + //buf_pool_mutex_exit(buf_pool);
2697 + mutex_exit(&buf_pool->LRU_list_mutex);
2698 + mutex_exit(&buf_pool->free_list_mutex);
2702 @@ -823,9 +890,10 @@
2706 - ut_ad(buf_pool_mutex_own(buf_pool));
2707 + //ut_ad(buf_pool_mutex_own(buf_pool));
2709 - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
2710 + mutex_enter(&buf_pool->free_list_mutex);
2711 + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
2716 ut_ad(!block->page.in_flush_list);
2717 ut_ad(!block->page.in_LRU_list);
2718 ut_a(!buf_page_in_file(&block->page));
2719 - UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
2720 + UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
2722 + mutex_exit(&buf_pool->free_list_mutex);
2724 mutex_enter(&block->mutex);
2727 ut_ad(buf_pool_from_block(block) == buf_pool);
2729 mutex_exit(&block->mutex);
2731 + mutex_exit(&buf_pool->free_list_mutex);
2736 ibool mon_value_was = FALSE;
2737 ibool started_monitor = FALSE;
2739 - buf_pool_mutex_enter(buf_pool);
2740 + //buf_pool_mutex_enter(buf_pool);
2742 if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
2743 + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
2744 @@ -934,7 +1006,7 @@
2746 /* If there is a block in the free list, take it */
2747 block = buf_LRU_get_free_only(buf_pool);
2748 - buf_pool_mutex_exit(buf_pool);
2749 + //buf_pool_mutex_exit(buf_pool);
2752 ut_ad(buf_pool_from_block(block) == buf_pool);
2753 @@ -1034,7 +1106,8 @@
2756 ut_a(buf_pool->LRU_old);
2757 - ut_ad(buf_pool_mutex_own(buf_pool));
2758 + //ut_ad(buf_pool_mutex_own(buf_pool));
2759 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2760 ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
2761 ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
2762 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
2763 @@ -1100,7 +1173,8 @@
2767 - ut_ad(buf_pool_mutex_own(buf_pool));
2768 + //ut_ad(buf_pool_mutex_own(buf_pool));
2769 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2770 ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
2772 /* We first initialize all blocks in the LRU list as old and then use
2773 @@ -1135,13 +1209,14 @@
2776 ut_ad(buf_page_in_file(bpage));
2777 - ut_ad(buf_pool_mutex_own(buf_pool));
2778 + //ut_ad(buf_pool_mutex_own(buf_pool));
2779 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2781 if (buf_page_belongs_to_unzip_LRU(bpage)) {
2782 buf_block_t* block = (buf_block_t*) bpage;
2784 ut_ad(block->in_unzip_LRU_list);
2785 - ut_d(block->in_unzip_LRU_list = FALSE);
2786 + block->in_unzip_LRU_list = FALSE;
2788 UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
2790 @@ -1159,7 +1234,8 @@
2794 - ut_ad(buf_pool_mutex_own(buf_pool));
2795 + //ut_ad(buf_pool_mutex_own(buf_pool));
2796 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2798 ut_a(buf_page_in_file(bpage));
2800 @@ -1236,12 +1312,13 @@
2804 - ut_ad(buf_pool_mutex_own(buf_pool));
2805 + //ut_ad(buf_pool_mutex_own(buf_pool));
2806 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2808 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2810 ut_ad(!block->in_unzip_LRU_list);
2811 - ut_d(block->in_unzip_LRU_list = TRUE);
2812 + block->in_unzip_LRU_list = TRUE;
2815 UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
2816 @@ -1262,7 +1339,8 @@
2820 - ut_ad(buf_pool_mutex_own(buf_pool));
2821 + //ut_ad(buf_pool_mutex_own(buf_pool));
2822 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2824 ut_a(buf_page_in_file(bpage));
2826 @@ -1313,7 +1391,8 @@
2830 - ut_ad(buf_pool_mutex_own(buf_pool));
2831 + //ut_ad(buf_pool_mutex_own(buf_pool));
2832 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2834 ut_a(buf_page_in_file(bpage));
2835 ut_ad(!bpage->in_LRU_list);
2836 @@ -1392,7 +1471,8 @@
2838 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2840 - ut_ad(buf_pool_mutex_own(buf_pool));
2841 + //ut_ad(buf_pool_mutex_own(buf_pool));
2842 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
2845 buf_pool->stat.n_pages_made_young++;
2846 @@ -1432,17 +1512,18 @@
2849 buf_page_t* bpage, /*!< in: block to be freed */
2850 - ibool zip) /*!< in: TRUE if should remove also the
2851 + ibool zip, /*!< in: TRUE if should remove also the
2852 compressed page of an uncompressed page */
2853 + ibool have_LRU_mutex)
2855 buf_page_t* b = NULL;
2856 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2857 mutex_t* block_mutex = buf_page_get_mutex(bpage);
2859 - ut_ad(buf_pool_mutex_own(buf_pool));
2860 + //ut_ad(buf_pool_mutex_own(buf_pool));
2861 ut_ad(mutex_own(block_mutex));
2862 ut_ad(buf_page_in_file(bpage));
2863 - ut_ad(bpage->in_LRU_list);
2864 + //ut_ad(bpage->in_LRU_list);
2865 ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
2866 #if UNIV_WORD_SIZE == 4
2867 /* On 32-bit systems, there is no padding in buf_page_t. On
2868 @@ -1451,7 +1532,7 @@
2869 UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
2872 - if (!buf_page_can_relocate(bpage)) {
2873 + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
2875 /* Do not free buffer-fixed or I/O-fixed blocks. */
2876 return(BUF_LRU_NOT_FREED);
2877 @@ -1483,15 +1564,15 @@
2878 If it cannot be allocated (without freeing a block
2879 from the LRU list), refuse to free bpage. */
2881 - buf_pool_mutex_exit_forbid(buf_pool);
2882 - b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
2883 - buf_pool_mutex_exit_allow(buf_pool);
2884 + //buf_pool_mutex_exit_forbid(buf_pool);
2885 + b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
2886 + //buf_pool_mutex_exit_allow(buf_pool);
2888 if (UNIV_UNLIKELY(!b)) {
2889 return(BUF_LRU_CANNOT_RELOCATE);
2892 - memcpy(b, bpage, sizeof *b);
2893 + //memcpy(b, bpage, sizeof *b);
2897 @@ -1502,6 +1583,39 @@
2899 #endif /* UNIV_DEBUG */
2901 + /* to avoid breaking the latch order, we must release and re-enter block_mutex */
2902 + mutex_exit(block_mutex);
2904 + if (!have_LRU_mutex)
2905 + mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
2906 + rw_lock_x_lock(&buf_pool->page_hash_latch);
2907 + mutex_enter(block_mutex);
2909 + /* recheck states of block */
2910 + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
2911 + || !buf_page_can_relocate(bpage)) {
2914 + buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
2916 + if (!have_LRU_mutex)
2917 + mutex_exit(&buf_pool->LRU_list_mutex);
2918 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2919 + return(BUF_LRU_NOT_FREED);
2920 + } else if (zip || !bpage->zip.data) {
2921 + if (bpage->oldest_modification)
2923 + } else if (bpage->oldest_modification) {
2924 + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
2925 + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
2931 + memcpy(b, bpage, sizeof *b);
2934 if (buf_LRU_block_remove_hashed_page(bpage, zip)
2935 != BUF_BLOCK_ZIP_FREE) {
2936 ut_a(bpage->buf_fix_count == 0);
2937 @@ -1518,6 +1632,10 @@
2941 + while (prev_b && !prev_b->in_LRU_list) {
2942 + prev_b = UT_LIST_GET_PREV(LRU, prev_b);
2945 b->state = b->oldest_modification
2946 ? BUF_BLOCK_ZIP_DIRTY
2947 : BUF_BLOCK_ZIP_PAGE;
2948 @@ -1610,7 +1728,9 @@
2949 b->io_fix = BUF_IO_READ;
2952 - buf_pool_mutex_exit(buf_pool);
2953 + //buf_pool_mutex_exit(buf_pool);
2954 + mutex_exit(&buf_pool->LRU_list_mutex);
2955 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2956 mutex_exit(block_mutex);
2958 /* Remove possible adaptive hash index on the page.
2959 @@ -1642,7 +1762,9 @@
2960 : BUF_NO_CHECKSUM_MAGIC);
2963 - buf_pool_mutex_enter(buf_pool);
2964 + //buf_pool_mutex_enter(buf_pool);
2965 + if (have_LRU_mutex)
2966 + mutex_enter(&buf_pool->LRU_list_mutex);
2967 mutex_enter(block_mutex);
2970 @@ -1652,13 +1774,17 @@
2971 mutex_exit(&buf_pool->zip_mutex);
2974 - buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
2975 + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
2977 /* The block_mutex should have been released by
2978 buf_LRU_block_remove_hashed_page() when it returns
2979 BUF_BLOCK_ZIP_FREE. */
2980 ut_ad(block_mutex == &buf_pool->zip_mutex);
2981 mutex_enter(block_mutex);
2983 + if (!have_LRU_mutex)
2984 + mutex_exit(&buf_pool->LRU_list_mutex);
2985 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
2988 return(BUF_LRU_FREED);
2989 @@ -1670,13 +1796,14 @@
2991 buf_LRU_block_free_non_file_page(
2992 /*=============================*/
2993 - buf_block_t* block) /*!< in: block, must not contain a file page */
2994 + buf_block_t* block, /*!< in: block, must not contain a file page */
2995 + ibool have_page_hash_mutex)
2998 buf_pool_t* buf_pool = buf_pool_from_block(block);
3001 - ut_ad(buf_pool_mutex_own(buf_pool));
3002 + //ut_ad(buf_pool_mutex_own(buf_pool));
3003 ut_ad(mutex_own(&block->mutex));
3005 switch (buf_block_get_state(block)) {
3006 @@ -1710,18 +1837,21 @@
3008 block->page.zip.data = NULL;
3009 mutex_exit(&block->mutex);
3010 - buf_pool_mutex_exit_forbid(buf_pool);
3011 + //buf_pool_mutex_exit_forbid(buf_pool);
3014 - buf_pool, data, page_zip_get_size(&block->page.zip));
3015 + buf_pool, data, page_zip_get_size(&block->page.zip),
3016 + have_page_hash_mutex);
3018 - buf_pool_mutex_exit_allow(buf_pool);
3019 + //buf_pool_mutex_exit_allow(buf_pool);
3020 mutex_enter(&block->mutex);
3021 page_zip_set_size(&block->page.zip, 0);
3024 - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
3025 + mutex_enter(&buf_pool->free_list_mutex);
3026 + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
3027 ut_d(block->page.in_free_list = TRUE);
3028 + mutex_exit(&buf_pool->free_list_mutex);
3030 UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
3032 @@ -1751,7 +1881,11 @@
3033 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3036 - ut_ad(buf_pool_mutex_own(buf_pool));
3037 + //ut_ad(buf_pool_mutex_own(buf_pool));
3038 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3039 +#ifdef UNIV_SYNC_DEBUG
3040 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
3042 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3044 ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
3045 @@ -1859,7 +1993,9 @@
3047 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3048 mutex_exit(buf_page_get_mutex(bpage));
3049 - buf_pool_mutex_exit(buf_pool);
3050 + //buf_pool_mutex_exit(buf_pool);
3051 + mutex_exit(&buf_pool->LRU_list_mutex);
3052 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3056 @@ -1880,17 +2016,17 @@
3057 ut_a(bpage->zip.data);
3058 ut_a(buf_page_get_zip_size(bpage));
3060 - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
3061 + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
3063 mutex_exit(&buf_pool->zip_mutex);
3064 - buf_pool_mutex_exit_forbid(buf_pool);
3065 + //buf_pool_mutex_exit_forbid(buf_pool);
3068 buf_pool, bpage->zip.data,
3069 - page_zip_get_size(&bpage->zip));
3070 + page_zip_get_size(&bpage->zip), TRUE);
3072 - buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
3073 - buf_pool_mutex_exit_allow(buf_pool);
3074 + buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
3075 + //buf_pool_mutex_exit_allow(buf_pool);
3077 UNIV_MEM_UNDESC(bpage);
3078 return(BUF_BLOCK_ZIP_FREE);
3079 @@ -1913,13 +2049,13 @@
3080 ut_ad(!bpage->in_flush_list);
3081 ut_ad(!bpage->in_LRU_list);
3082 mutex_exit(&((buf_block_t*) bpage)->mutex);
3083 - buf_pool_mutex_exit_forbid(buf_pool);
3084 + //buf_pool_mutex_exit_forbid(buf_pool);
3088 - page_zip_get_size(&bpage->zip));
3089 + page_zip_get_size(&bpage->zip), TRUE);
3091 - buf_pool_mutex_exit_allow(buf_pool);
3092 + //buf_pool_mutex_exit_allow(buf_pool);
3093 mutex_enter(&((buf_block_t*) bpage)->mutex);
3094 page_zip_set_size(&bpage->zip, 0);
3096 @@ -1945,18 +2081,19 @@
3098 buf_LRU_block_free_hashed_page(
3099 /*===========================*/
3100 - buf_block_t* block) /*!< in: block, must contain a file page and
3101 + buf_block_t* block, /*!< in: block, must contain a file page and
3102 be in a state where it can be freed */
3103 + ibool have_page_hash_mutex)
3106 - buf_pool_t* buf_pool = buf_pool_from_block(block);
3107 - ut_ad(buf_pool_mutex_own(buf_pool));
3108 + //buf_pool_t* buf_pool = buf_pool_from_block(block);
3109 + //ut_ad(buf_pool_mutex_own(buf_pool));
3111 ut_ad(mutex_own(&block->mutex));
3113 buf_block_set_state(block, BUF_BLOCK_MEMORY);
3115 - buf_LRU_block_free_non_file_page(block);
3116 + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
3119 /**********************************************************************//**
3120 @@ -1983,7 +2120,8 @@
3124 - buf_pool_mutex_enter(buf_pool);
3125 + //buf_pool_mutex_enter(buf_pool);
3126 + mutex_enter(&buf_pool->LRU_list_mutex);
3128 if (ratio != buf_pool->LRU_old_ratio) {
3129 buf_pool->LRU_old_ratio = ratio;
3130 @@ -1995,7 +2133,8 @@
3134 - buf_pool_mutex_exit(buf_pool);
3135 + //buf_pool_mutex_exit(buf_pool);
3136 + mutex_exit(&buf_pool->LRU_list_mutex);
3138 buf_pool->LRU_old_ratio = ratio;
3140 @@ -2100,7 +2239,8 @@
3144 - buf_pool_mutex_enter(buf_pool);
3145 + //buf_pool_mutex_enter(buf_pool);
3146 + mutex_enter(&buf_pool->LRU_list_mutex);
3148 if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
3150 @@ -2161,16 +2301,22 @@
3152 ut_a(buf_pool->LRU_old_len == old_len);
3154 - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
3155 + mutex_exit(&buf_pool->LRU_list_mutex);
3156 + mutex_enter(&buf_pool->free_list_mutex);
3158 + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
3159 ut_ad(ut_list_node_313->in_free_list));
3161 for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
3163 - bpage = UT_LIST_GET_NEXT(list, bpage)) {
3164 + bpage = UT_LIST_GET_NEXT(free, bpage)) {
3166 ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
3169 + mutex_exit(&buf_pool->free_list_mutex);
3170 + mutex_enter(&buf_pool->LRU_list_mutex);
3172 UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
3173 ut_ad(ut_list_node_313->in_unzip_LRU_list
3174 && ut_list_node_313->page.in_LRU_list));
3175 @@ -2184,7 +2330,8 @@
3176 ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
3179 - buf_pool_mutex_exit(buf_pool);
3180 + //buf_pool_mutex_exit(buf_pool);
3181 + mutex_exit(&buf_pool->LRU_list_mutex);
3184 /**********************************************************************//**
3185 @@ -2220,7 +2367,8 @@
3186 const buf_page_t* bpage;
3189 - buf_pool_mutex_enter(buf_pool);
3190 + //buf_pool_mutex_enter(buf_pool);
3191 + mutex_enter(&buf_pool->LRU_list_mutex);
3193 bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
3195 @@ -2277,7 +2425,8 @@
3196 bpage = UT_LIST_GET_NEXT(LRU, bpage);
3199 - buf_pool_mutex_exit(buf_pool);
3200 + //buf_pool_mutex_exit(buf_pool);
3201 + mutex_exit(&buf_pool->LRU_list_mutex);
3204 /**********************************************************************//**
3205 diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
3206 --- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
3207 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
3212 + buf_pool_mutex_exit(buf_pool);
3214 /* Check that almost all pages in the area have been accessed; if
3215 offset == low, the accesses must be in a descending order, otherwise,
3220 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3221 for (i = low; i < high; i++) {
3222 bpage = buf_page_hash_get(buf_pool, space, i);
3226 if (fail_count > threshold) {
3227 /* Too many failures: return */
3228 - buf_pool_mutex_exit(buf_pool);
3229 + //buf_pool_mutex_exit(buf_pool);
3230 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3235 bpage = buf_page_hash_get(buf_pool, space, offset);
3237 if (bpage == NULL) {
3238 - buf_pool_mutex_exit(buf_pool);
3239 + //buf_pool_mutex_exit(buf_pool);
3240 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3245 pred_offset = fil_page_get_prev(frame);
3246 succ_offset = fil_page_get_next(frame);
3248 - buf_pool_mutex_exit(buf_pool);
3249 + //buf_pool_mutex_exit(buf_pool);
3250 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3252 if ((offset == low) && (succ_offset == offset + 1)) {
3254 diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
3255 --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
3256 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
3257 @@ -264,6 +264,10 @@
3258 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3259 {&buf_pool_mutex_key, "buf_pool_mutex", 0},
3260 {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
3261 + {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
3262 + {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
3263 + {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
3264 + {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
3265 {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
3266 {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
3267 {&dict_sys_mutex_key, "dict_sys_mutex", 0},
3269 {&archive_lock_key, "archive_lock", 0},
3270 # endif /* UNIV_LOG_ARCHIVE */
3271 {&btr_search_latch_key, "btr_search_latch", 0},
3272 + {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
3273 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
3274 {&buf_block_lock_key, "buf_block_lock", 0},
3275 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
3276 diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
3277 --- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
3278 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
3279 @@ -1565,7 +1565,8 @@
3281 buf_pool = buf_pool_from_array(i);
3283 - buf_pool_mutex_enter(buf_pool);
3284 + //buf_pool_mutex_enter(buf_pool);
3285 + mutex_enter(&buf_pool->zip_free_mutex);
3287 for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
3288 buf_buddy_stat_t* buddy_stat;
3289 @@ -1595,7 +1596,8 @@
3293 - buf_pool_mutex_exit(buf_pool);
3294 + //buf_pool_mutex_exit(buf_pool);
3295 + mutex_exit(&buf_pool->zip_free_mutex);
3299 diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
3300 --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
3301 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
3302 @@ -3766,9 +3766,11 @@
3303 ulint fold = buf_page_address_fold(space, page_no);
3304 buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3306 - buf_pool_mutex_enter(buf_pool);
3307 + //buf_pool_mutex_enter(buf_pool);
3308 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3309 bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3310 - buf_pool_mutex_exit(buf_pool);
3311 + //buf_pool_mutex_exit(buf_pool);
3312 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3314 if (UNIV_LIKELY_NULL(bpage)) {
3315 /* A buffer pool watch has been set or the
3316 diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
3317 --- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
3318 +++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
3320 buf_pool_t* buf_pool,
3321 /*!< buffer pool in which the block resides */
3322 ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3323 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3324 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3325 TRUE if storage was allocated from the LRU list
3326 and buf_pool->mutex was temporarily released,
3327 or NULL if the LRU list should not be used */
3328 + ibool have_page_hash_mutex)
3329 __attribute__((malloc));
3331 /**********************************************************************//**
3333 /*!< buffer pool in which the block resides */
3334 void* buf, /*!< in: block to be freed, must not be
3335 pointed to by the buffer pool */
3336 - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
3337 + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
3338 + ibool have_page_hash_mutex)
3339 __attribute__((nonnull));
3342 diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
3343 --- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
3344 +++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
3346 /*!< in: buffer pool in which the page resides */
3347 ulint i, /*!< in: index of buf_pool->zip_free[],
3348 or BUF_BUDDY_SIZES */
3349 - ibool* lru) /*!< in: pointer to a variable that will be assigned
3350 + ibool* lru, /*!< in: pointer to a variable that will be assigned
3351 TRUE if storage was allocated from the LRU list
3352 and buf_pool->mutex was temporarily released,
3353 or NULL if the LRU list should not be used */
3354 + ibool have_page_hash_mutex)
3355 __attribute__((malloc));
3357 /**********************************************************************//**
3359 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3360 void* buf, /*!< in: block to be freed, must not be
3361 pointed to by the buffer pool */
3362 - ulint i) /*!< in: index of buf_pool->zip_free[],
3363 + ulint i, /*!< in: index of buf_pool->zip_free[],
3364 or BUF_BUDDY_SIZES */
3365 + ibool have_page_hash_mutex)
3366 __attribute__((nonnull));
3368 /**********************************************************************//**
3369 @@ -102,16 +104,17 @@
3371 ulint size, /*!< in: block size, up to
3373 - ibool* lru) /*!< in: pointer to a variable
3374 + ibool* lru, /*!< in: pointer to a variable
3375 that will be assigned TRUE if
3376 storage was allocated from the
3377 LRU list and buf_pool->mutex was
3378 temporarily released, or NULL if
3379 the LRU list should not be used */
3380 + ibool have_page_hash_mutex)
3382 - ut_ad(buf_pool_mutex_own(buf_pool));
3383 + //ut_ad(buf_pool_mutex_own(buf_pool));
3385 - return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
3386 + return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
3389 /**********************************************************************//**
3390 @@ -123,12 +126,25 @@
3391 buf_pool_t* buf_pool, /*!< in: buffer pool instance */
3392 void* buf, /*!< in: block to be freed, must not be
3393 pointed to by the buffer pool */
3394 - ulint size) /*!< in: block size, up to
3395 + ulint size, /*!< in: block size, up to
3397 + ibool have_page_hash_mutex)
3399 - ut_ad(buf_pool_mutex_own(buf_pool));
3400 + //ut_ad(buf_pool_mutex_own(buf_pool));
3402 + if (!have_page_hash_mutex) {
3403 + mutex_enter(&buf_pool->LRU_list_mutex);
3404 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3407 - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
3408 + mutex_enter(&buf_pool->zip_free_mutex);
3409 + buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
3410 + mutex_exit(&buf_pool->zip_free_mutex);
3412 + if (!have_page_hash_mutex) {
3413 + mutex_exit(&buf_pool->LRU_list_mutex);
3414 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3418 #ifdef UNIV_MATERIALIZE
3419 diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
3420 --- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
3421 +++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
3422 @@ -203,6 +203,20 @@
3423 /*==========================*/
3425 /********************************************************************//**
3429 +buf_pool_page_hash_x_lock_all(void);
3430 +/*================================*/
3432 +/********************************************************************//**
3436 +buf_pool_page_hash_x_unlock_all(void);
3437 +/*==================================*/
3439 +/********************************************************************//**
3440 Creates the buffer pool.
3441 @return own: buf_pool object, NULL if not enough memory or error */
3443 @@ -832,6 +846,15 @@
3444 const buf_page_t* bpage) /*!< in: pointer to control block */
3445 __attribute__((pure));
3447 +/*************************************************************************
3448 +Gets the mutex of a block and enters the mutex with consistency. */
3451 +buf_page_get_mutex_enter(
3452 +/*=========================*/
3453 + const buf_page_t* bpage) /*!< in: pointer to control block */
3454 + __attribute__((pure));
3456 /*********************************************************************//**
3457 Get the flush type of a page.
3458 @return flush type */
3459 @@ -1313,7 +1336,7 @@
3460 All these are protected by buf_pool->mutex. */
3463 - UT_LIST_NODE_T(buf_page_t) list;
3464 + /* UT_LIST_NODE_T(buf_page_t) list; */
3465 /*!< based on state, this is a
3466 list node, protected either by
3467 buf_pool->mutex or by
3468 @@ -1341,6 +1364,10 @@
3469 BUF_BLOCK_REMOVE_HASH or
3470 BUF_BLOCK_READY_IN_USE. */
3472 + /* resplit for optimistic use */
3473 + UT_LIST_NODE_T(buf_page_t) free;
3474 + UT_LIST_NODE_T(buf_page_t) flush_list;
3475 + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
3477 ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
3478 when buf_pool->flush_list_mutex is
3479 @@ -1433,11 +1460,11 @@
3480 a block is in the unzip_LRU list
3481 if page.state == BUF_BLOCK_FILE_PAGE
3482 and page.zip.data != NULL */
3484 +//#ifdef UNIV_DEBUG
3485 ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
3486 decompressed LRU list;
3487 used in debugging */
3488 -#endif /* UNIV_DEBUG */
3489 +//#endif /* UNIV_DEBUG */
3490 mutex_t mutex; /*!< mutex protecting this block:
3491 state (also protected by the buffer
3492 pool mutex), io_fix, buf_fix_count,
3493 @@ -1612,6 +1639,11 @@
3494 pool instance, protects compressed
3495 only pages (of type buf_page_t, not
3497 + mutex_t LRU_list_mutex;
3498 + rw_lock_t page_hash_latch;
3499 + mutex_t free_list_mutex;
3500 + mutex_t zip_free_mutex;
3501 + mutex_t zip_hash_mutex;
3502 ulint instance_no; /*!< Array index of this buffer
3504 ulint old_pool_size; /*!< Old pool size in bytes */
3505 diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
3506 --- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
3507 +++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
3509 case BUF_BLOCK_ZIP_FREE:
3510 /* This is a free page in buf_pool->zip_free[].
3511 Such pages should only be accessed by the buddy allocator. */
3513 + /* ut_error; */ /* optimistic */
3515 case BUF_BLOCK_ZIP_PAGE:
3516 case BUF_BLOCK_ZIP_DIRTY:
3517 @@ -317,9 +317,14 @@
3519 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3521 + if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
3522 +		/* TODO: this code is interim; it should be confirmed later. */
3523 + return(&buf_pool->zip_mutex);
3526 switch (buf_page_get_state(bpage)) {
3527 case BUF_BLOCK_ZIP_FREE:
3529 + /* ut_error; */ /* optimistic */
3531 case BUF_BLOCK_ZIP_PAGE:
3532 case BUF_BLOCK_ZIP_DIRTY:
3533 @@ -329,6 +334,28 @@
3537 +/*************************************************************************
3538 +Gets the mutex of a block and enters the mutex with consistency. */
3541 +buf_page_get_mutex_enter(
3542 +/*=========================*/
3543 + const buf_page_t* bpage) /*!< in: pointer to control block */
3545 + mutex_t* block_mutex;
3548 + block_mutex = buf_page_get_mutex(bpage);
3550 + return block_mutex;
3552 + mutex_enter(block_mutex);
3553 + if (block_mutex == buf_page_get_mutex(bpage))
3554 + return block_mutex;
3555 + mutex_exit(block_mutex);
3559 /*********************************************************************//**
3560 Get the flush type of a page.
3561 @return flush type */
3563 enum buf_io_fix io_fix) /*!< in: io_fix state */
3566 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3567 - ut_ad(buf_pool_mutex_own(buf_pool));
3568 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3569 + //ut_ad(buf_pool_mutex_own(buf_pool));
3571 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3573 @@ -456,14 +483,14 @@
3574 const buf_page_t* bpage) /*!< control block being relocated */
3577 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3578 - ut_ad(buf_pool_mutex_own(buf_pool));
3579 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3580 + //ut_ad(buf_pool_mutex_own(buf_pool));
3582 ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3583 ut_ad(buf_page_in_file(bpage));
3584 - ut_ad(bpage->in_LRU_list);
3585 + //ut_ad(bpage->in_LRU_list);
3587 - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
3588 + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
3589 && bpage->buf_fix_count == 0);
3593 const buf_page_t* bpage) /*!< in: control block */
3596 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3597 - ut_ad(buf_pool_mutex_own(buf_pool));
3598 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3599 + //ut_ad(buf_pool_mutex_own(buf_pool));
3601 ut_ad(buf_page_in_file(bpage));
3604 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3605 #endif /* UNIV_DEBUG */
3606 ut_a(buf_page_in_file(bpage));
3607 - ut_ad(buf_pool_mutex_own(buf_pool));
3608 + //ut_ad(buf_pool_mutex_own(buf_pool));
3609 + ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
3610 ut_ad(bpage->in_LRU_list);
3612 #ifdef UNIV_LRU_DEBUG
3613 @@ -545,9 +573,10 @@
3614 ulint time_ms) /*!< in: ut_time_ms() */
3617 - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3618 - ut_ad(buf_pool_mutex_own(buf_pool));
3619 + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3620 + //ut_ad(buf_pool_mutex_own(buf_pool));
3622 + ut_ad(mutex_own(buf_page_get_mutex(bpage)));
3623 ut_a(buf_page_in_file(bpage));
3625 if (!bpage->access_time) {
3626 @@ -761,19 +790,19 @@
3628 buf_block_t* block) /*!< in, own: block to be freed */
3630 - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3631 + //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3633 - buf_pool_mutex_enter(buf_pool);
3634 + //buf_pool_mutex_enter(buf_pool);
3636 mutex_enter(&block->mutex);
3638 ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
3640 - buf_LRU_block_free_non_file_page(block);
3641 + buf_LRU_block_free_non_file_page(block, FALSE);
3643 mutex_exit(&block->mutex);
3645 - buf_pool_mutex_exit(buf_pool);
3646 + //buf_pool_mutex_exit(buf_pool);
3648 #endif /* !UNIV_HOTBACKUP */
3650 @@ -821,17 +850,17 @@
3654 - mutex_t* block_mutex = buf_page_get_mutex(bpage);
3656 - mutex_enter(block_mutex);
3657 + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
3659 - if (buf_page_in_file(bpage)) {
3660 + if (block_mutex && buf_page_in_file(bpage)) {
3661 lsn = bpage->newest_modification;
3666 - mutex_exit(block_mutex);
3667 + if (block_mutex) {
3668 + mutex_exit(block_mutex);
3674 #ifdef UNIV_SYNC_DEBUG
3675 buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
3677 - ut_ad((buf_pool_mutex_own(buf_pool)
3678 + ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
3679 && (block->page.buf_fix_count == 0))
3680 || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
3681 #endif /* UNIV_SYNC_DEBUG */
3682 @@ -979,7 +1008,11 @@
3686 - ut_ad(buf_pool_mutex_own(buf_pool));
3687 + //ut_ad(buf_pool_mutex_own(buf_pool));
3688 +#ifdef UNIV_SYNC_DEBUG
3689 + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
3690 + || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
3692 ut_ad(fold == buf_page_address_fold(space, offset));
3694 /* Look for the page in the hash table */
3695 @@ -1064,11 +1097,13 @@
3696 const buf_page_t* bpage;
3697 buf_pool_t* buf_pool = buf_pool_get(space, offset);
3699 - buf_pool_mutex_enter(buf_pool);
3700 + //buf_pool_mutex_enter(buf_pool);
3701 + rw_lock_s_lock(&buf_pool->page_hash_latch);
3703 bpage = buf_page_hash_get(buf_pool, space, offset);
3705 - buf_pool_mutex_exit(buf_pool);
3706 + //buf_pool_mutex_exit(buf_pool);
3707 + rw_lock_s_unlock(&buf_pool->page_hash_latch);
3709 return(bpage != NULL);
3711 @@ -1196,4 +1231,38 @@
3712 buf_pool_mutex_exit(buf_pool);
3716 +/********************************************************************//**
3720 +buf_pool_page_hash_x_lock_all(void)
3721 +/*===============================*/
3725 + for (i = 0; i < srv_buf_pool_instances; i++) {
3726 + buf_pool_t* buf_pool;
3728 + buf_pool = buf_pool_from_array(i);
3729 + rw_lock_x_lock(&buf_pool->page_hash_latch);
3733 +/********************************************************************//**
3737 +buf_pool_page_hash_x_unlock_all(void)
3738 +/*=================================*/
3742 + for (i = 0; i < srv_buf_pool_instances; i++) {
3743 + buf_pool_t* buf_pool;
3745 + buf_pool = buf_pool_from_array(i);
3746 + rw_lock_x_unlock(&buf_pool->page_hash_latch);
3749 #endif /* !UNIV_HOTBACKUP */
3750 diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
3751 --- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
3752 +++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
3756 buf_page_t* bpage, /*!< in: block to be freed */
3757 - ibool zip) /*!< in: TRUE if should remove also the
3758 + ibool zip, /*!< in: TRUE if should remove also the
3759 compressed page of an uncompressed page */
3760 + ibool have_LRU_mutex)
3761 __attribute__((nonnull));
3762 /******************************************************************//**
3763 Try to free a replaceable block.
3766 buf_LRU_block_free_non_file_page(
3767 /*=============================*/
3768 - buf_block_t* block); /*!< in: block, must not contain a file page */
3769 + buf_block_t* block, /*!< in: block, must not contain a file page */
3770 + ibool have_page_hash_mutex);
3771 /******************************************************************//**
3772 Adds a block to the LRU list. */
3774 diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
3775 --- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
3776 +++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
3778 extern mysql_pfs_key_t archive_lock_key;
3779 # endif /* UNIV_LOG_ARCHIVE */
3780 extern mysql_pfs_key_t btr_search_latch_key;
3781 +extern mysql_pfs_key_t buf_pool_page_hash_key;
3782 extern mysql_pfs_key_t buf_block_lock_key;
3783 # ifdef UNIV_SYNC_DEBUG
3784 extern mysql_pfs_key_t buf_block_debug_latch_key;
3785 diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
3786 --- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
3787 +++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
3789 extern mysql_pfs_key_t buffer_block_mutex_key;
3790 extern mysql_pfs_key_t buf_pool_mutex_key;
3791 extern mysql_pfs_key_t buf_pool_zip_mutex_key;
3792 +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
3793 +extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
3794 +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
3795 +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
3796 extern mysql_pfs_key_t cache_last_read_mutex_key;
3797 extern mysql_pfs_key_t dict_foreign_err_mutex_key;
3798 extern mysql_pfs_key_t dict_sys_mutex_key;
3800 #define SYNC_TRX_LOCK_HEAP 298
3801 #define SYNC_TRX_SYS_HEADER 290
3802 #define SYNC_LOG 170
3803 -#define SYNC_LOG_FLUSH_ORDER 147
3804 +#define SYNC_LOG_FLUSH_ORDER 156
3805 #define SYNC_RECV 168
3806 #define SYNC_WORK_QUEUE 162
3807 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
3808 @@ -670,8 +674,13 @@
3809 SYNC_SEARCH_SYS, as memory allocation
3810 can call routines there! Otherwise
3811 the level is SYNC_MEM_HASH. */
3812 +#define SYNC_BUF_LRU_LIST 158
3813 +#define SYNC_BUF_PAGE_HASH 157
3814 +#define SYNC_BUF_BLOCK 155 /* Block mutex */
3815 +#define SYNC_BUF_FREE_LIST 153
3816 +#define SYNC_BUF_ZIP_FREE 152
3817 +#define SYNC_BUF_ZIP_HASH 151
3818 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
3819 -#define SYNC_BUF_BLOCK 146 /* Block mutex */
3820 #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
3821 #define SYNC_DOUBLEWRITE 140
3822 #define SYNC_ANY_LATCH 135
3824 os_fast_mutex; /*!< We use this OS mutex in place of lock_word
3825 when atomic operations are not enabled */
3827 - ulint waiters; /*!< This ulint is set to 1 if there are (or
3828 + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
3829 may be) threads waiting in the global wait
3830 array for this mutex to be released.
3831 Otherwise, this is 0. */
3832 diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
3833 --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
3834 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
3835 @@ -3099,7 +3099,7 @@
3836 level += log_sys->max_checkpoint_age
3837 - (lsn - oldest_modification);
3839 - bpage = UT_LIST_GET_NEXT(list, bpage);
3840 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3844 @@ -3185,7 +3185,7 @@
3848 - bpage = UT_LIST_GET_NEXT(list, bpage);
3849 + bpage = UT_LIST_GET_NEXT(flush_list, bpage);
3853 diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
3854 --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
3855 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
3857 mutex->lock_word = 0;
3859 mutex->event = os_event_create(NULL);
3860 - mutex_set_waiters(mutex, 0);
3861 + mutex->waiters = 0;
3863 mutex->magic_n = MUTEX_MAGIC_N;
3864 #endif /* UNIV_DEBUG */
3865 @@ -463,6 +463,15 @@
3866 mutex_t* mutex, /*!< in: mutex */
3867 ulint n) /*!< in: value to set */
3869 +#ifdef INNODB_RW_LOCKS_USE_ATOMICS
3873 + os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
3875 + os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
3878 volatile ulint* ptr; /* declared volatile to ensure that
3879 the value is stored to memory */
3883 *ptr = n; /* Here we assume that the write of a single
3884 word in memory is atomic */
3888 /******************************************************************//**
3889 @@ -1185,7 +1195,12 @@
3893 + case SYNC_BUF_LRU_LIST:
3894 case SYNC_BUF_FLUSH_LIST:
3895 + case SYNC_BUF_PAGE_HASH:
3896 + case SYNC_BUF_FREE_LIST:
3897 + case SYNC_BUF_ZIP_FREE:
3898 + case SYNC_BUF_ZIP_HASH:
3900 /* We can have multiple mutexes of this type therefore we
3901 can only check whether the greater than condition holds. */
3902 @@ -1203,7 +1218,8 @@
3903 buffer block (block->mutex or buf_pool->zip_mutex). */
3904 if (!sync_thread_levels_g(array, level, FALSE)) {
3905 ut_a(sync_thread_levels_g(array, level - 1, TRUE));
3906 - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
3907 +			/* the exact latching rule is not fixed yet; check disabled for now */
3908 + //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));