# name : innodb_split_buf_pool_mutex.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c --- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900 @@ -3935,7 +3935,8 @@ mtr_commit(mtr); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); mutex_enter(&block->mutex); /* Only free the block if it is still allocated to @@ -3946,17 +3947,22 @@ && buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all, NULL) + if (buf_LRU_free_block(&block->page, all, NULL, TRUE) != BUF_LRU_FREED - && all && block->page.zip.data) { + && all && block->page.zip.data + /* Now, buf_LRU_free_block() may release mutex temporarily */ + && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE + && buf_block_get_space(block) == space + && buf_block_get_page_no(block) == page_no) { /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. 
*/ - buf_LRU_free_block(&block->page, FALSE, NULL); + buf_LRU_free_block(&block->page, FALSE, NULL, TRUE); } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); mutex_exit(&block->mutex); } diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c --- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900 +++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900 @@ -1943,7 +1943,7 @@ rec_offs_init(offsets_); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); + buf_pool_page_hash_x_lock_all(); cell_count = hash_get_n_cells(btr_search_sys->hash_index); @@ -1951,11 +1951,11 @@ /* We release btr_search_latch every once in a while to give other queries a chance to run. */ if ((i != 0) && ((i % chunk_size) == 0)) { - buf_pool_mutex_exit_all(); + buf_pool_page_hash_x_unlock_all(); rw_lock_x_unlock(&btr_search_latch); os_thread_yield(); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); + buf_pool_page_hash_x_lock_all(); } node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node; @@ -2066,11 +2066,11 @@ /* We release btr_search_latch every once in a while to give other queries a chance to run. 
*/ if (i != 0) { - buf_pool_mutex_exit_all(); + buf_pool_page_hash_x_unlock_all(); rw_lock_x_unlock(&btr_search_latch); os_thread_yield(); rw_lock_x_lock(&btr_search_latch); - buf_pool_mutex_enter_all(); + buf_pool_page_hash_x_lock_all(); } if (!ha_validate(btr_search_sys->hash_index, i, end_index)) { @@ -2078,7 +2078,7 @@ } } - buf_pool_mutex_exit_all(); + buf_pool_page_hash_x_unlock_all(); rw_lock_x_unlock(&btr_search_latch); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c --- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900 +++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900 @@ -73,10 +73,11 @@ if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i); #endif /* UNIV_DEBUG_VALGRIND */ - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); ut_ad(buf_pool->zip_free[i].start != bpage); - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage); + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage); #ifdef UNIV_DEBUG_VALGRIND if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i); @@ -96,8 +97,8 @@ buf_pool->zip_free[] */ { #ifdef UNIV_DEBUG_VALGRIND - buf_page_t* prev = UT_LIST_GET_PREV(list, bpage); - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage); + buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage); + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage); if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i); if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i); @@ -106,9 +107,10 @@ ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE); #endif /* UNIV_DEBUG_VALGRIND */ - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); - UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); + 
UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage); #ifdef UNIV_DEBUG_VALGRIND if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i); @@ -128,12 +130,13 @@ { buf_page_t* bpage; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_a(i < BUF_BUDDY_SIZES); #ifndef UNIV_DEBUG_VALGRIND /* Valgrind would complain about accessing free memory. */ - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i], + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], ut_ad(buf_page_get_state(ut_list_node_313) == BUF_BLOCK_ZIP_FREE))); #endif /* !UNIV_DEBUG_VALGRIND */ @@ -177,16 +180,19 @@ buf_buddy_block_free( /*=================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* buf) /*!< in: buffer frame to deallocate */ + void* buf, /*!< in: buffer frame to deallocate */ + ibool have_page_hash_mutex) { const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); buf_page_t* bpage; buf_block_t* block; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE)); + mutex_enter(&buf_pool->zip_hash_mutex); + HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage, ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY && bpage->in_zip_hash && !bpage->in_page_hash), @@ -198,12 +204,14 @@ ut_d(bpage->in_zip_hash = FALSE); HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage); + mutex_exit(&buf_pool->zip_hash_mutex); + ut_d(memset(buf, 0, UNIV_PAGE_SIZE)); UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE); block = (buf_block_t*) bpage; mutex_enter(&block->mutex); - buf_LRU_block_free_non_file_page(block); + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex); mutex_exit(&block->mutex); ut_ad(buf_pool->buddy_n_frames > 0); @@ -220,7 +228,7 @@ { buf_pool_t* buf_pool = buf_pool_from_block(block); const ulint fold = BUF_POOL_ZIP_FOLD(block); - ut_ad(buf_pool_mutex_own(buf_pool)); 
+ //ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); @@ -232,7 +240,10 @@ ut_ad(!block->page.in_page_hash); ut_ad(!block->page.in_zip_hash); ut_d(block->page.in_zip_hash = TRUE); + + mutex_enter(&buf_pool->zip_hash_mutex); HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page); + mutex_exit(&buf_pool->zip_hash_mutex); ut_d(buf_pool->buddy_n_frames++); } @@ -268,7 +279,7 @@ bpage->state = BUF_BLOCK_ZIP_FREE; #ifndef UNIV_DEBUG_VALGRIND /* Valgrind would complain about accessing free memory. */ - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i], + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], ut_ad(buf_page_get_state( ut_list_node_313) == BUF_BLOCK_ZIP_FREE))); @@ -291,25 +302,29 @@ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ ulint i, /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that + ibool* lru, /*!< in: pointer to a variable that will be assigned TRUE if storage was allocated from the LRU list and buf_pool->mutex was temporarily released, or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) { buf_block_t* block; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); if (i < BUF_BUDDY_SIZES) { /* Try to allocate from the buddy system. */ + mutex_enter(&buf_pool->zip_free_mutex); block = buf_buddy_alloc_zip(buf_pool, i); if (block) { goto func_exit; } + mutex_exit(&buf_pool->zip_free_mutex); } /* Try allocating from the buf_pool->free list. */ @@ -326,19 +341,30 @@ } /* Try replacing an uncompressed page in the buffer pool. 
*/ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + if (have_page_hash_mutex) { + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } block = buf_LRU_get_free_block(buf_pool, 0); *lru = TRUE; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + if (have_page_hash_mutex) { + rw_lock_x_lock(&buf_pool->page_hash_latch); + } alloc_big: buf_buddy_block_register(block); + mutex_enter(&buf_pool->zip_free_mutex); block = buf_buddy_alloc_from( buf_pool, block->frame, i, BUF_BUDDY_SIZES); func_exit: buf_pool->buddy_stat[i].used++; + mutex_exit(&buf_pool->zip_free_mutex); + return(block); } @@ -355,7 +381,10 @@ buf_page_t* b; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); +#endif switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_FREE: @@ -364,7 +393,7 @@ case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: - ut_error; + /* ut_error; */ /* optimistic */ case BUF_BLOCK_ZIP_DIRTY: /* Cannot relocate dirty pages. 
*/ return(FALSE); @@ -374,9 +403,18 @@ } mutex_enter(&buf_pool->zip_mutex); + mutex_enter(&buf_pool->zip_free_mutex); if (!buf_page_can_relocate(bpage)) { mutex_exit(&buf_pool->zip_mutex); + mutex_exit(&buf_pool->zip_free_mutex); + return(FALSE); + } + + if (bpage != buf_page_hash_get(buf_pool, + bpage->space, bpage->offset)) { + mutex_exit(&buf_pool->zip_mutex); + mutex_exit(&buf_pool->zip_free_mutex); return(FALSE); } @@ -384,18 +422,19 @@ ut_d(bpage->state = BUF_BLOCK_ZIP_FREE); /* relocate buf_pool->zip_clean */ - b = UT_LIST_GET_PREV(list, dpage); - UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage); + b = UT_LIST_GET_PREV(zip_list, dpage); + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage); if (b) { - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage); + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage); } else { - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage); + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage); } UNIV_MEM_INVALID(bpage, sizeof *bpage); mutex_exit(&buf_pool->zip_mutex); + mutex_exit(&buf_pool->zip_free_mutex); return(TRUE); } @@ -409,14 +448,16 @@ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* src, /*!< in: block to relocate */ void* dst, /*!< in: free block to relocate to */ - ulint i) /*!< in: index of + ulint i, /*!< in: index of buf_pool->zip_free[] */ + ibool have_page_hash_mutex) { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; ullint usec = ut_time_us(NULL); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(!ut_align_offset(src, size)); ut_ad(!ut_align_offset(dst, size)); @@ -437,6 +478,13 @@ if (size >= PAGE_ZIP_MIN_SIZE) { /* This is a compressed page. 
*/ mutex_t* mutex; + ulint space, page_no; + + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->zip_free_mutex); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); + } /* The src block may be split into smaller blocks, some of which may be free. Thus, the @@ -446,9 +494,9 @@ pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ - ulint space = mach_read_from_4( + space = mach_read_from_4( (const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint page_no = mach_read_from_4( + page_no = mach_read_from_4( (const byte*) src + FIL_PAGE_OFFSET); /* Suppress Valgrind warnings about conditional jump on uninitialized value. */ @@ -462,6 +510,11 @@ added to buf_pool->page_hash yet. Obviously, it cannot be relocated. */ + if (!have_page_hash_mutex) { + mutex_enter(&buf_pool->zip_free_mutex); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } return(FALSE); } @@ -473,18 +526,27 @@ For the sake of simplicity, give up. */ ut_ad(page_zip_get_size(&bpage->zip) < size); + if (!have_page_hash_mutex) { + mutex_enter(&buf_pool->zip_free_mutex); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } return(FALSE); } + /* To keep latch order */ + if (have_page_hash_mutex) + mutex_exit(&buf_pool->zip_free_mutex); + /* The block must have been allocated, but it may contain uninitialized data. */ UNIV_MEM_ASSERT_W(src, size); - mutex = buf_page_get_mutex(bpage); + mutex = buf_page_get_mutex_enter(bpage); - mutex_enter(mutex); + mutex_enter(&buf_pool->zip_free_mutex); - if (buf_page_can_relocate(bpage)) { + if (mutex && buf_page_can_relocate(bpage)) { /* Relocate the compressed page. 
*/ ut_a(bpage->zip.data == src); memcpy(dst, src, size); @@ -499,10 +561,22 @@ buddy_stat->relocated_usec += ut_time_us(NULL) - usec; } + + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } return(TRUE); } - mutex_exit(mutex); + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } + + if (mutex) { + mutex_exit(mutex); + } } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) { /* This must be a buf_page_t object. */ #if UNIV_WORD_SIZE == 4 @@ -511,10 +585,31 @@ about uninitialized pad bytes. */ UNIV_MEM_ASSERT_RW(src, size); #endif + + mutex_exit(&buf_pool->zip_free_mutex); + + if (!have_page_hash_mutex) { + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); + } + if (buf_buddy_relocate_block(src, dst)) { + mutex_enter(&buf_pool->zip_free_mutex); + + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } goto success; } + + mutex_enter(&buf_pool->zip_free_mutex); + + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } } return(FALSE); @@ -529,13 +624,15 @@ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* buf, /*!< in: block to be freed, must not be pointed to by the buffer pool */ - ulint i) /*!< in: index of buf_pool->zip_free[], + ulint i, /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ + ibool have_page_hash_mutex) { buf_page_t* bpage; buf_page_t* buddy; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(i <= BUF_BUDDY_SIZES); ut_ad(buf_pool->buddy_stat[i].used > 0); @@ -546,7 +643,9 @@ ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE); if (i == BUF_BUDDY_SIZES) { - buf_buddy_block_free(buf_pool, buf); 
+ mutex_exit(&buf_pool->zip_free_mutex); + buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex); + mutex_enter(&buf_pool->zip_free_mutex); return; } @@ -591,7 +690,7 @@ ut_a(bpage != buf); { - buf_page_t* next = UT_LIST_GET_NEXT(list, bpage); + buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage); UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i); bpage = next; } @@ -600,13 +699,13 @@ #ifndef UNIV_DEBUG_VALGRIND buddy_nonfree: /* Valgrind would complain about accessing free memory. */ - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i], + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], ut_ad(buf_page_get_state(ut_list_node_313) == BUF_BLOCK_ZIP_FREE))); #endif /* UNIV_DEBUG_VALGRIND */ /* The buddy is not free. Is there a free block of this size? */ - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]); if (bpage) { /* Remove the block from the free list, because a successful @@ -616,7 +715,7 @@ buf_buddy_remove_from_free(buf_pool, bpage, i); /* Try to relocate the buddy of buf to the free block. */ - if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) { + if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) { ut_d(buddy->state = BUF_BLOCK_ZIP_FREE); goto buddy_free2; @@ -636,14 +735,14 @@ (Parts of the buddy can be free in buf_pool->zip_free[j] with j < i.) 
*/ - ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i], + ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i], ut_ad(buf_page_get_state( ut_list_node_313) == BUF_BLOCK_ZIP_FREE && ut_list_node_313 != buddy))); #endif /* !UNIV_DEBUG_VALGRIND */ - if (buf_buddy_relocate(buf_pool, buddy, buf, i)) { + if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) { buf = bpage; UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i); diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c --- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900 +++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900 @@ -263,6 +263,7 @@ #ifdef UNIV_PFS_RWLOCK /* Keys to register buffer block related rwlocks and mutexes with performance schema */ +UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key; UNIV_INTERN mysql_pfs_key_t buf_block_lock_key; # ifdef UNIV_SYNC_DEBUG UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key; @@ -273,6 +274,10 @@ UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key; UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key; UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key; +UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key; +UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key; +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key; +UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key; UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key; #endif /* UNIV_PFS_MUTEX */ @@ -881,9 +886,9 @@ block->page.in_zip_hash = FALSE; block->page.in_flush_list = FALSE; block->page.in_free_list = FALSE; - block->in_unzip_LRU_list = FALSE; #endif /* UNIV_DEBUG */ block->page.in_LRU_list = FALSE; + block->in_unzip_LRU_list = FALSE; #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers = 0; #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -981,9 +986,11 @@ memset(block->frame, '\0', UNIV_PAGE_SIZE); #endif /* Add the block to the free list */ - UT_LIST_ADD_LAST(list, buf_pool->free, 
(&block->page)); + mutex_enter(&buf_pool->free_list_mutex); + UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page)); ut_d(block->page.in_free_list = TRUE); + mutex_exit(&buf_pool->free_list_mutex); ut_ad(buf_pool_from_block(block) == buf_pool); block++; @@ -1038,7 +1045,8 @@ buf_chunk_t* chunk = buf_pool->chunks; ut_ad(buf_pool); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->zip_free_mutex)); for (n = buf_pool->n_chunks; n--; chunk++) { buf_block_t* block = buf_chunk_contains_zip(chunk, data); @@ -1138,7 +1146,7 @@ buf_block_t* block; const buf_block_t* block_end; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */ block_end = chunk->blocks + chunk->size; @@ -1150,8 +1158,10 @@ ut_ad(!block->in_unzip_LRU_list); ut_ad(!block->page.in_flush_list); /* Remove the block from the free list. */ + mutex_enter(&buf_pool->free_list_mutex); ut_ad(block->page.in_free_list); - UT_LIST_REMOVE(list, buf_pool->free, (&block->page)); + UT_LIST_REMOVE(free, buf_pool->free, (&block->page)); + mutex_exit(&buf_pool->free_list_mutex); /* Free the latches. 
*/ mutex_free(&block->mutex); @@ -1208,9 +1218,21 @@ ------------------------------- */ mutex_create(buf_pool_mutex_key, &buf_pool->mutex, SYNC_BUF_POOL); + mutex_create(buf_pool_LRU_list_mutex_key, + &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST); + rw_lock_create(buf_pool_page_hash_key, + &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH); + mutex_create(buf_pool_free_list_mutex_key, + &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST); + mutex_create(buf_pool_zip_free_mutex_key, + &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE); + mutex_create(buf_pool_zip_hash_mutex_key, + &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH); mutex_create(buf_pool_zip_mutex_key, &buf_pool->zip_mutex, SYNC_BUF_BLOCK); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); buf_pool_mutex_enter(buf_pool); if (buf_pool_size > 0) { @@ -1223,6 +1245,8 @@ mem_free(chunk); mem_free(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_pool_mutex_exit(buf_pool); return(DB_ERROR); @@ -1253,6 +1277,8 @@ /* All fields are initialized by mem_zalloc(). 
*/ + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_pool_mutex_exit(buf_pool); return(DB_SUCCESS); @@ -1467,7 +1493,11 @@ ulint fold; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); +#endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); ut_a(bpage->buf_fix_count == 0); @@ -1554,7 +1584,8 @@ try_again: btr_search_disable(); /* Empty the adaptive hash index again */ - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); shrink_again: if (buf_pool->n_chunks <= 1) { @@ -1625,7 +1656,7 @@ buf_LRU_make_block_old(&block->page); dirty++; - } else if (buf_LRU_free_block(&block->page, TRUE, NULL) + } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE) != BUF_LRU_FREED) { nonfree++; } @@ -1633,7 +1664,8 @@ mutex_exit(&block->mutex); } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); /* Request for a flush of the chunk if it helps. Do not flush if there are non-free blocks, since @@ -1683,7 +1715,8 @@ func_done: buf_pool->old_pool_size = buf_pool->curr_pool_size; func_exit: - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); btr_search_enable(); } @@ -1724,7 +1757,9 @@ hash_table_t* zip_hash; hash_table_t* page_hash; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); /* Free, create, and populate the hash table. */ hash_table_free(buf_pool->page_hash); @@ -1765,8 +1800,9 @@ All such blocks are either in buf_pool->zip_clean or in buf_pool->flush_list. 
*/ + mutex_enter(&buf_pool->zip_mutex); for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(zip_list, b)) { ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); ut_ad(!b->in_flush_list); ut_ad(b->in_LRU_list); @@ -1776,10 +1812,11 @@ HASH_INSERT(buf_page_t, hash, page_hash, buf_page_address_fold(b->space, b->offset), b); } + mutex_exit(&buf_pool->zip_mutex); buf_flush_list_mutex_enter(buf_pool); for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(flush_list, b)) { ut_ad(b->in_flush_list); ut_ad(b->in_LRU_list); ut_ad(b->in_page_hash); @@ -1806,7 +1843,9 @@ } buf_flush_list_mutex_exit(buf_pool); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); } /******************************************************************** @@ -1853,21 +1892,32 @@ buf_page_t* bpage; ulint i; buf_pool_t* buf_pool = buf_pool_get(space, offset); + mutex_t* block_mutex; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + rw_lock_x_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + if (bpage) { + block_mutex = buf_page_get_mutex_enter(bpage); + ut_a(block_mutex); + } if (UNIV_LIKELY_NULL(bpage)) { if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { /* The page was loaded meanwhile. */ + rw_lock_x_unlock(&buf_pool->page_hash_latch); return(bpage); } /* Add to an existing watch. 
*/ bpage->buf_fix_count++; + rw_lock_x_unlock(&buf_pool->page_hash_latch); + mutex_exit(block_mutex); return(NULL); } + /* buf_pool->watch is protected by zip_mutex for now */ + mutex_enter(&buf_pool->zip_mutex); for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) { bpage = &buf_pool->watch[i]; @@ -1891,10 +1941,12 @@ bpage->space = space; bpage->offset = offset; bpage->buf_fix_count = 1; - + bpage->buf_pool_index = buf_pool_index(buf_pool); ut_d(bpage->in_page_hash = TRUE); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, bpage); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + mutex_exit(&buf_pool->zip_mutex); return(NULL); case BUF_BLOCK_ZIP_PAGE: ut_ad(bpage->in_page_hash); @@ -1912,6 +1964,8 @@ ut_error; /* Fix compiler warning */ + rw_lock_x_unlock(&buf_pool->page_hash_latch); + mutex_exit(&buf_pool->zip_mutex); return(NULL); } @@ -1941,6 +1995,8 @@ buf_chunk_t* chunks; buf_chunk_t* chunk; + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); buf_pool_mutex_enter(buf_pool); chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks); @@ -1959,6 +2015,8 @@ buf_pool->n_chunks++; } + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_pool_mutex_exit(buf_pool); } @@ -2046,7 +2104,11 @@ space, offset) */ buf_page_t* watch) /*!< in/out: sentinel for watch */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); +#endif + ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch); ut_d(watch->in_page_hash = FALSE); @@ -2068,28 +2130,31 @@ buf_pool_t* buf_pool = buf_pool_get(space, offset); ulint fold = buf_page_address_fold(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_x_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); /* The 
page must exist because buf_pool_watch_set() increments buf_fix_count. */ ut_a(bpage); if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) { - mutex_t* mutex = buf_page_get_mutex(bpage); + mutex_t* mutex = buf_page_get_mutex_enter(bpage); - mutex_enter(mutex); ut_a(bpage->buf_fix_count > 0); bpage->buf_fix_count--; mutex_exit(mutex); } else { + mutex_enter(&buf_pool->zip_mutex); ut_a(bpage->buf_fix_count > 0); if (UNIV_LIKELY(!--bpage->buf_fix_count)) { buf_pool_watch_remove(buf_pool, fold, bpage); } + mutex_exit(&buf_pool->zip_mutex); } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_x_unlock(&buf_pool->page_hash_latch); } /****************************************************************//** @@ -2109,14 +2174,16 @@ buf_pool_t* buf_pool = buf_pool_get(space, offset); ulint fold = buf_page_address_fold(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); /* The page must exist because buf_pool_watch_set() increments buf_fix_count. 
*/ ut_a(bpage); ret = !buf_pool_watch_is_sentinel(buf_pool, bpage); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(ret); } @@ -2133,13 +2200,15 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); ut_a(buf_page_in_file(bpage)); buf_LRU_make_block_young(bpage); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /********************************************************************//** @@ -2163,14 +2232,20 @@ ut_a(buf_page_in_file(bpage)); if (buf_page_peek_if_too_old(bpage)) { - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); buf_LRU_make_block_young(bpage); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } else if (!access_time) { ulint time_ms = ut_time_ms(); - buf_pool_mutex_enter(buf_pool); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); + //buf_pool_mutex_enter(buf_pool); + if (block_mutex) { buf_page_set_accessed(bpage, time_ms); - buf_pool_mutex_exit(buf_pool); + mutex_exit(block_mutex); + } + //buf_pool_mutex_exit(buf_pool); } } @@ -2187,7 +2262,8 @@ buf_block_t* block; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset); @@ -2196,7 +2272,8 @@ block->check_index_page_at_flush = FALSE; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); } /********************************************************************//** @@ -2215,7 +2292,8 @@ ibool is_hashed; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); + 
//buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset); @@ -2226,7 +2304,8 @@ is_hashed = block->is_hashed; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(is_hashed); } @@ -2248,7 +2327,8 @@ buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get(buf_pool, space, offset); @@ -2257,7 +2337,8 @@ bpage->file_page_was_freed = TRUE; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(bpage); } @@ -2278,7 +2359,8 @@ buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get(buf_pool, space, offset); @@ -2287,7 +2369,8 @@ bpage->file_page_was_freed = FALSE; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(bpage); } @@ -2322,8 +2405,9 @@ buf_pool->stat.n_page_gets++; for (;;) { - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); lookup: + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get(buf_pool, space, offset); if (bpage) { ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); @@ -2332,7 +2416,8 @@ /* Page not in buf_pool: needs to be read from file */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); buf_read_page(space, zip_size, offset); @@ -2344,10 +2429,15 @@ if (UNIV_UNLIKELY(!bpage->zip.data)) { /* There is no compressed page. 
*/ err_exit: - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(NULL); } + block_mutex = buf_page_get_mutex_enter(bpage); + + rw_lock_s_unlock(&buf_pool->page_hash_latch); + ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); switch (buf_page_get_state(bpage)) { @@ -2356,19 +2446,19 @@ case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: case BUF_BLOCK_ZIP_FREE: + if (block_mutex) + mutex_exit(block_mutex); break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: - block_mutex = &buf_pool->zip_mutex; - mutex_enter(block_mutex); + ut_a(block_mutex == &buf_pool->zip_mutex); bpage->buf_fix_count++; goto got_block; case BUF_BLOCK_FILE_PAGE: - block_mutex = &((buf_block_t*) bpage)->mutex; - mutex_enter(block_mutex); + ut_a(block_mutex == &((buf_block_t*) bpage)->mutex); /* Discard the uncompressed page frame if possible. */ - if (buf_LRU_free_block(bpage, FALSE, NULL) + if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE) == BUF_LRU_FREED) { mutex_exit(block_mutex); @@ -2387,7 +2477,7 @@ must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; access_time = buf_page_is_accessed(bpage); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); mutex_exit(block_mutex); @@ -2696,7 +2786,7 @@ const buf_block_t* block) /*!< in: pointer to block, not dereferenced */ { - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) { /* The pointer should be aligned. 
*/ @@ -2732,6 +2822,7 @@ ulint fix_type; ibool must_read; ulint retries = 0; + mutex_t* block_mutex = NULL; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(mtr); @@ -2754,9 +2845,11 @@ fold = buf_page_address_fold(space, offset); loop: block = guess; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); if (block) { + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + /* If the guess is a compressed page descriptor that has been allocated by buf_buddy_alloc(), it may have been invalidated by buf_buddy_relocate(). In that @@ -2765,11 +2858,15 @@ the guess may be pointing to a buffer pool chunk that has been released when resizing the buffer pool. */ - if (!buf_block_is_uncompressed(buf_pool, block) + if (!block_mutex) { + block = guess = NULL; + } else if (!buf_block_is_uncompressed(buf_pool, block) || offset != block->page.offset || space != block->page.space || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { + mutex_exit(block_mutex); + block = guess = NULL; } else { ut_ad(!block->page.in_zip_hash); @@ -2778,12 +2875,19 @@ } if (block == NULL) { + rw_lock_s_lock(&buf_pool->page_hash_latch); block = (buf_block_t*) buf_page_hash_get_low( buf_pool, space, offset, fold); + if (block) { + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + ut_a(block_mutex); + } + rw_lock_s_unlock(&buf_pool->page_hash_latch); } loop2: if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) { + mutex_exit(block_mutex); block = NULL; } @@ -2795,12 +2899,14 @@ space, offset, fold); if (UNIV_LIKELY_NULL(block)) { - + block_mutex = buf_page_get_mutex((buf_page_t*)block); + ut_a(block_mutex); + ut_ad(mutex_own(block_mutex)); goto got_block; } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); if (mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) { @@ -2848,7 +2954,8 @@ /* The page is being read to buffer pool, but we cannot wait around for the read to complete. 
*/ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(block_mutex); return(NULL); } @@ -2858,38 +2965,49 @@ ibool success; case BUF_BLOCK_FILE_PAGE: + if (block_mutex == &buf_pool->zip_mutex) { + /* it is the wrong mutex... */ + mutex_exit(block_mutex); + goto loop; + } break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: + ut_ad(block_mutex == &buf_pool->zip_mutex); bpage = &block->page; /* Protect bpage->buf_fix_count. */ - mutex_enter(&buf_pool->zip_mutex); + //mutex_enter(&buf_pool->zip_mutex); if (bpage->buf_fix_count || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by buf_page_init_for_read(). */ - mutex_exit(&buf_pool->zip_mutex); + //mutex_exit(&buf_pool->zip_mutex); wait_until_unfixed: /* The block is buffer-fixed or I/O-fixed. Try again later. */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(block_mutex); os_thread_sleep(WAIT_FOR_READ); goto loop; } /* Allocate an uncompressed page. */ - buf_pool_mutex_exit(buf_pool); - mutex_exit(&buf_pool->zip_mutex); + //buf_pool_mutex_exit(buf_pool); + //mutex_exit(&buf_pool->zip_mutex); + mutex_exit(block_mutex); block = buf_LRU_get_free_block(buf_pool, 0); ut_a(block); + block_mutex = &block->mutex; - buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); + mutex_enter(block_mutex); { buf_page_t* hash_bpage; @@ -2902,35 +3020,47 @@ while buf_pool->mutex was released. Free the block that was allocated.
*/ - buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_LRU_block_free_non_file_page(block, TRUE); + mutex_exit(block_mutex); block = (buf_block_t*) hash_bpage; + if (block) { + block_mutex = buf_page_get_mutex_enter((buf_page_t*)block); + ut_a(block_mutex); + } + rw_lock_x_unlock(&buf_pool->page_hash_latch); + mutex_exit(&buf_pool->LRU_list_mutex); goto loop2; } } + mutex_enter(&buf_pool->zip_mutex); + if (UNIV_UNLIKELY (bpage->buf_fix_count || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) { + mutex_exit(&buf_pool->zip_mutex); /* The block was buffer-fixed or I/O-fixed while buf_pool->mutex was not held by this thread. Free the block that was allocated and try again. This should be extremely unlikely. */ - buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); + buf_LRU_block_free_non_file_page(block, TRUE); + //mutex_exit(&block->mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + mutex_exit(&buf_pool->LRU_list_mutex); goto wait_until_unfixed; } /* Move the compressed page from bpage to block, and uncompress it. 
*/ - mutex_enter(&buf_pool->zip_mutex); - buf_relocate(bpage, &block->page); + + rw_lock_x_unlock(&buf_pool->page_hash_latch); + buf_block_init_low(block); block->lock_hash_val = lock_rec_hash(space, offset); @@ -2939,7 +3069,7 @@ if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) { - UT_LIST_REMOVE(list, buf_pool->zip_clean, + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, &block->page); ut_ad(!block->page.in_flush_list); } else { @@ -2956,19 +3086,24 @@ /* Insert at the front of unzip_LRU list */ buf_unzip_LRU_add_block(block, FALSE); + mutex_exit(&buf_pool->LRU_list_mutex); + block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); rw_lock_x_lock_func(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); - mutex_exit(&block->mutex); + mutex_exit(block_mutex); mutex_exit(&buf_pool->zip_mutex); + + buf_pool_mutex_enter(buf_pool); buf_pool->n_pend_unzip++; + buf_pool_mutex_exit(buf_pool); - buf_buddy_free(buf_pool, bpage, sizeof *bpage); + buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); /* Decompress the page and apply buffered operations while not holding buf_pool->mutex or block->mutex. */ @@ -2981,12 +3116,15 @@ } /* Unfix and unlatch the block. */ - buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); + //buf_pool_mutex_enter(buf_pool); + block_mutex = &block->mutex; + mutex_enter(block_mutex); block->page.buf_fix_count--; buf_block_set_io_fix(block, BUF_IO_NONE); - mutex_exit(&block->mutex); + + buf_pool_mutex_enter(buf_pool); buf_pool->n_pend_unzip--; + buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(&block->lock); break; @@ -3002,7 +3140,7 @@ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - mutex_enter(&block->mutex); + //mutex_enter(&block->mutex); #if UNIV_WORD_SIZE == 4 /* On 32-bit systems, there is no padding in buf_page_t. 
On other systems, Valgrind could complain about uninitialized pad @@ -3015,7 +3153,7 @@ /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ - if (buf_LRU_free_block(&block->page, TRUE, NULL) + if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE) == BUF_LRU_FREED) { mutex_exit(&block->mutex); if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { @@ -3052,13 +3190,14 @@ buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); + //mutex_exit(&block->mutex); /* Check if this is the first access to the page */ access_time = buf_page_is_accessed(&block->page); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(block_mutex); buf_page_set_accessed_make_young(&block->page, access_time); @@ -3291,9 +3430,11 @@ buf_pool = buf_pool_from_block(block); if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) { - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); buf_LRU_make_block_young(&block->page); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } else if (!buf_page_is_accessed(&block->page)) { /* Above, we do a dirty read on purpose, to avoid mutex contention. The field buf_page_t::access_time @@ -3301,9 +3442,11 @@ field must be protected by mutex, however. 
*/ ulint time_ms = ut_time_ms(); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&block->mutex); buf_page_set_accessed(&block->page, time_ms); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&block->mutex); } ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); @@ -3370,18 +3513,21 @@ ut_ad(mtr); ut_ad(mtr->state == MTR_ACTIVE); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); block = buf_block_hash_get(buf_pool, space_id, page_no); if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(NULL); } ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); mutex_enter(&block->mutex); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); @@ -3470,7 +3616,10 @@ buf_page_t* hash_page; buf_pool_t* buf_pool = buf_pool_get(space, offset); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); +#endif ut_ad(mutex_own(&(block->mutex))); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); @@ -3499,11 +3648,14 @@ if (UNIV_LIKELY(!hash_page)) { } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) { /* Preserve the reference count. 
*/ - ulint buf_fix_count = hash_page->buf_fix_count; + ulint buf_fix_count; + mutex_enter(&buf_pool->zip_mutex); + buf_fix_count = hash_page->buf_fix_count; ut_a(buf_fix_count > 0); block->page.buf_fix_count += buf_fix_count; buf_pool_watch_remove(buf_pool, fold, hash_page); + mutex_exit(&buf_pool->zip_mutex); } else { fprintf(stderr, "InnoDB: Error: page %lu %lu already found" @@ -3513,7 +3665,8 @@ (const void*) hash_page, (const void*) block); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG mutex_exit(&block->mutex); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_print(); buf_LRU_print(); buf_validate(); @@ -3597,7 +3750,9 @@ fold = buf_page_address_fold(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold); if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) { @@ -3606,9 +3761,15 @@ err_exit: if (block) { mutex_enter(&block->mutex); - buf_LRU_block_free_non_file_page(block); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + buf_LRU_block_free_non_file_page(block, FALSE); mutex_exit(&block->mutex); } + else { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } bpage = NULL; goto func_exit; @@ -3631,6 +3792,8 @@ buf_page_init(space, offset, fold, block); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); @@ -3658,7 +3821,7 @@ been added to buf_pool->LRU and buf_pool->page_hash. 
*/ mutex_exit(&block->mutex); - data = buf_buddy_alloc(buf_pool, zip_size, &lru); + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE); mutex_enter(&block->mutex); block->page.zip.data = data; @@ -3671,6 +3834,7 @@ buf_unzip_LRU_add_block(block, TRUE); } + mutex_exit(&buf_pool->LRU_list_mutex); mutex_exit(&block->mutex); } else { /* Defer buf_buddy_alloc() until after the block has @@ -3682,8 +3846,8 @@ control block (bpage), in order to avoid the invocation of buf_buddy_relocate_block() on uninitialized data. */ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru); + data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE); + bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE); /* Initialize the buf_pool pointer. */ bpage->buf_pool_index = buf_pool_index(buf_pool); @@ -3702,8 +3866,11 @@ /* The block was added by some other thread. */ watch_page = NULL; - buf_buddy_free(buf_pool, bpage, sizeof *bpage); - buf_buddy_free(buf_pool, data, zip_size); + buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE); + buf_buddy_free(buf_pool, data, zip_size, TRUE); + + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); bpage = NULL; goto func_exit; @@ -3747,18 +3914,24 @@ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, bpage); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); buf_LRU_insert_zip_clean(bpage); + mutex_exit(&buf_pool->LRU_list_mutex); + buf_page_set_io_fix(bpage, BUF_IO_READ); mutex_exit(&buf_pool->zip_mutex); } + buf_pool_mutex_enter(buf_pool); buf_pool->n_pend_reads++; -func_exit: buf_pool_mutex_exit(buf_pool); +func_exit: + //buf_pool_mutex_exit(buf_pool); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -3800,7 +3973,9 @@ fold = buf_page_address_fold(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + 
mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); block = (buf_block_t*) buf_page_hash_get_low( buf_pool, space, offset, fold); @@ -3816,7 +3991,9 @@ #endif /* UNIV_DEBUG_FILE_ACCESSES */ /* Page can be found in buf_pool */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_block_free(free_block); @@ -3838,6 +4015,7 @@ mutex_enter(&block->mutex); buf_page_init(space, offset, fold, block); + rw_lock_x_unlock(&buf_pool->page_hash_latch); /* The block must be put to the LRU list */ buf_LRU_add_block(&block->page, FALSE); @@ -3864,7 +4042,7 @@ the reacquisition of buf_pool->mutex. We also must defer this operation until after the block descriptor has been added to buf_pool->LRU and buf_pool->page_hash. */ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); + data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE); mutex_enter(&block->mutex); block->page.zip.data = data; @@ -3882,7 +4060,8 @@ buf_page_set_accessed(&block->page, time_ms); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); @@ -3933,6 +4112,8 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + ibool have_LRU_mutex = FALSE; + mutex_t* block_mutex; ut_a(buf_page_in_file(bpage)); @@ -4066,8 +4247,26 @@ } } + if (io_type == BUF_IO_WRITE + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY + || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) { + /* to keep consistency at buf_LRU_insert_zip_clean() */ + have_LRU_mutex = TRUE; /* optimistic */ + } +retry_mutex: + if (have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); + block_mutex = buf_page_get_mutex_enter(bpage); + ut_a(block_mutex); + if (io_type == BUF_IO_WRITE + && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY + || 
buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) + && !have_LRU_mutex) { + mutex_exit(block_mutex); + have_LRU_mutex = TRUE; + goto retry_mutex; + } buf_pool_mutex_enter(buf_pool); - mutex_enter(buf_page_get_mutex(bpage)); #ifdef UNIV_IBUF_COUNT_DEBUG if (io_type == BUF_IO_WRITE || uncompressed) { @@ -4090,6 +4289,7 @@ the x-latch to this OS thread: do not let this confuse you in debugging! */ + ut_a(!have_LRU_mutex); ut_ad(buf_pool->n_pend_reads > 0); buf_pool->n_pend_reads--; buf_pool->stat.n_pages_read++; @@ -4107,6 +4307,9 @@ buf_flush_write_complete(bpage); + if (have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); + if (uncompressed) { rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock, BUF_IO_WRITE); @@ -4129,8 +4332,8 @@ } #endif /* UNIV_DEBUG */ - mutex_exit(buf_page_get_mutex(bpage)); buf_pool_mutex_exit(buf_pool); + mutex_exit(block_mutex); } /*********************************************************************//** @@ -4147,7 +4350,9 @@ ut_ad(buf_pool); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); chunk = buf_pool->chunks; @@ -4164,7 +4369,9 @@ } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); return(TRUE); } @@ -4212,7 +4419,8 @@ freed = buf_LRU_search_and_free_block(buf_pool, 100); } - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0); @@ -4225,7 +4433,8 @@ memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat)); buf_refresh_io_stats(buf_pool); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /*********************************************************************//** @@ -4267,7 +4476,10 @@ ut_ad(buf_pool); - 
buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); + /* to keep the new latch order, it cannot be validated correctly... */ chunk = buf_pool->chunks; @@ -4362,7 +4574,7 @@ /* Check clean compressed-only blocks. */ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(zip_list, b)) { ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); switch (buf_page_get_io_fix(b)) { case BUF_IO_NONE: @@ -4393,7 +4605,7 @@ buf_flush_list_mutex_enter(buf_pool); for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(flush_list, b)) { ut_ad(b->in_flush_list); ut_a(b->oldest_modification); n_flush++; @@ -4452,6 +4664,8 @@ } ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); + /* because of latching order with block->mutex, we cannot get needed mutexes before that */ +/* if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { fprintf(stderr, "Free list len %lu, free blocks %lu\n", (ulong) UT_LIST_GET_LEN(buf_pool->free), @@ -4462,8 +4676,11 @@ ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); +*/ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); ut_a(buf_LRU_validate()); ut_a(buf_flush_validate(buf_pool)); @@ -4519,7 +4736,9 @@ index_ids = mem_alloc(size * sizeof *index_ids); counts = mem_alloc(sizeof(ulint) * size); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->free_list_mutex); buf_flush_list_mutex_enter(buf_pool); fprintf(stderr, @@ -4588,7 +4807,9 @@ } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); +
mutex_exit(&buf_pool->free_list_mutex); for (i = 0; i < n_found; i++) { index = dict_index_get_if_in_cache(index_ids[i]); @@ -4645,7 +4866,7 @@ buf_chunk_t* chunk; ulint fixed_pages_number = 0; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); chunk = buf_pool->chunks; @@ -4679,7 +4900,7 @@ /* Traverse the lists of clean and dirty compressed-only blocks. */ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(zip_list, b)) { ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE); @@ -4691,7 +4912,7 @@ buf_flush_list_mutex_enter(buf_pool); for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; - b = UT_LIST_GET_NEXT(list, b)) { + b = UT_LIST_GET_NEXT(flush_list, b)) { ut_ad(b->in_flush_list); switch (buf_page_get_state(b)) { @@ -4717,7 +4938,7 @@ buf_flush_list_mutex_exit(buf_pool); mutex_exit(&buf_pool->zip_mutex); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); return(fixed_pages_number); } @@ -4873,6 +5094,8 @@ /* Find appropriate pool_info to store stats for this buffer pool */ pool_info = &all_pool_info[pool_id]; + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->free_list_mutex); buf_pool_mutex_enter(buf_pool); buf_flush_list_mutex_enter(buf_pool); @@ -4983,6 +5206,8 @@ pool_info->unzip_cur = buf_LRU_stat_cur.unzip; buf_refresh_io_stats(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_exit(&buf_pool->free_list_mutex); buf_pool_mutex_exit(buf_pool); } @@ -5224,11 +5449,13 @@ { ulint len; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->free_list_mutex); len = UT_LIST_GET_LEN(buf_pool->free); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->free_list_mutex); return(len); } diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c --- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 
+0900 +++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900 @@ -307,7 +307,7 @@ ut_d(block->page.in_flush_list = TRUE); block->page.oldest_modification = lsn; - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); #ifdef UNIV_DEBUG_VALGRIND { @@ -401,14 +401,14 @@ > block->page.oldest_modification) { ut_ad(b->in_flush_list); prev_b = b; - b = UT_LIST_GET_NEXT(list, b); + b = UT_LIST_GET_NEXT(flush_list, b); } } if (prev_b == NULL) { - UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); + UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page); } else { - UT_LIST_INSERT_AFTER(list, buf_pool->flush_list, + UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b, &block->page); } @@ -434,7 +434,7 @@ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); //ut_ad(buf_pool_mutex_own(buf_pool)); #endif - //ut_ad(mutex_own(buf_page_get_mutex(bpage))); + ut_ad(mutex_own(buf_page_get_mutex(bpage))); //ut_ad(bpage->in_LRU_list); if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) { @@ -470,14 +470,14 @@ enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif - ut_a(buf_page_in_file(bpage)); + //ut_a(buf_page_in_file(bpage)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST); - if (bpage->oldest_modification != 0 + if (buf_page_in_file(bpage) && bpage->oldest_modification != 0 && buf_page_get_io_fix(bpage) == BUF_IO_NONE) { ut_ad(bpage->in_flush_list); @@ -508,7 +508,7 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(bpage->in_flush_list); 
@@ -526,11 +526,11 @@ return; case BUF_BLOCK_ZIP_DIRTY: buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage); buf_LRU_insert_zip_clean(bpage); break; case BUF_BLOCK_FILE_PAGE: - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage); break; } @@ -574,7 +574,7 @@ buf_page_t* prev_b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); /* Must reside in the same buffer pool. */ ut_ad(buf_pool == buf_pool_from_bpage(dpage)); @@ -603,18 +603,18 @@ because we assert on in_flush_list in comparison function. */ ut_d(bpage->in_flush_list = FALSE); - prev = UT_LIST_GET_PREV(list, bpage); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); + prev = UT_LIST_GET_PREV(flush_list, bpage); + UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage); if (prev) { ut_ad(prev->in_flush_list); UT_LIST_INSERT_AFTER( - list, + flush_list, buf_pool->flush_list, prev, dpage); } else { UT_LIST_ADD_FIRST( - list, + flush_list, buf_pool->flush_list, dpage); } @@ -1083,7 +1083,7 @@ #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!buf_pool_mutex_own(buf_pool)); + //ut_ad(!buf_pool_mutex_own(buf_pool)); #endif #ifdef UNIV_LOG_DEBUG @@ -1097,7 +1097,8 @@ io_fixed and oldest_modification != 0. Thus, it cannot be relocated in the buffer pool or removed from flush_list or LRU_list. 
*/ - ut_ad(!buf_pool_mutex_own(buf_pool)); + //ut_ad(!buf_pool_mutex_own(buf_pool)); + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(!buf_flush_list_mutex_own(buf_pool)); ut_ad(!mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE); @@ -1260,12 +1261,18 @@ ibool is_uncompressed; ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED)); +#endif ut_ad(buf_page_in_file(bpage)); block_mutex = buf_page_get_mutex(bpage); ut_ad(mutex_own(block_mutex)); + buf_pool_mutex_enter(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); + ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); buf_page_set_io_fix(bpage, BUF_IO_WRITE); @@ -1427,14 +1434,16 @@ buf_pool = buf_pool_get(space, i); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); /* We only want to flush pages from this buffer pool. 
*/ bpage = buf_page_hash_get(buf_pool, space, i); if (!bpage) { - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); continue; } @@ -1446,11 +1455,9 @@ if (flush_type != BUF_FLUSH_LRU || i == offset || buf_page_is_old(bpage)) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - if (buf_flush_ready_for_flush(bpage, flush_type) + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type) && (i == offset || !bpage->buf_fix_count)) { /* We only try to flush those neighbors != offset where the buf fix @@ -1466,11 +1473,12 @@ ut_ad(!buf_pool_mutex_own(buf_pool)); count++; continue; - } else { + } else if (block_mutex) { mutex_exit(block_mutex); } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); } return(count); @@ -1503,21 +1511,25 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); #endif /* UNIV_DEBUG */ - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(flush_type != BUF_FLUSH_LRU + || mutex_own(&buf_pool->LRU_list_mutex)); - block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); + block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(buf_page_in_file(bpage)); + //ut_a(buf_page_in_file(bpage)); - if (buf_flush_ready_for_flush(bpage, flush_type)) { + if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) { ulint space; ulint offset; buf_pool_t* buf_pool; buf_pool = buf_pool_from_bpage(bpage); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + if (flush_type == BUF_FLUSH_LRU) { + mutex_exit(&buf_pool->LRU_list_mutex); + } /* These fields are protected by both the buffer pool mutex and block mutex. 
*/ @@ -1533,13 +1545,18 @@ *count, n_to_flush); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + if (flush_type == BUF_FLUSH_LRU) { + mutex_enter(&buf_pool->LRU_list_mutex); + } flushed = TRUE; - } else { + } else if (block_mutex) { mutex_exit(block_mutex); } - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(flush_type != BUF_FLUSH_LRU + || mutex_own(&buf_pool->LRU_list_mutex)); return(flushed); } @@ -1560,7 +1577,8 @@ buf_page_t* bpage; ulint count = 0; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); do { /* Start from the end of the list looking for a @@ -1582,7 +1600,8 @@ should be flushed, we factor in this value. */ buf_lru_flush_page_count += count; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); return(count); } @@ -1610,9 +1629,10 @@ { ulint len; buf_page_t* bpage; + buf_page_t* prev_bpage = NULL; ulint count = 0; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); /* If we have flushed enough, leave the loop */ do { @@ -1631,6 +1651,7 @@ if (bpage) { ut_a(bpage->oldest_modification > 0); + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage); } if (!bpage || bpage->oldest_modification >= lsn_limit) { @@ -1672,9 +1693,17 @@ break; } - bpage = UT_LIST_GET_PREV(list, bpage); + bpage = UT_LIST_GET_PREV(flush_list, bpage); - ut_ad(!bpage || bpage->in_flush_list); + //ut_ad(!bpage || bpage->in_flush_list); + if (bpage != prev_bpage) { + /* the search might warp.. 
retrying */ + buf_flush_list_mutex_exit(buf_pool); + break; + } + if (bpage) { + prev_bpage = UT_LIST_GET_PREV(flush_list, bpage); + } buf_flush_list_mutex_exit(buf_pool); @@ -1683,7 +1712,7 @@ } while (count < min_n && bpage != NULL && len > 0); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); return(count); } @@ -1722,13 +1751,15 @@ || sync_thread_levels_empty_gen(TRUE)); #endif /* UNIV_SYNC_DEBUG */ - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); /* Note: The buffer pool mutex is released and reacquired within the flush functions. */ switch(flush_type) { case BUF_FLUSH_LRU: + mutex_enter(&buf_pool->LRU_list_mutex); count = buf_flush_LRU_list_batch(buf_pool, min_n); + mutex_exit(&buf_pool->LRU_list_mutex); break; case BUF_FLUSH_LIST: count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit); @@ -1737,7 +1768,7 @@ ut_error; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); buf_flush_buffered_writes(); @@ -1993,7 +2024,7 @@ retry: //buf_pool_mutex_enter(buf_pool); if (have_LRU_mutex) - buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); n_replaceable = UT_LIST_GET_LEN(buf_pool->free); @@ -2010,15 +2041,15 @@ bpage = UT_LIST_GET_LAST(buf_pool->LRU); continue; } - block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); + block_mutex = buf_page_get_mutex_enter(bpage); - if (buf_flush_ready_for_replace(bpage)) { + if (block_mutex && buf_flush_ready_for_replace(bpage)) { n_replaceable++; } - mutex_exit(block_mutex); + if (block_mutex) { + mutex_exit(block_mutex); + } distance++; @@ -2027,7 +2058,7 @@ //buf_pool_mutex_exit(buf_pool); if (have_LRU_mutex) - buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) { @@ -2226,7 +2257,7 @@ ut_ad(buf_flush_list_mutex_own(buf_pool)); - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, + UT_LIST_VALIDATE(flush_list, buf_page_t, 
buf_pool->flush_list, ut_ad(ut_list_node_313->in_flush_list)); bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); @@ -2266,7 +2297,7 @@ rnode = rbt_next(buf_pool->flush_rbt, rnode); } - bpage = UT_LIST_GET_NEXT(list, bpage); + bpage = UT_LIST_GET_NEXT(flush_list, bpage); ut_a(!bpage || om >= bpage->oldest_modification); } diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c --- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900 +++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900 @@ -143,8 +143,9 @@ void buf_LRU_block_free_hashed_page( /*===========================*/ - buf_block_t* block); /*!< in: block, must contain a file page and + buf_block_t* block, /*!< in: block, must contain a file page and be in a state where it can be freed */ + ibool have_page_hash_mutex); /******************************************************************//** Determines if the unzip_LRU list should be used for evicting a victim @@ -154,15 +155,20 @@ ibool buf_LRU_evict_from_unzip_LRU( /*=========================*/ - buf_pool_t* buf_pool) + buf_pool_t* buf_pool, + ibool have_LRU_mutex) { ulint io_avg; ulint unzip_avg; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + if (!have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); /* If the unzip_LRU list is empty, we can only use the LRU. */ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) { + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); return(FALSE); } @@ -171,14 +177,20 @@ decompressed pages in the buffer pool. */ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) { + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); return(FALSE); } /* If eviction hasn't started yet, we assume by default that a workload is disk bound. 
*/ if (buf_pool->freed_page_clock == 0) { + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); return(TRUE); } + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); /* Calculate the average over past intervals, and add the values of the current interval. */ @@ -246,19 +258,23 @@ page_arr = ut_malloc( sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); scan_again: num_entries = 0; bpage = UT_LIST_GET_LAST(buf_pool->LRU); while (bpage != NULL) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); buf_page_t* prev_bpage; - mutex_enter(block_mutex); prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + if (!block_mutex) { + goto next_page; + } + ut_a(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE @@ -287,14 +303,16 @@ /* Array full. We release the buf_pool->mutex to obey the latching order. */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); buf_LRU_drop_page_hash_batch( id, zip_size, page_arr, num_entries); num_entries = 0; - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); } else { mutex_exit(block_mutex); } @@ -319,7 +337,8 @@ } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); /* Drop any remaining batch of search hashed pages. 
*/ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); @@ -341,7 +360,9 @@ ibool all_freed; scan_again: - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); all_freed = TRUE; @@ -369,8 +390,16 @@ all_freed = FALSE; } else { - mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); + + if (!block_mutex) { + /* It may be impossible case... + Something wrong, so will be scan_again */ + + all_freed = FALSE; + + goto next_page_no_mutex; + } if (bpage->buf_fix_count > 0) { @@ -429,7 +458,9 @@ ulint page_no; ulint zip_size; - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); zip_size = buf_page_get_zip_size(bpage); page_no = buf_page_get_page_no(bpage); @@ -454,7 +485,7 @@ if (buf_LRU_block_remove_hashed_page(bpage, TRUE) != BUF_BLOCK_ZIP_FREE) { buf_LRU_block_free_hashed_page((buf_block_t*) - bpage); + bpage, TRUE); } else { /* The block_mutex should have been released by buf_LRU_block_remove_hashed_page() @@ -486,7 +517,9 @@ bpage = prev_bpage; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); if (!all_freed) { os_thread_sleep(20000); @@ -532,7 +565,9 @@ buf_page_t* b; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(mutex_own(&buf_pool->flush_list_mutex)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE); /* Find the first successor of bpage in the LRU list @@ -540,17 +575,17 @@ b = bpage; do { b = UT_LIST_GET_NEXT(LRU, b); - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE); + } while (b && (buf_page_get_state(b) 
!= BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list)); /* Insert bpage before b, i.e., after the predecessor of b. */ if (b) { - b = UT_LIST_GET_PREV(list, b); + b = UT_LIST_GET_PREV(zip_list, b); } if (b) { - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage); + UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage); } else { - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage); + UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage); } } @@ -563,18 +598,19 @@ buf_LRU_free_from_unzip_LRU_list( /*=============================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint n_iterations) /*!< in: how many times this has + ulint n_iterations, /*!< in: how many times this has been called repeatedly without result: a high value means that we should search farther; we will search n_iterations / 5 of the unzip_LRU list, or nothing if n_iterations >= 5 */ + ibool have_LRU_mutex) { buf_block_t* block; ulint distance; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); /* Theoratically it should be much easier to find a victim from unzip_LRU as we can choose even a dirty block (as we'll @@ -584,7 +620,7 @@ if we have done five iterations so far. 
*/ if (UNIV_UNLIKELY(n_iterations >= 5) - || !buf_LRU_evict_from_unzip_LRU(buf_pool)) { + || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) { return(FALSE); } @@ -592,18 +628,25 @@ distance = 100 + (n_iterations * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5; +restart: for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0); block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) { enum buf_lru_free_block_status freed; + mutex_enter(&block->mutex); + if (!block->in_unzip_LRU_list || !block->page.in_LRU_list + || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { + mutex_exit(&block->mutex); + goto restart; + } + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(block->in_unzip_LRU_list); ut_ad(block->page.in_LRU_list); - mutex_enter(&block->mutex); - freed = buf_LRU_free_block(&block->page, FALSE, NULL); + freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex); mutex_exit(&block->mutex); switch (freed) { @@ -637,21 +680,23 @@ buf_LRU_free_from_common_LRU_list( /*==============================*/ buf_pool_t* buf_pool, - ulint n_iterations) + ulint n_iterations, /*!< in: how many times this has been called repeatedly without result: a high value means that we should search farther; if n_iterations < 10, then we search n_iterations / 10 * buf_pool->curr_size pages from the end of the LRU list */ + ibool have_LRU_mutex) { buf_page_t* bpage; ulint distance; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); distance = 100 + (n_iterations * buf_pool->curr_size) / 10; +restart: for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0); bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) { @@ -659,14 +704,23 @@ enum buf_lru_free_block_status freed; unsigned accessed; mutex_t* block_mutex - = buf_page_get_mutex(bpage); + = buf_page_get_mutex_enter(bpage); + + if (!block_mutex) { + goto restart; + } + + if 
(!bpage->in_LRU_list + || !buf_page_in_file(bpage)) { + mutex_exit(block_mutex); + goto restart; + } ut_ad(buf_page_in_file(bpage)); ut_ad(bpage->in_LRU_list); - mutex_enter(block_mutex); accessed = buf_page_is_accessed(bpage); - freed = buf_LRU_free_block(bpage, TRUE, NULL); + freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex); mutex_exit(block_mutex); switch (freed) { @@ -718,16 +772,23 @@ n_iterations / 5 of the unzip_LRU list. */ { ibool freed = FALSE; + ibool have_LRU_mutex = FALSE; - buf_pool_mutex_enter(buf_pool); + if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)) + have_LRU_mutex = TRUE; + + //buf_pool_mutex_enter(buf_pool); + if (have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations); + freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex); if (!freed) { freed = buf_LRU_free_from_common_LRU_list( - buf_pool, n_iterations); + buf_pool, n_iterations, have_LRU_mutex); } + buf_pool_mutex_enter(buf_pool); if (!freed) { buf_pool->LRU_flush_ended = 0; } else if (buf_pool->LRU_flush_ended > 0) { @@ -735,6 +796,8 @@ } buf_pool_mutex_exit(buf_pool); + if (have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); return(freed); } @@ -795,7 +858,9 @@ buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->free_list_mutex); if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) @@ -805,7 +870,9 @@ ret = TRUE; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_exit(&buf_pool->free_list_mutex); } return(ret); @@ -823,9 +890,10 @@ { buf_block_t* block; - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); - block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free); + mutex_enter(&buf_pool->free_list_mutex); + block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free); 
if (block) { @@ -834,7 +902,9 @@ ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_LRU_list); ut_a(!buf_page_in_file(&block->page)); - UT_LIST_REMOVE(list, buf_pool->free, (&block->page)); + UT_LIST_REMOVE(free, buf_pool->free, (&block->page)); + + mutex_exit(&buf_pool->free_list_mutex); mutex_enter(&block->mutex); @@ -844,6 +914,8 @@ ut_ad(buf_pool_from_block(block) == buf_pool); mutex_exit(&block->mutex); + } else { + mutex_exit(&buf_pool->free_list_mutex); } return(block); @@ -868,7 +940,7 @@ ibool mon_value_was = FALSE; ibool started_monitor = FALSE; loop: - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) { @@ -951,8 +1023,10 @@ ibool lru; page_zip_set_size(&block->page.zip, zip_size); + mutex_enter(&buf_pool->LRU_list_mutex); block->page.zip.data = buf_buddy_alloc( - buf_pool, zip_size, &lru); + buf_pool, zip_size, &lru, FALSE); + mutex_exit(&buf_pool->LRU_list_mutex); UNIV_MEM_DESC(block->page.zip.data, zip_size, block); } else { @@ -960,7 +1034,7 @@ block->page.zip.data = NULL; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; @@ -972,7 +1046,7 @@ /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); freed = buf_LRU_search_and_free_block(buf_pool, n_iterations); @@ -1058,7 +1132,8 @@ ulint new_len; ut_a(buf_pool->LRU_old); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN); ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX); #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5) @@ -1124,7 +1199,8 @@ { buf_page_t* bpage; - 
ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); /* We first initialize all blocks in the LRU list as old and then use @@ -1159,13 +1235,14 @@ ut_ad(buf_pool); ut_ad(bpage); ut_ad(buf_page_in_file(bpage)); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (buf_page_belongs_to_unzip_LRU(bpage)) { buf_block_t* block = (buf_block_t*) bpage; ut_ad(block->in_unzip_LRU_list); - ut_d(block->in_unzip_LRU_list = FALSE); + block->in_unzip_LRU_list = FALSE; UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block); } @@ -1183,7 +1260,8 @@ ut_ad(buf_pool); ut_ad(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); @@ -1260,12 +1338,13 @@ ut_ad(buf_pool); ut_ad(block); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); ut_ad(!block->in_unzip_LRU_list); - ut_d(block->in_unzip_LRU_list = TRUE); + block->in_unzip_LRU_list = TRUE; if (old) { UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block); @@ -1286,7 +1365,8 @@ ut_ad(buf_pool); ut_ad(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); @@ -1337,7 +1417,8 @@ ut_ad(buf_pool); ut_ad(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); ut_ad(!bpage->in_LRU_list); @@ -1416,7 +1497,8 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); 
if (bpage->old) { buf_pool->stat.n_pages_made_young++; @@ -1458,19 +1540,20 @@ buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released) + ibool* buf_pool_mutex_released, /*!< in: pointer to a variable that will be assigned TRUE if buf_pool_mutex was temporarily released, or NULL */ + ibool have_LRU_mutex) { buf_page_t* b = NULL; buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); mutex_t* block_mutex = buf_page_get_mutex(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(block_mutex)); ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); + //ut_ad(bpage->in_LRU_list); ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); #if UNIV_WORD_SIZE == 4 /* On 32-bit systems, there is no padding in buf_page_t. On @@ -1479,7 +1562,7 @@ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); #endif - if (!buf_page_can_relocate(bpage)) { + if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) { /* Do not free buffer-fixed or I/O-fixed blocks. */ return(BUF_LRU_NOT_FREED); @@ -1511,15 +1594,15 @@ If it cannot be allocated (without freeing a block from the LRU list), refuse to free bpage. 
*/ alloc: - buf_pool_mutex_exit_forbid(buf_pool); - b = buf_buddy_alloc(buf_pool, sizeof *b, NULL); - buf_pool_mutex_exit_allow(buf_pool); + //buf_pool_mutex_exit_forbid(buf_pool); + b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE); + //buf_pool_mutex_exit_allow(buf_pool); if (UNIV_UNLIKELY(!b)) { return(BUF_LRU_CANNOT_RELOCATE); } - memcpy(b, bpage, sizeof *b); + //memcpy(b, bpage, sizeof *b); } #ifdef UNIV_DEBUG @@ -1530,6 +1613,39 @@ } #endif /* UNIV_DEBUG */ + /* not to break latch order, must re-enter block_mutex */ + mutex_exit(block_mutex); + + if (!have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */ + rw_lock_x_lock(&buf_pool->page_hash_latch); + mutex_enter(block_mutex); + + /* recheck states of block */ + if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage) + || !buf_page_can_relocate(bpage)) { +not_freed: + if (b) { + buf_buddy_free(buf_pool, b, sizeof *b, TRUE); + } + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + return(BUF_LRU_NOT_FREED); + } else if (zip || !bpage->zip.data) { + if (bpage->oldest_modification) + goto not_freed; + } else if (bpage->oldest_modification) { + if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY); + goto not_freed; + } + } + + if (b) { + memcpy(b, bpage, sizeof *b); + } + if (buf_LRU_block_remove_hashed_page(bpage, zip) != BUF_BLOCK_ZIP_FREE) { ut_a(bpage->buf_fix_count == 0); @@ -1546,6 +1662,10 @@ ut_a(!hash_b); + while (prev_b && !prev_b->in_LRU_list) { + prev_b = UT_LIST_GET_PREV(LRU, prev_b); + } + b->state = b->oldest_modification ? 
BUF_BLOCK_ZIP_DIRTY : BUF_BLOCK_ZIP_PAGE; @@ -1642,7 +1762,9 @@ *buf_pool_mutex_released = TRUE; } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); mutex_exit(block_mutex); /* Remove possible adaptive hash index on the page. @@ -1674,7 +1796,9 @@ : BUF_NO_CHECKSUM_MAGIC); } - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + if (have_LRU_mutex) + mutex_enter(&buf_pool->LRU_list_mutex); mutex_enter(block_mutex); if (b) { @@ -1684,13 +1808,17 @@ mutex_exit(&buf_pool->zip_mutex); } - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); + buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE); } else { /* The block_mutex should have been released by buf_LRU_block_remove_hashed_page() when it returns BUF_BLOCK_ZIP_FREE. */ ut_ad(block_mutex == &buf_pool->zip_mutex); mutex_enter(block_mutex); + + if (!have_LRU_mutex) + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); } return(BUF_LRU_FREED); @@ -1702,13 +1830,14 @@ void buf_LRU_block_free_non_file_page( /*=============================*/ - buf_block_t* block) /*!< in: block, must not contain a file page */ + buf_block_t* block, /*!< in: block, must not contain a file page */ + ibool have_page_hash_mutex) { void* data; buf_pool_t* buf_pool = buf_pool_from_block(block); ut_ad(block); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(&block->mutex)); switch (buf_block_get_state(block)) { @@ -1742,18 +1871,21 @@ if (data) { block->page.zip.data = NULL; mutex_exit(&block->mutex); - buf_pool_mutex_exit_forbid(buf_pool); + //buf_pool_mutex_exit_forbid(buf_pool); buf_buddy_free( - buf_pool, data, page_zip_get_size(&block->page.zip)); + buf_pool, data, page_zip_get_size(&block->page.zip), + have_page_hash_mutex); - buf_pool_mutex_exit_allow(buf_pool); + //buf_pool_mutex_exit_allow(buf_pool); 
mutex_enter(&block->mutex); page_zip_set_size(&block->page.zip, 0); } - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page)); + mutex_enter(&buf_pool->free_list_mutex); + UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page)); ut_d(block->page.in_free_list = TRUE); + mutex_exit(&buf_pool->free_list_mutex); UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); } @@ -1783,7 +1915,11 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)); +#endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); @@ -1891,7 +2027,9 @@ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG mutex_exit(buf_page_get_mutex(bpage)); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); buf_print(); buf_LRU_print(); buf_validate(); @@ -1912,17 +2050,17 @@ ut_a(bpage->zip.data); ut_a(buf_page_get_zip_size(bpage)); - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage); + UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage); mutex_exit(&buf_pool->zip_mutex); - buf_pool_mutex_exit_forbid(buf_pool); + //buf_pool_mutex_exit_forbid(buf_pool); buf_buddy_free( buf_pool, bpage->zip.data, - page_zip_get_size(&bpage->zip)); + page_zip_get_size(&bpage->zip), TRUE); - buf_buddy_free(buf_pool, bpage, sizeof(*bpage)); - buf_pool_mutex_exit_allow(buf_pool); + buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE); + //buf_pool_mutex_exit_allow(buf_pool); UNIV_MEM_UNDESC(bpage); return(BUF_BLOCK_ZIP_FREE); @@ -1945,13 +2083,13 @@ ut_ad(!bpage->in_flush_list); ut_ad(!bpage->in_LRU_list); mutex_exit(&((buf_block_t*) bpage)->mutex); - buf_pool_mutex_exit_forbid(buf_pool); + //buf_pool_mutex_exit_forbid(buf_pool); buf_buddy_free( buf_pool, data, - 
page_zip_get_size(&bpage->zip)); + page_zip_get_size(&bpage->zip), TRUE); - buf_pool_mutex_exit_allow(buf_pool); + //buf_pool_mutex_exit_allow(buf_pool); mutex_enter(&((buf_block_t*) bpage)->mutex); page_zip_set_size(&bpage->zip, 0); } @@ -1977,18 +2115,19 @@ void buf_LRU_block_free_hashed_page( /*===========================*/ - buf_block_t* block) /*!< in: block, must contain a file page and + buf_block_t* block, /*!< in: block, must contain a file page and be in a state where it can be freed */ + ibool have_page_hash_mutex) { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_block(block); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_block(block); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif ut_ad(mutex_own(&block->mutex)); buf_block_set_state(block, BUF_BLOCK_MEMORY); - buf_LRU_block_free_non_file_page(block); + buf_LRU_block_free_non_file_page(block, have_page_hash_mutex); } /**********************************************************************//** @@ -2015,7 +2154,8 @@ } if (adjust) { - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); if (ratio != buf_pool->LRU_old_ratio) { buf_pool->LRU_old_ratio = ratio; @@ -2027,7 +2167,8 @@ } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } else { buf_pool->LRU_old_ratio = ratio; } @@ -2132,7 +2273,8 @@ ulint new_len; ut_ad(buf_pool); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { @@ -2193,16 +2335,22 @@ ut_a(buf_pool->LRU_old_len == old_len); - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, + mutex_exit(&buf_pool->LRU_list_mutex); + mutex_enter(&buf_pool->free_list_mutex); + + UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free, ut_ad(ut_list_node_313->in_free_list)); for (bpage = UT_LIST_GET_FIRST(buf_pool->free); bpage != NULL; 
- bpage = UT_LIST_GET_NEXT(list, bpage)) { + bpage = UT_LIST_GET_NEXT(free, bpage)) { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED); } + mutex_exit(&buf_pool->free_list_mutex); + mutex_enter(&buf_pool->LRU_list_mutex); + UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU, ut_ad(ut_list_node_313->in_unzip_LRU_list && ut_list_node_313->page.in_LRU_list)); @@ -2216,7 +2364,8 @@ ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /**********************************************************************//** @@ -2252,7 +2401,8 @@ const buf_page_t* bpage; ut_ad(buf_pool); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->LRU_list_mutex); bpage = UT_LIST_GET_FIRST(buf_pool->LRU); @@ -2309,7 +2459,8 @@ bpage = UT_LIST_GET_NEXT(LRU, bpage); } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->LRU_list_mutex); } /**********************************************************************//** diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c --- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900 +++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900 @@ -311,6 +311,7 @@ return(0); } + buf_pool_mutex_exit(buf_pool); /* Check that almost all pages in the area have been accessed; if offset == low, the accesses must be in a descending order, otherwise, @@ -329,6 +330,7 @@ fail_count = 0; + rw_lock_s_lock(&buf_pool->page_hash_latch); for (i = low; i < high; i++) { bpage = buf_page_hash_get(buf_pool, space, i); @@ -356,7 +358,8 @@ if (fail_count > threshold) { /* Too many failures: return */ - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(0); } @@ -371,7 +374,8 @@ bpage = buf_page_hash_get(buf_pool, space, offset); if (bpage == NULL) { - 
buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); return(0); } @@ -397,7 +401,8 @@ pred_offset = fil_page_get_prev(frame); succ_offset = fil_page_get_next(frame); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); if ((offset == low) && (succ_offset == offset + 1)) { diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900 @@ -245,6 +245,10 @@ # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ {&buf_pool_mutex_key, "buf_pool_mutex", 0}, {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0}, + {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0}, + {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0}, + {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0}, + {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0}, {&cache_last_read_mutex_key, "cache_last_read_mutex", 0}, {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0}, {&dict_sys_mutex_key, "dict_sys_mutex", 0}, @@ -295,6 +299,7 @@ {&archive_lock_key, "archive_lock", 0}, # endif /* UNIV_LOG_ARCHIVE */ {&btr_search_latch_key, "btr_search_latch", 0}, + {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0}, # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK {&buf_block_lock_key, "buf_block_lock", 0}, # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc --- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900 @@ -1565,7 +1565,8 @@ buf_pool = buf_pool_from_array(i); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + mutex_enter(&buf_pool->zip_free_mutex); for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) { 
buf_buddy_stat_t* buddy_stat; @@ -1595,7 +1596,8 @@ } } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + mutex_exit(&buf_pool->zip_free_mutex); if (status) { break; diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c --- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900 +++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900 @@ -3783,9 +3783,11 @@ ulint fold = buf_page_address_fold(space, page_no); buf_pool_t* buf_pool = buf_pool_get(space, page_no); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + rw_lock_s_unlock(&buf_pool->page_hash_latch); if (UNIV_LIKELY_NULL(bpage)) { /* A buffer pool watch has been set or the diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h --- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900 @@ -51,10 +51,11 @@ buf_pool_t* buf_pool, /*!< buffer pool in which the block resides */ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ - ibool* lru) /*!< in: pointer to a variable that will be assigned + ibool* lru, /*!< in: pointer to a variable that will be assigned TRUE if storage was allocated from the LRU list and buf_pool->mutex was temporarily released, or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) __attribute__((malloc)); /**********************************************************************//** @@ -67,7 +68,8 @@ /*!< buffer pool in which the block resides */ void* buf, /*!< in: block to be freed, must not be pointed to by the buffer pool */ - ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */ + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ + ibool 
have_page_hash_mutex) __attribute__((nonnull)); #ifndef UNIV_NONINL diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic --- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900 @@ -46,10 +46,11 @@ /*!< in: buffer pool in which the page resides */ ulint i, /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that will be assigned + ibool* lru, /*!< in: pointer to a variable that will be assigned TRUE if storage was allocated from the LRU list and buf_pool->mutex was temporarily released, or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) __attribute__((malloc)); /**********************************************************************//** @@ -61,8 +62,9 @@ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* buf, /*!< in: block to be freed, must not be pointed to by the buffer pool */ - ulint i) /*!< in: index of buf_pool->zip_free[], + ulint i, /*!< in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ + ibool have_page_hash_mutex) __attribute__((nonnull)); /**********************************************************************//** @@ -102,16 +104,17 @@ the page resides */ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ - ibool* lru) /*!< in: pointer to a variable + ibool* lru, /*!< in: pointer to a variable that will be assigned TRUE if storage was allocated from the LRU list and buf_pool->mutex was temporarily released, or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) { - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); - return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru)); + return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex)); } /**********************************************************************//** @@ -123,12 
+126,25 @@ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* buf, /*!< in: block to be freed, must not be pointed to by the buffer pool */ - ulint size) /*!< in: block size, up to + ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */ + ibool have_page_hash_mutex) { - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + + if (!have_page_hash_mutex) { + mutex_enter(&buf_pool->LRU_list_mutex); + rw_lock_x_lock(&buf_pool->page_hash_latch); + } - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size)); + mutex_enter(&buf_pool->zip_free_mutex); + buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE); + mutex_exit(&buf_pool->zip_free_mutex); + + if (!have_page_hash_mutex) { + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } } #ifdef UNIV_MATERIALIZE diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h --- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900 +++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900 @@ -203,6 +203,20 @@ /*==========================*/ /********************************************************************//** +*/ +UNIV_INLINE +void +buf_pool_page_hash_x_lock_all(void); +/*================================*/ + +/********************************************************************//** +*/ +UNIV_INLINE +void +buf_pool_page_hash_x_unlock_all(void); +/*==================================*/ + +/********************************************************************//** Creates the buffer pool. @return own: buf_pool object, NULL if not enough memory or error */ UNIV_INTERN @@ -832,6 +846,15 @@ const buf_page_t* bpage) /*!< in: pointer to control block */ __attribute__((pure)); +/************************************************************************* +Gets the mutex of a block and enter the mutex with consistency. 
*/ +UNIV_INLINE +mutex_t* +buf_page_get_mutex_enter( +/*=========================*/ + const buf_page_t* bpage) /*!< in: pointer to control block */ + __attribute__((nonnull)); /* must not be "pure": this function enters a mutex */ + /*********************************************************************//** Get the flush type of a page. @return flush type */ @@ -1313,7 +1336,7 @@ All these are protected by buf_pool->mutex. */ /* @{ */ - UT_LIST_NODE_T(buf_page_t) list; + /* UT_LIST_NODE_T(buf_page_t) list; */ /*!< based on state, this is a list node, protected either by buf_pool->mutex or by @@ -1341,6 +1364,10 @@ BUF_BLOCK_REMOVE_HASH or BUF_BLOCK_READY_IN_USE. */ + /* resplit for optimistic use */ + UT_LIST_NODE_T(buf_page_t) free; + UT_LIST_NODE_T(buf_page_t) flush_list; + UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */ #ifdef UNIV_DEBUG ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list; when buf_pool->flush_list_mutex is @@ -1433,11 +1460,11 @@ a block is in the unzip_LRU list if page.state == BUF_BLOCK_FILE_PAGE and page.zip.data != NULL */ -#ifdef UNIV_DEBUG +//#ifdef UNIV_DEBUG ibool in_unzip_LRU_list;/*!< TRUE if the page is in the decompressed LRU list; used in debugging */ -#endif /* UNIV_DEBUG */ +//#endif /* UNIV_DEBUG */ mutex_t mutex; /*!< mutex protecting this block: state (also protected by the buffer pool mutex), io_fix, buf_fix_count, @@ -1612,6 +1639,11 @@ pool instance, protects compressed only pages (of type buf_page_t, not buf_block_t */ + mutex_t LRU_list_mutex; + rw_lock_t page_hash_latch; + mutex_t free_list_mutex; + mutex_t zip_free_mutex; + mutex_t zip_hash_mutex; ulint instance_no; /*!< Array index of this buffer pool instance */ ulint old_pool_size; /*!< Old pool size in bytes */ diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic --- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900 @@ -274,7 +274,7 @@ case BUF_BLOCK_ZIP_FREE: /* 
This is a free page in buf_pool->zip_free[]. Such pages should only be accessed by the buddy allocator. */ - ut_error; + /* ut_error; */ /* optimistic */ break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: @@ -317,9 +317,14 @@ { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + if (buf_pool_watch_is_sentinel(buf_pool, bpage)) { + /* TODO: this code is the interim. should be confirmed later. */ + return(&buf_pool->zip_mutex); + } + switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_FREE: - ut_error; + /* ut_error; */ /* optimistic */ return(NULL); case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: @@ -329,6 +334,28 @@ } } +/************************************************************************* +Gets the mutex of a block and enter the mutex with consistency. */ +UNIV_INLINE +mutex_t* +buf_page_get_mutex_enter( +/*=========================*/ + const buf_page_t* bpage) /*!< in: pointer to control block */ +{ + mutex_t* block_mutex; + + while(1) { + block_mutex = buf_page_get_mutex(bpage); + if (!block_mutex) + return block_mutex; + + mutex_enter(block_mutex); + if (block_mutex == buf_page_get_mutex(bpage)) + return block_mutex; + mutex_exit(block_mutex); + } +} + /*********************************************************************//** Get the flush type of a page. 
@return flush type */ @@ -425,8 +452,8 @@ enum buf_io_fix io_fix) /*!< in: io_fix state */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); @@ -456,14 +483,14 @@ const buf_page_t* bpage) /*!< control block being relocated */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); + //ut_ad(bpage->in_LRU_list); - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE && bpage->buf_fix_count == 0); } @@ -477,8 +504,8 @@ const buf_page_t* bpage) /*!< in: control block */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif ut_ad(buf_page_in_file(bpage)); @@ -498,7 +525,8 @@ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); #endif /* UNIV_DEBUG */ ut_a(buf_page_in_file(bpage)); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(bpage->in_LRU_list); #ifdef UNIV_LRU_DEBUG @@ -545,9 +573,10 @@ ulint time_ms) /*!< in: ut_time_ms() */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif + ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_a(buf_page_in_file(bpage)); if (!bpage->access_time) { @@ -761,19 +790,19 @@ /*===========*/ buf_block_t* block) /*!< in, 
own: block to be freed */ { - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block); + //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); mutex_enter(&block->mutex); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); - buf_LRU_block_free_non_file_page(block); + buf_LRU_block_free_non_file_page(block, FALSE); mutex_exit(&block->mutex); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); } #endif /* !UNIV_HOTBACKUP */ @@ -821,17 +850,17 @@ page frame */ { ib_uint64_t lsn; - mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); + mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - if (buf_page_in_file(bpage)) { + if (block_mutex && buf_page_in_file(bpage)) { lsn = bpage->newest_modification; } else { lsn = 0; } - mutex_exit(block_mutex); + if (block_mutex) { + mutex_exit(block_mutex); + } return(lsn); } @@ -849,7 +878,7 @@ #ifdef UNIV_SYNC_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block); - ut_ad((buf_pool_mutex_own(buf_pool) + ut_ad((mutex_own(&buf_pool->LRU_list_mutex) && (block->page.buf_fix_count == 0)) || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); #endif /* UNIV_SYNC_DEBUG */ @@ -979,7 +1008,11 @@ buf_page_t* bpage; ut_ad(buf_pool); - ut_ad(buf_pool_mutex_own(buf_pool)); + //ut_ad(buf_pool_mutex_own(buf_pool)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX) + || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED)); +#endif ut_ad(fold == buf_page_address_fold(space, offset)); /* Look for the page in the hash table */ @@ -1064,11 +1097,13 @@ const buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); + //buf_pool_mutex_enter(buf_pool); + rw_lock_s_lock(&buf_pool->page_hash_latch); bpage = buf_page_hash_get(buf_pool, space, offset); - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + 
rw_lock_s_unlock(&buf_pool->page_hash_latch); return(bpage != NULL); } @@ -1196,4 +1231,38 @@ buf_pool_mutex_exit(buf_pool); } } + +/********************************************************************//** +*/ +UNIV_INLINE +void +buf_pool_page_hash_x_lock_all(void) +/*===============================*/ +{ + ulint i; + + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + + buf_pool = buf_pool_from_array(i); + rw_lock_x_lock(&buf_pool->page_hash_latch); + } +} + +/********************************************************************//** +*/ +UNIV_INLINE +void +buf_pool_page_hash_x_unlock_all(void) +/*=================================*/ +{ + ulint i; + + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + + buf_pool = buf_pool_from_array(i); + rw_lock_x_unlock(&buf_pool->page_hash_latch); + } +} #endif /* !UNIV_HOTBACKUP */ diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h --- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900 @@ -113,10 +113,11 @@ buf_page_t* bpage, /*!< in: block to be freed */ ibool zip, /*!< in: TRUE if should remove also the compressed page of an uncompressed page */ - ibool* buf_pool_mutex_released); + ibool* buf_pool_mutex_released, /*!< in: pointer to a variable that will be assigned TRUE if buf_pool->mutex was temporarily released, or NULL */ + ibool have_LRU_mutex); /******************************************************************//** Try to free a replaceable block. @return TRUE if found and freed */ @@ -163,7 +164,8 @@ void buf_LRU_block_free_non_file_page( /*=============================*/ - buf_block_t* block); /*!< in: block, must not contain a file page */ + buf_block_t* block, /*!< in: block, must not contain a file page */ + ibool have_page_hash_mutex); /******************************************************************//** Adds a block to the LRU list. 
*/ UNIV_INTERN diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h --- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900 @@ -112,6 +112,7 @@ extern mysql_pfs_key_t archive_lock_key; # endif /* UNIV_LOG_ARCHIVE */ extern mysql_pfs_key_t btr_search_latch_key; +extern mysql_pfs_key_t buf_pool_page_hash_key; extern mysql_pfs_key_t buf_block_lock_key; # ifdef UNIV_SYNC_DEBUG extern mysql_pfs_key_t buf_block_debug_latch_key; diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h --- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900 @@ -75,6 +75,10 @@ extern mysql_pfs_key_t buffer_block_mutex_key; extern mysql_pfs_key_t buf_pool_mutex_key; extern mysql_pfs_key_t buf_pool_zip_mutex_key; +extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key; +extern mysql_pfs_key_t buf_pool_free_list_mutex_key; +extern mysql_pfs_key_t buf_pool_zip_free_mutex_key; +extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key; extern mysql_pfs_key_t cache_last_read_mutex_key; extern mysql_pfs_key_t dict_foreign_err_mutex_key; extern mysql_pfs_key_t dict_sys_mutex_key; @@ -660,7 +664,7 @@ #define SYNC_TRX_LOCK_HEAP 298 #define SYNC_TRX_SYS_HEADER 290 #define SYNC_LOG 170 -#define SYNC_LOG_FLUSH_ORDER 147 +#define SYNC_LOG_FLUSH_ORDER 156 #define SYNC_RECV 168 #define SYNC_WORK_QUEUE 162 #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */ @@ -670,8 +674,13 @@ SYNC_SEARCH_SYS, as memory allocation can call routines there! Otherwise the level is SYNC_MEM_HASH. 
*/ +#define SYNC_BUF_LRU_LIST 158 +#define SYNC_BUF_PAGE_HASH 157 +#define SYNC_BUF_BLOCK 155 /* Block mutex */ +#define SYNC_BUF_FREE_LIST 153 +#define SYNC_BUF_ZIP_FREE 152 +#define SYNC_BUF_ZIP_HASH 151 #define SYNC_BUF_POOL 150 /* Buffer pool mutex */ -#define SYNC_BUF_BLOCK 146 /* Block mutex */ #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */ #define SYNC_DOUBLEWRITE 140 #define SYNC_ANY_LATCH 135 @@ -703,7 +712,7 @@ os_fast_mutex; /*!< We use this OS mutex in place of lock_word when atomic operations are not enabled */ #endif - ulint waiters; /*!< This ulint is set to 1 if there are (or + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or may be) threads waiting in the global wait array for this mutex to be released. Otherwise, this is 0. */ diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900 @@ -3094,7 +3094,7 @@ level += log_sys->max_checkpoint_age - (lsn - oldest_modification); } - bpage = UT_LIST_GET_NEXT(list, bpage); + bpage = UT_LIST_GET_NEXT(flush_list, bpage); n_blocks++; } @@ -3180,7 +3180,7 @@ found = TRUE; break; } - bpage = UT_LIST_GET_NEXT(list, bpage); + bpage = UT_LIST_GET_NEXT(flush_list, bpage); new_blocks_num++; } if (!found) { diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c --- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900 @@ -265,7 +265,7 @@ mutex->lock_word = 0; #endif mutex->event = os_event_create(NULL); - mutex_set_waiters(mutex, 0); + mutex->waiters = 0; #ifdef UNIV_DEBUG mutex->magic_n = MUTEX_MAGIC_N; #endif /* UNIV_DEBUG */ @@ -444,6 +444,15 @@ mutex_t* mutex, /*!< in: mutex */ ulint n) /*!< in: value to set */ { +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ut_ad(mutex); + + if (n) { + 
os_compare_and_swap_ulint(&mutex->waiters, 0, 1); + } else { + os_compare_and_swap_ulint(&mutex->waiters, 1, 0); + } +#else volatile ulint* ptr; /* declared volatile to ensure that the value is stored to memory */ ut_ad(mutex); @@ -452,6 +461,7 @@ *ptr = n; /* Here we assume that the write of a single word in memory is atomic */ +#endif } /******************************************************************//** @@ -1193,7 +1203,12 @@ ut_error; } break; + case SYNC_BUF_LRU_LIST: case SYNC_BUF_FLUSH_LIST: + case SYNC_BUF_PAGE_HASH: + case SYNC_BUF_FREE_LIST: + case SYNC_BUF_ZIP_FREE: + case SYNC_BUF_ZIP_HASH: case SYNC_BUF_POOL: /* We can have multiple mutexes of this type therefore we can only check whether the greater than condition holds. */ @@ -1211,7 +1226,8 @@ buffer block (block->mutex or buf_pool->zip_mutex). */ if (!sync_thread_levels_g(array, level, FALSE)) { ut_a(sync_thread_levels_g(array, level - 1, TRUE)); - ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL)); + /* the exact rule is not fixed yet, for now */ + //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST)); } break; case SYNC_REC_LOCK: