# name : innodb_opt_lru_count.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! --- a/storage/innobase/buf/buf0buddy.c +++ b/storage/innobase/buf/buf0buddy.c @@ -123,7 +123,7 @@ ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); - bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); + bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]); if (bpage) { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -890,9 +890,9 @@ block->page.in_zip_hash = FALSE; block->page.in_flush_list = FALSE; block->page.in_free_list = FALSE; - block->page.in_LRU_list = FALSE; block->in_unzip_LRU_list = FALSE; #endif /* UNIV_DEBUG */ + block->page.in_LRU_list = FALSE; #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG block->n_pointers = 0; #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ @@ -1403,7 +1403,7 @@ memcpy(dpage, bpage, sizeof *dpage); - ut_d(bpage->in_LRU_list = FALSE); + bpage->in_LRU_list = FALSE; ut_d(bpage->in_page_hash = FALSE); /* relocate buf_pool->LRU */ @@ -3225,8 +3225,8 @@ bpage->in_zip_hash = FALSE; bpage->in_flush_list = FALSE; bpage->in_free_list = FALSE; - bpage->in_LRU_list = FALSE; #endif /* UNIV_DEBUG */ + bpage->in_LRU_list = FALSE; ut_d(bpage->in_page_hash = TRUE); @@ -3391,7 +3391,7 @@ ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE); /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(buf_pool); + buf_flush_free_margin(buf_pool, FALSE); frame = block->frame; --- a/storage/innobase/buf/buf0flu.c +++ b/storage/innobase/buf/buf0flu.c @@ -431,19 +431,21 @@ buf_page_in_file(bpage) and in the LRU list */ { #ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool_mutex_own(buf_pool)); + //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + //ut_ad(buf_pool_mutex_own(buf_pool)); #endif - 
ut_ad(mutex_own(buf_page_get_mutex(bpage))); - ut_ad(bpage->in_LRU_list); + //ut_ad(mutex_own(buf_page_get_mutex(bpage))); + //ut_ad(bpage->in_LRU_list); - if (UNIV_LIKELY(buf_page_in_file(bpage))) { + if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) { return(bpage->oldest_modification == 0 && buf_page_get_io_fix(bpage) == BUF_IO_NONE && bpage->buf_fix_count == 0); } + /* permitted not to own LRU_mutex.. */ +/* ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: buffer block state %lu" @@ -451,6 +453,7 @@ (ulong) buf_page_get_state(bpage)); ut_print_buf(stderr, bpage, sizeof(buf_page_t)); putc('\n', stderr); +*/ return(FALSE); } @@ -2049,8 +2052,14 @@ buf_page_t* bpage; ulint n_replaceable; ulint distance = 0; + ibool have_LRU_mutex = FALSE; - buf_pool_mutex_enter(buf_pool); + if(UT_LIST_GET_LEN(buf_pool->unzip_LRU)) + have_LRU_mutex = TRUE; +retry: + //buf_pool_mutex_enter(buf_pool); + if (have_LRU_mutex) + buf_pool_mutex_enter(buf_pool); n_replaceable = UT_LIST_GET_LEN(buf_pool->free); @@ -2061,7 +2070,13 @@ + BUF_FLUSH_EXTRA_MARGIN(buf_pool)) && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_t* block_mutex; + if (!bpage->in_LRU_list) { + /* restart. 
but it is very optimistic */ + bpage = UT_LIST_GET_LAST(buf_pool->LRU); + continue; + } + block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); @@ -2076,11 +2091,18 @@ bpage = UT_LIST_GET_PREV(LRU, bpage); } - buf_pool_mutex_exit(buf_pool); + //buf_pool_mutex_exit(buf_pool); + if (have_LRU_mutex) + buf_pool_mutex_exit(buf_pool); if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) { return(0); + } else if (!have_LRU_mutex) { + /* confirm it again with LRU_mutex for exactness */ + have_LRU_mutex = TRUE; + distance = 0; + goto retry; } return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool) @@ -2098,7 +2120,8 @@ void buf_flush_free_margin( /*==================*/ - buf_pool_t* buf_pool) /*!< in: Buffer pool instance */ + buf_pool_t* buf_pool, /*!< in: Buffer pool instance */ + ibool wait) { ulint n_to_flush; @@ -2109,7 +2132,7 @@ n_flushed = buf_flush_LRU(buf_pool, n_to_flush); - if (n_flushed == ULINT_UNDEFINED) { + if (wait && n_flushed == ULINT_UNDEFINED) { /* There was an LRU type flush batch already running; let us wait for it to end */ @@ -2122,8 +2145,9 @@ Flushes pages from the end of all the LRU lists. 
*/ UNIV_INTERN void -buf_flush_free_margins(void) +buf_flush_free_margins( /*========================*/ + ibool wait) { ulint i; @@ -2132,7 +2156,7 @@ buf_pool = buf_pool_from_array(i); - buf_flush_free_margin(buf_pool); + buf_flush_free_margin(buf_pool, wait); } } --- a/storage/innobase/buf/buf0lru.c +++ b/storage/innobase/buf/buf0lru.c @@ -934,7 +934,7 @@ /* No free block was found: try to flush the LRU list */ - buf_flush_free_margin(buf_pool); + buf_flush_free_margin(buf_pool, TRUE); ++srv_buf_pool_wait_free; os_aio_simulated_wake_handler_threads(); @@ -1131,7 +1131,7 @@ /* Remove the block from the LRU list */ UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); - ut_d(bpage->in_LRU_list = FALSE); + bpage->in_LRU_list = FALSE; buf_unzip_LRU_remove_block_if_needed(bpage); @@ -1210,7 +1210,7 @@ ut_ad(!bpage->in_LRU_list); UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage); - ut_d(bpage->in_LRU_list = TRUE); + bpage->in_LRU_list = TRUE; if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { @@ -1280,7 +1280,7 @@ buf_pool->LRU_old_len++; } - ut_d(bpage->in_LRU_list = TRUE); + bpage->in_LRU_list = TRUE; if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { @@ -1524,7 +1524,7 @@ buf_page_set_old(b, buf_page_is_old(b)); #endif /* UNIV_LRU_DEBUG */ } else { - ut_d(b->in_LRU_list = FALSE); + b->in_LRU_list = FALSE; buf_LRU_add_block_low(b, buf_page_is_old(b)); } --- a/storage/innobase/buf/buf0rea.c +++ b/storage/innobase/buf/buf0rea.c @@ -367,7 +367,7 @@ } /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(buf_pool); + buf_flush_free_margin(buf_pool, TRUE); /* Increment number of I/O operations used for LRU policy. 
*/ buf_LRU_stat_inc_io(); @@ -641,7 +641,7 @@ os_aio_simulated_wake_handler_threads(); /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(buf_pool); + buf_flush_free_margin(buf_pool, TRUE); #ifdef UNIV_DEBUG if (buf_debug_prints && (count > 0)) { @@ -729,7 +729,7 @@ os_aio_simulated_wake_handler_threads(); /* Flush pages from the end of all the LRU lists if necessary */ - buf_flush_free_margins(); + buf_flush_free_margins(FALSE); #ifdef UNIV_DEBUG if (buf_debug_prints) { @@ -823,7 +823,7 @@ os_aio_simulated_wake_handler_threads(); /* Flush pages from the end of all the LRU lists if necessary */ - buf_flush_free_margins(); + buf_flush_free_margins(FALSE); #ifdef UNIV_DEBUG if (buf_debug_prints) { --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1424,11 +1424,11 @@ UT_LIST_NODE_T(buf_page_t) LRU; /*!< node of the LRU list */ -#ifdef UNIV_DEBUG +//#ifdef UNIV_DEBUG ibool in_LRU_list; /*!< TRUE if the page is in the LRU list; used in debugging */ -#endif /* UNIV_DEBUG */ +//#endif /* UNIV_DEBUG */ unsigned old:1; /*!< TRUE if the block is in the old blocks in buf_pool->LRU_old */ unsigned freed_page_clock:31;/*!< the value of --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -65,13 +65,15 @@ void buf_flush_free_margin( /*==================*/ - buf_pool_t* buf_pool); + buf_pool_t* buf_pool, + ibool wait); /*********************************************************************//** Flushes pages from the end of all the LRU lists. */ UNIV_INTERN void -buf_flush_free_margins(void); +buf_flush_free_margins( /*=========================*/ + ibool wait); #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** Initializes a page for writing to the tablespace. */