# name       : innodb_dict_size_limit.patch
# introduced : 11 or before
# maintainer : Yasufumi
#
#!!! notice !!!
# Any small change to this file in the main branch
# should be done or reviewed by the maintainer!
--- a/storage/innobase/btr/btr0sea.c
+++ b/storage/innobase/btr/btr0sea.c
@@ -1187,6 +1187,178 @@
 	mem_free(folds);
 }
 
+/************************************************************************
+Drops the page hash index entries of all buffer pool pages of an index */
+UNIV_INTERN
+void
+btr_search_drop_page_hash_index_on_index(
+/*=====================================*/
+	dict_index_t*	index)		/* in: index whose hash entries are dropped */
+{
+
+	hash_table_t*	table;
+	buf_block_t*	block;
+	ulint		n_fields;
+	ulint		n_bytes;
+	const page_t*	page;
+	const rec_t*	rec;
+	ulint		fold;
+	ulint		prev_fold;
+	index_id_t	index_id;
+	ulint		n_cached;
+	ulint		n_recs;
+	ulint*		folds;
+	ulint		i, j, k;
+	mem_heap_t*	heap	= NULL;
+	ulint*		offsets;
+	ibool		released_search_latch;
+
+	rw_lock_s_lock(&btr_search_latch);
+
+	table = btr_search_sys->hash_index;
+
+	for (j = 0; j < srv_buf_pool_instances; j++) {
+		buf_pool_t*	buf_pool;
+
+		buf_pool = buf_pool_from_array(j);
+
+		do {
+			buf_chunk_t*	chunks	= buf_pool->chunks;
+			buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;
+
+			released_search_latch = FALSE;
+
+			while (--chunk >= chunks) {
+				block	= chunk->blocks;
+				i	= chunk->size;
+
+retry:
+				for (; i--; block++) {
+					if (buf_block_get_state(block)
+					    != BUF_BLOCK_FILE_PAGE
+					    || block->index != index
+					    || !block->index) {
+						continue;
+					}
+
+					page = block->frame;
+
+					/* from btr_search_drop_page_hash_index() */
+					n_fields = block->curr_n_fields;
+					n_bytes = block->curr_n_bytes;
+
+
+					/* keeping latch order */
+					rw_lock_s_unlock(&btr_search_latch);
+					released_search_latch = TRUE;
+					rw_lock_x_lock(&block->lock);
+
+
+					ut_a(n_fields + n_bytes > 0);
+
+					n_recs = page_get_n_recs(page);
+
+					/* Calculate and cache fold values into an array for fast deletion
+					from the hash index */
+
+					folds = mem_alloc(n_recs * sizeof(ulint));
+
+					n_cached = 0;
+
+					rec = page_get_infimum_rec(page);
+					rec = page_rec_get_next_low(rec, page_is_comp(page));
+
+					index_id = btr_page_get_index_id(page);
+
+					ut_a(index_id == index->id);
+
+					prev_fold = 0;
+
+					offsets = NULL;
+
+					while (!page_rec_is_supremum(rec)) {
+						offsets = rec_get_offsets(rec, index, offsets,
+									n_fields + (n_bytes > 0), &heap);
+						ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
+						fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
+
+						if (fold == prev_fold && prev_fold != 0) {
+
+							goto next_rec;
+						}
+
+						/* Remove all hash nodes pointing to this page from the
+						hash chain */
+
+						folds[n_cached] = fold;
+						n_cached++;
+next_rec:
+						rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
+						prev_fold = fold;
+					}
+
+					if (UNIV_LIKELY_NULL(heap)) {
+						mem_heap_empty(heap);
+					}
+
+					rw_lock_x_lock(&btr_search_latch);
+
+					if (UNIV_UNLIKELY(!block->index)) {
+						goto cleanup;
+					}
+
+					ut_a(block->index == index);
+
+					if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
+					    || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
+						rw_lock_x_unlock(&btr_search_latch);
+						rw_lock_x_unlock(&block->lock);
+
+						mem_free(folds);
+
+						rw_lock_s_lock(&btr_search_latch);
+						goto retry;
+					}
+
+					for (k = 0; k < n_cached; k++) {
+						/* k, not i: i counts the blocks left in this chunk */
+						ha_remove_all_nodes_to_page(table, folds[k], page);
+					}
+
+					ut_a(index->search_info->ref_count > 0);
+					index->search_info->ref_count--;
+
+					block->index = NULL;
+
+cleanup:
+#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+					if (UNIV_UNLIKELY(block->n_pointers)) {
+						/* Corruption */
+						ut_print_timestamp(stderr);
+						fprintf(stderr,
+"InnoDB: The adaptive hash index is corrupted. After dropping\n"
+"InnoDB: the hash index to a page of %s, %lu hash nodes still remain.\n",
+							index->name, (ulong) block->n_pointers);
+					}
+#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+					rw_lock_x_unlock(&btr_search_latch);
+					rw_lock_x_unlock(&block->lock);
+
+					mem_free(folds);
+
+					rw_lock_s_lock(&btr_search_latch);
+				}
+			}
+		} while (released_search_latch);
+	}
+
+	rw_lock_s_unlock(&btr_search_latch);
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
 /********************************************************************//**
 Drops a possible page hash index when a page is evicted from the buffer pool
 or freed in a file segment. */
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -294,14 +294,14 @@
 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
 #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
 
-/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_struct{
-	ulint		mem_size;	/*!< allocated size of the chunk */
-	ulint		size;		/*!< size of frames[] and blocks[] */
-	void*		mem;		/*!< pointer to the memory area which
-					was allocated for the frames */
-	buf_block_t*	blocks;		/*!< array of buffer control blocks */
-};
+/** A chunk of buffers. The buffer pool is allocated in chunks. (moved to buf0buf.h)*/
+//struct buf_chunk_struct{
+//	ulint		mem_size;	/*!< allocated size of the chunk */
+//	ulint		size;		/*!< size of frames[] and blocks[] */
+//	void*		mem;		/*!< pointer to the memory area which
+//					was allocated for the frames */
+//	buf_block_t*	blocks;		/*!< array of buffer control blocks */
+//};
 #endif /* !UNIV_HOTBACKUP */
 
 /********************************************************************//**
--- a/storage/innobase/dict/dict0boot.c
+++ b/storage/innobase/dict/dict0boot.c
@@ -284,6 +284,7 @@
 	system tables */
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
@@ -336,6 +337,7 @@
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
@@ -368,6 +370,7 @@
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
@@ -413,6 +416,7 @@
 
 	/*-------------------------*/
 	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
--- a/storage/innobase/dict/dict0crea.c
+++ b/storage/innobase/dict/dict0crea.c
@@ -1209,6 +1209,9 @@
 		/* Foreign constraint system tables have already been
 		created, and they are ok */
 
+		table1->n_mysql_handles_opened = 1; /* for pin */
+		table2->n_mysql_handles_opened = 1; /* for pin */
+
 		mutex_exit(&(dict_sys->mutex));
 
 		return(DB_SUCCESS);
@@ -1290,6 +1293,11 @@
 
 	trx_commit_for_mysql(trx);
 
+	table1 = dict_table_get_low("SYS_FOREIGN");
+	table2 = dict_table_get_low("SYS_FOREIGN_COLS");
+	table1->n_mysql_handles_opened = 1; /* for pin */
+	table2->n_mysql_handles_opened = 1; /* for pin */
+
 	row_mysql_unlock_data_dictionary(trx);
 
 	trx_free_for_mysql(trx);
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -626,6 +626,8 @@
 
 	table = dict_table_get_on_id_low(table_id);
 
+	dict_table_LRU_trim(table);
+
 	mutex_exit(&(dict_sys->mutex));
 
 	return(table);
@@ -744,6 +746,8 @@
 		table->n_mysql_handles_opened++;
 	}
 
+	dict_table_LRU_trim(table);
+
 	mutex_exit(&(dict_sys->mutex));
 
 	if (table != NULL) {
@@ -1264,6 +1268,64 @@
 	dict_mem_table_free(table);
 }
 
+/**************************************************************************
+Frees tables from the end of table_LRU if the dictionary cache occupies
+too much space. */
+UNIV_INTERN
+void
+dict_table_LRU_trim(
+/*================*/
+	dict_table_t*	self)	/* in: table that is never evicted here, or NULL */
+{
+	dict_table_t*	table;
+	dict_table_t*	prev_table;
+	dict_foreign_t*	foreign;
+	ulint		n_removed;
+	ulint		n_have_parent;
+	ulint		cached_foreign_tables;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+retry:
+	n_removed = n_have_parent = 0;
+	table = UT_LIST_GET_LAST(dict_sys->table_LRU);
+
+	while ( srv_dict_size_limit && table
+		&& ((dict_sys->table_hash->n_cells
+		     + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
+		    + dict_sys->size) > srv_dict_size_limit ) {
+		prev_table = UT_LIST_GET_PREV(table_LRU, table);
+
+		if (table == self || table->n_mysql_handles_opened)
+			goto next_loop;
+
+		cached_foreign_tables = 0;
+		foreign = UT_LIST_GET_FIRST(table->foreign_list);
+		while (foreign != NULL) {
+			if (foreign->referenced_table)
+				cached_foreign_tables++;
+			foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
+		}
+
+		if (cached_foreign_tables == 0) {
+			dict_table_remove_from_cache(table);
+			n_removed++;
+		} else {
+			n_have_parent++;
+		}
+next_loop:
+		table = prev_table;
+	}
+
+	if ( srv_dict_size_limit && n_have_parent && n_removed
+		&& ((dict_sys->table_hash->n_cells
+		     + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
+		    + dict_sys->size) > srv_dict_size_limit )
+		goto retry;
+}
+
 /****************************************************************//**
 If the given column name is reserved for InnoDB system columns, return
 TRUE.
@@ -1768,6 +1830,11 @@
 	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
+	/* remove all entry of the index from adaptive hash index,
+	because removing from adaptive hash index needs dict_index */
+	if (btr_search_enabled && srv_dict_size_limit)
+		btr_search_drop_page_hash_index_on_index(index);
+
 	/* We always create search info whether or not adaptive
 	hash index is enabled or not. */
 	info = index->search_info;
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -677,6 +677,8 @@
   (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
   {"dblwr_writes",
   (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
+  {"dict_tables",
+  (char*) &export_vars.innodb_dict_tables, SHOW_LONG},
   {"have_atomic_builtins",
   (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
   {"log_waits",
@@ -11813,6 +11815,11 @@
   NULL, NULL, 0, 0, 1, 0);
 #endif
 
+static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
+  PLUGIN_VAR_RQCMDARG,
+  "Limit the allocated memory for dictionary cache. (0: unlimited)",
+  NULL, NULL, 0, 0, LONG_MAX, 0);
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(additional_mem_pool_size),
   MYSQL_SYSVAR(autoextend_increment),
@@ -11882,6 +11889,7 @@
   MYSQL_SYSVAR(flush_neighbor_pages),
   MYSQL_SYSVAR(read_ahead),
   MYSQL_SYSVAR(adaptive_flushing_method),
+  MYSQL_SYSVAR(dict_size_limit),
   MYSQL_SYSVAR(use_sys_malloc),
   MYSQL_SYSVAR(use_native_aio),
   MYSQL_SYSVAR(change_buffering),
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -575,6 +575,7 @@
 
 	/* Use old-style record format for the insert buffer. */
 	table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
+	table->n_mysql_handles_opened = 1; /* for pin */
 
 	dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
 
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -140,6 +140,13 @@
 				s- or x-latched, or an index page
 				for which we know that
 				block->buf_fix_count == 0 */
+/************************************************************************
+Drops the page hash index entries of all buffer pool pages of an index */
+UNIV_INTERN
+void
+btr_search_drop_page_hash_index_on_index(
+/*=====================================*/
+	dict_index_t*	index);		/* in: index whose hash entries are dropped */
 /********************************************************************//**
 Drops a possible page hash index when a page is evicted from the buffer pool
 or freed in a file segment. */
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1595,6 +1595,15 @@
 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
 /* @} */
 
+/** A chunk of buffers. The buffer pool is allocated in chunks. */
+struct buf_chunk_struct{
+	ulint		mem_size;	/*!< allocated size of the chunk */
+	ulint		size;		/*!< size of frames[] and blocks[] */
+	void*		mem;		/*!< pointer to the memory area which
+					was allocated for the frames */
+	buf_block_t*	blocks;		/*!< array of buffer control blocks */
+};
+
 /** @brief The buffer pool statistics structure. */
 struct buf_pool_stat_struct{
 	ulint	n_page_gets;	/*!< number of page gets performed;
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1200,6 +1200,12 @@
 /*====================================*/
 	dict_table_t*	table,	/*!< in: table */
 	const char*	name);	/*!< in: name of the index to find */
+
+UNIV_INTERN
+void
+dict_table_LRU_trim(
+/*================*/
+	dict_table_t*	self);	/* in: table that is never evicted here, or NULL */
 /* Buffers for storing detailed information about the latest foreign key
 and unique key errors */
 extern FILE*	dict_foreign_err_file;
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -825,6 +825,13 @@
 	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
 		    dict_table_t*, table, ut_ad(table->cached),
 		    !strcmp(table->name, table_name));
+
+	/* make young in table_LRU */
+	if (table) {
+		UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+		UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+	}
+
 	return(table);
 }
 
@@ -918,6 +925,12 @@
 		table = dict_load_table_on_id(table_id);
 	}
 
+	/* make young in table_LRU */
+	if (table) {
+		UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
+		UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
+	}
+
 	ut_ad(!table || table->cached);
 
 	/* TODO: should get the type information from MySQL */
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -234,6 +234,7 @@
 extern ulint	srv_read_ahead;
 extern ulint	srv_adaptive_flushing_method;
 
+extern ulint	srv_dict_size_limit;
 /*-------------------------------------------*/
 
 extern ulint	srv_n_rows_inserted;
@@ -717,6 +718,7 @@
 	ulint innodb_data_writes;		/*!< I/O write requests */
 	ulint innodb_data_written;		/*!< Data bytes written */
 	ulint innodb_data_reads;		/*!< I/O read requests */
+	ulint innodb_dict_tables;
 	ulint innodb_buffer_pool_pages_total;	/*!< Buffer pool size */
 	ulint innodb_buffer_pool_pages_data;	/*!< Data pages */
 	ulint innodb_buffer_pool_pages_dirty;	/*!< Dirty data pages */
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -417,6 +417,8 @@
 UNIV_INTERN ulint	srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
 UNIV_INTERN ulint	srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
 UNIV_INTERN ulint	srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
+
+UNIV_INTERN ulint	srv_dict_size_limit = 0;
 /*-------------------------------------------*/
 UNIV_INTERN ulong	srv_n_spin_wait_rounds	= 30;
 UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
@@ -2225,6 +2227,7 @@
 	export_vars.innodb_data_reads = os_n_file_reads;
 	export_vars.innodb_data_writes = os_n_file_writes;
 	export_vars.innodb_data_written = srv_data_written;
+	export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0);
 	export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
 	export_vars.innodb_buffer_pool_write_requests
 		= srv_buf_pool_write_requests;
--- /dev/null
+++ b/mysql-test/suite/sys_vars/r/innodb_dict_size_limit_basic.result
@@ -0,0 +1,3 @@
+SELECT @@global.innodb_dict_size_limit;
+@@global.innodb_dict_size_limit
+0
--- /dev/null
+++ b/mysql-test/suite/sys_vars/t/innodb_dict_size_limit_basic.test
@@ -0,0 +1 @@
+SELECT @@global.innodb_dict_size_limit;