# name : innodb_stats.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! --- a/storage/innobase/dict/dict0boot.c +++ b/storage/innobase/dict/dict0boot.c @@ -266,6 +266,29 @@ /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); + if (mach_read_from_8(dict_hdr + DICT_HDR_XTRADB_MARK) + != DICT_HDR_XTRADB_FLAG) { + /* not extended yet by XtraDB, need to be extended */ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, &mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, &mtr); + mlog_write_ull(dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, &mtr); + } + mtr_commit(&mtr); + /* restart mtr */ + mtr_start(&mtr); + dict_hdr = dict_hdr_get(&mtr); + } + /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover @@ -425,7 +448,7 @@ table->id = DICT_FIELDS_ID; dict_table_add_to_cache(table, heap); dict_sys->sys_fields = table; - mem_heap_free(heap); + mem_heap_empty(heap); index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", DICT_HDR_SPACE, @@ -442,6 +465,45 @@ FALSE); ut_a(error == DB_SUCCESS); + /*-------------------------*/ + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 4, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NON_NULL_VALS", DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif +#if DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2 +#error "DICT_SYS_STATS_NON_NULL_VALS_FIELD != 3 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_STATS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + mtr_commit(&mtr); /*-------------------------*/ @@ -455,6 +517,7 @@ dict_load_sys_table(dict_sys->sys_columns); dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); + dict_load_sys_table(dict_sys->sys_stats); mutex_exit(&(dict_sys->mutex)); } --- a/storage/innobase/dict/dict0crea.c +++ b/storage/innobase/dict/dict0crea.c @@ -508,6 +508,56 @@ } /*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_STATS system table. +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_stats_tuple( +/*========================*/ + const dict_index_t* index, + ulint i, + mem_heap_t* heap) +{ + dict_table_t* sys_stats; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(index); + ut_ad(heap); + + sys_stats = dict_sys->sys_stats; + + entry = dtuple_create(heap, 4 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_stats); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, index->id); + dfield_set_data(dfield, ptr, 8); + /* 1: KEY_COLS -----------------------*/ + dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, i); + dfield_set_data(dfield, ptr, 4); + /* 4: DIFF_VALS ----------------------*/ + dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, 0); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + /* 5: NON_NULL_VALS ------------------*/ + dfield = dtuple_get_nth_field(entry, 3/*NON_NULL_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, 0); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + + return(entry); +} + +/*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. @return the tuple for search */ @@ -617,6 +667,27 @@ } /***************************************************************//** +Builds a row for storing stats to insert. +@return DB_SUCCESS */ +static +ulint +dict_build_stats_def_step( +/*======================*/ + ind_node_t* node) +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); + + ins_node_set_new_row(node->stats_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static @@ -936,6 +1007,49 @@ dict_sys->sys_fields, heap); node->field_def->common.parent = node; + if (srv_use_sys_stats_table) { + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + } else { + node->stats_def = NULL; + } + + node->commit_node = commit_node_create(heap); + node->commit_node->common.parent = node; + + return(node); +} + +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap) +{ + ind_node_t* node; + + node = mem_heap_alloc(heap, sizeof(ind_node_t)); + + node->common.type = QUE_NODE_INSERT_STATS; + + node->index = index; + + node->state = INDEX_BUILD_STATS_COLS; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); + + node->ind_def = NULL; + node->field_def = NULL; + + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + node->stats_no = 0; + node->commit_node = commit_node_create(heap); node->commit_node->common.parent = node; @@ -1086,6 +1200,7 @@ node->state = INDEX_BUILD_FIELD_DEF; node->field_no = 0; + node->stats_no = 0; thr->run_node = node->ind_def; @@ -1131,7 +1246,31 @@ goto function_exit; } - node->state = INDEX_CREATE_INDEX_TREE; + if (srv_use_sys_stats_table + && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { + node->state = INDEX_BUILD_STATS_COLS; + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } + } + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } } if (node->state == INDEX_CREATE_INDEX_TREE) { @@ -1177,6 +1316,66 @@ return(NULL); } + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/****************************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ind_node_t* node; + ulint err = DB_ERROR; + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = INDEX_BUILD_STATS_COLS; + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_COMMIT_WORK; + } + } + + if (node->state == INDEX_COMMIT_WORK) { + + /* do not commit transaction here for now */ + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + } else { + return(NULL); + } + thr->run_node = que_node_get_parent(node); return(thr); --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -755,7 +755,7 @@ print an error message and return without doing anything. */ dict_update_statistics(table, TRUE /* only update stats - if they have not been initialized */); + if they have not been initialized */, FALSE); } return(table); @@ -4354,6 +4354,295 @@ } /*********************************************************************//** +functions to use SYS_STATS system table. */ +static +ibool +dict_reload_statistics( +/*===================*/ + dict_table_t* table, + ulint* sum_of_index_sizes) +{ + dict_index_t* index; + ulint size; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + if (index == NULL) { + /* Table definition is corrupt */ + + return(FALSE); + } + + heap = mem_heap_create(1000); + + while (index) { + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + *sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + const rec_t* rec; + ulint n_fields; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)) + != index->id) { + /* not found: even 1 if not found should not be alowed */ + fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)" + " not found in SYS_STATS\n", + index->table_name, index->name, i, n_cols); + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + ut_a(i == key_cols); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_diff_key_vals_tmp[i] = mach_read_from_8(field); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_non_null_key_vals_tmp[i] = mach_read_from_8(field); + } else { + /* not enough fields: should be older */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. " + "Please execute ANALYZE TABLE for it.\n", + index->table_name, index->name, i, n_cols); + + stat_n_non_null_key_vals_tmp[i] = ((ib_int64_t)(-1)); + } +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + for (i = 0; i <= n_cols; i++) { + index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + if (stat_n_non_null_key_vals_tmp[i] == ((ib_int64_t)(-1))) { + /* approximate value */ + index->stat_n_non_null_key_vals[i] = stat_n_diff_key_vals_tmp[n_cols]; + } else { + index->stat_n_non_null_key_vals[i] = stat_n_non_null_key_vals_tmp[i]; + } + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); + return(TRUE); +} + +static +void +dict_store_statistics( +/*==================*/ + dict_table_t* table) +{ + dict_index_t* index; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + ut_a(index); + + heap = mem_heap_create(1000); + + while (index) { +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + ulint rests; + const rec_t* rec; + ulint n_fields; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + ib_int64_t* stat_n_non_null_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + stat_n_non_null_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + for (i = 0; i <= n_cols; i++) { + stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + stat_n_non_null_key_vals_tmp[i] = index->stat_n_non_null_key_vals[i]; + } + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + rests = n_cols + 1; + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)) + != index->id) { + /* not found */ + + + break; + } + + btr_pcur_store_position(&pcur, &mtr); + + if (rec_get_deleted_flag(rec, 0)) { + /* don't count */ + i--; + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + if (n_fields <= DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + /* not update for the older smaller format */ + fprintf(stderr, "InnoDB: Notice: stats for %s/%s (%lu/%lu)" + " in SYS_STATS seems older format. Please ANALYZE TABLE it.\n", + index->table_name, index->name, i, n_cols); + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_ull((byte*)field, stat_n_diff_key_vals_tmp[key_cols], &mtr); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_NON_NULL_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_ull((byte*)field, stat_n_non_null_key_vals_tmp[key_cols], &mtr); + + rests--; + +next_rec: + mtr_commit(&mtr); + mtr_start(&mtr); + btr_pcur_restore_position(BTR_MODIFY_LEAF, &pcur, &mtr); + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (rests) { + fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" + " of %s/%s to SYS_STATS system table.\n", + rests, index->table_name, index->name); + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); +} + +/*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. */ UNIV_INTERN @@ -4361,10 +4650,11 @@ dict_update_statistics( /*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool only_calc_if_missing_stats)/*!< in: only + ibool only_calc_if_missing_stats,/*!< in: only update/recalc the stats if they have not been initialized yet, otherwise do nothing */ + ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; ulint sum_of_index_sizes = 0; @@ -4381,6 +4671,27 @@ return; } + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) { + dict_table_stats_lock(table, RW_X_LATCH); + + /* reload statistics from SYS_STATS table */ + if (dict_reload_statistics(table, &sum_of_index_sizes)) { + /* success */ +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: reload_statistics succeeded for %s.\n", + table->name); +#endif + goto end; + } + + dict_table_stats_unlock(table, RW_X_LATCH); + } +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", + table->name); +#endif + sum_of_index_sizes = 0; + /* Find out the sizes of the indexes and how many different values for the key they approximately have */ @@ -4445,6 +4756,11 @@ index = dict_table_get_next_index(index); } while (index); + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { + /* store statistics to SYS_STATS table */ + dict_store_statistics(table); + } +end: index = dict_table_get_first_index(table); table->stat_n_rows = index->stat_n_diff_key_vals[ @@ -4462,6 +4778,78 @@ dict_table_stats_unlock(table, RW_X_LATCH); } +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index) +{ + mem_heap_t* heap; + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + ulint n_fields; + ulint len; + byte* buf; + mtr_t mtr; + + heap = mem_heap_create(100); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + +next_rec: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)) + != index->id) { + /* not found */ + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + /* no statistics == not older statistics */ + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + goto next_rec; + } + + n_fields = rec_get_n_fields_old(rec); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + if (n_fields > DICT_SYS_STATS_NON_NULL_VALS_FIELD) { + return(FALSE); + } else { + return(TRUE); + } +} + /**********************************************************************//** Prints info of a foreign key constraint. */ static @@ -4539,7 +4927,8 @@ ut_ad(mutex_own(&(dict_sys->mutex))); - dict_update_statistics(table, FALSE /* update even if initialized */); + if (srv_stats_auto_update) + dict_update_statistics(table, FALSE /* update even if initialized */, FALSE); dict_table_stats_lock(table, RW_S_LATCH); --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -50,7 +50,8 @@ "SYS_COLUMNS", "SYS_FIELDS", "SYS_FOREIGN", - "SYS_FOREIGN_COLS" + "SYS_FOREIGN_COLS", + "SYS_STATS" }; /* If this flag is TRUE, then we will load the cluster index's (and tables') @@ -348,12 +349,13 @@ } if ((status & DICT_TABLE_UPDATE_STATS) + && srv_stats_auto_update && dict_table_get_first_index(*table)) { /* Update statistics if DICT_TABLE_UPDATE_STATS is set */ dict_update_statistics(*table, FALSE /* update even if - initialized */); + initialized */, FALSE); } return(NULL); @@ -587,6 +589,75 @@ //#endif /* FOREIGN_NOT_USED */ /********************************************************************//** +This function parses a SYS_STATS record and extract necessary +information from the record and return to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_stats_rec( +/*=============================*/ + mem_heap_t* heap __attribute__((unused)), /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_STATS rec */ + index_id_t* index_id, /*!< out: INDEX_ID */ + ulint* key_cols, /*!< out: KEY_COLS */ + ib_uint64_t* diff_vals, /*!< out: DIFF_VALS */ + ib_uint64_t* non_null_vals) /*!< out: NON_NULL_VALS */ +{ + ulint len; + const byte* field; + ulint n_fields; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_STATS"); + } + + n_fields = rec_get_n_fields_old(rec); + + if (UNIV_UNLIKELY(n_fields < 5)) { + return("wrong number of columns in SYS_STATS record"); + } + + field = rec_get_nth_field_old(rec, 0/*INDEX_ID*/, &len); + if (UNIV_UNLIKELY(len != 8)) { +err_len: + return("incorrect column length in SYS_STATS"); + } + *index_id = mach_read_from_8(field); + + field = rec_get_nth_field_old(rec, 1/*KEY_COLS*/, &len); + if (UNIV_UNLIKELY(len != 4)) { + goto err_len; + } + *key_cols = mach_read_from_4(field); + + rec_get_nth_field_offs_old(rec, 2/*DB_TRX_ID*/, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + rec_get_nth_field_offs_old(rec, 3/*DB_ROLL_PTR*/, &len); + if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old(rec, 4/*DIFF_VALS*/, &len); + if (UNIV_UNLIKELY(len != 8)) { + goto err_len; + } + *diff_vals = mach_read_from_8(field); + + if (n_fields < 6) { + *non_null_vals = ((ib_uint64_t)(-1)); + } else { + field = rec_get_nth_field_old(rec, 5/*NON_NULL_VALS*/, &len); + if (UNIV_UNLIKELY(len != 8)) { + goto err_len; + } + *non_null_vals = mach_read_from_8(field); + } + + return(NULL); +} +/********************************************************************//** Determine the flags of a table described in SYS_TABLES. @return compressed page size in kilobytes; or 0 if the tablespace is uncompressed, ULINT_UNDEFINED on error */ --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -188,6 +188,7 @@ static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; static my_bool innobase_large_prefix = FALSE; +static my_bool innobase_use_sys_stats_table = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -2468,6 +2469,8 @@ goto error; } + srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -5256,6 +5259,10 @@ error = row_insert_for_mysql((byte*) record, prebuilt); +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + /* Handle duplicate key errors */ if (auto_inc_used) { ulint err; @@ -5591,6 +5598,10 @@ } } +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + innodb_srv_conc_exit_innodb(trx); error = convert_error_code_to_mysql(error, @@ -5644,6 +5655,10 @@ error = row_update_for_mysql((byte*) record, prebuilt); +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + innodb_srv_conc_exit_innodb(trx); error = convert_error_code_to_mysql( @@ -5965,6 +5980,11 @@ case DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_KEY_NOT_FOUND; @@ -6196,6 +6216,11 @@ case DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_END_OF_FILE; @@ -8149,11 +8174,35 @@ /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ + if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) + && called_from_analyze) { + /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ + /* they need to be created. */ + dict_index_t* index; + + prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics"; + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + + for (index = dict_table_get_first_index(ib_table); + index != NULL; + index = dict_table_get_next_index(index)) { + if (dict_is_older_statistics(index)) { + row_delete_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + row_insert_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + } + prebuilt->trx->op_info = "updating table statistics"; dict_update_statistics(ib_table, FALSE /* update even if stats - are initialized */); + are initialized */, called_from_analyze); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8238,7 +8287,7 @@ are asked by MySQL to avoid locking. Another reason to avoid the call is that it uses quite a lot of CPU. See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK + if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock || !(flag & HA_STATUS_VARIABLE_EXTRA)) { /* We do not update delete_length if no locking is requested so the "old" value can @@ -11511,6 +11560,26 @@ "The number of index pages to sample when calculating statistics (default 8)", NULL, NULL, 8, 1, ~0ULL, 0); +static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update, + PLUGIN_VAR_RQCMDARG, + "Enable/Disable InnoDB's auto update statistics of indexes. " + "(except for ANALYZE TABLE command) 0:disable 1:enable", + NULL, NULL, 1, 0, 1, 0); + +static MYSQL_SYSVAR_ULONG(stats_update_need_lock, srv_stats_update_need_lock, + PLUGIN_VAR_RQCMDARG, + "Enable/Disable InnoDB's update statistics which needs to lock dictionary. " + "e.g. Data_free.", + NULL, NULL, 1, 0, 1, 0); + +static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable to use SYS_STATS system table to store statistics statically, " + "And avoids to calculate statistics at every first open of the tables. " + "This option may make the opportunities of update statistics less. " + "So you should use ANALYZE TABLE command intentionally.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default). " @@ -11883,6 +11952,9 @@ MYSQL_SYSVAR(recovery_update_relay_log), MYSQL_SYSVAR(rollback_on_timeout), MYSQL_SYSVAR(stats_on_metadata), + MYSQL_SYSVAR(stats_auto_update), + MYSQL_SYSVAR(stats_update_need_lock), + MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(stats_method), @@ -11957,7 +12029,10 @@ i_s_innodb_sys_columns, i_s_innodb_sys_fields, i_s_innodb_sys_foreign, -i_s_innodb_sys_foreign_cols +i_s_innodb_sys_foreign_cols, +i_s_innodb_sys_stats, +i_s_innodb_table_stats, +i_s_innodb_index_stats mysql_declare_plugin_end; /** @brief Initialize the default value of innodb_commit_concurrency. --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -49,6 +49,7 @@ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ #include "trx0rseg.h" /* for trx_rseg_struct */ #include "trx0sys.h" /* for trx_sys */ +#include "dict0dict.h" /* for dict_sys */ } #define OK(expr) \ @@ -3511,6 +3512,225 @@ STRUCT_FLD(flags, 0UL) }; +/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_stats */ +static ST_FIELD_INFO innodb_sys_stats_fields_info[] = +{ +#define SYS_STATS_INDEX_ID 0 + {STRUCT_FLD(field_name, "INDEX_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_STATS_KEY_COLS 1 + {STRUCT_FLD(field_name, "KEY_COLS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_STATS_DIFF_VALS 2 + {STRUCT_FLD(field_name, "DIFF_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_STATS_NON_NULL_VALS 3 + {STRUCT_FLD(field_name, "NON_NULL_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; +/**********************************************************************//** +Function to fill information_schema.innodb_sys_stats +@return 0 on success */ +static +int +i_s_dict_fill_sys_stats( +/*====================*/ + THD* thd, /*!< in: thread */ + index_id_t index_id, /*!< in: INDEX_ID */ + ulint key_cols, /*!< in: KEY_COLS */ + ib_uint64_t diff_vals, /*!< in: DIFF_VALS */ + ib_uint64_t non_null_vals, /*!< in: NON_NULL_VALS */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + + DBUG_ENTER("i_s_dict_fill_sys_stats"); + + fields = table_to_fill->field; + + OK(fields[SYS_STATS_INDEX_ID]->store(longlong(index_id), TRUE)); + + OK(fields[SYS_STATS_KEY_COLS]->store(key_cols)); + + OK(fields[SYS_STATS_DIFF_VALS]->store(longlong(diff_vals), TRUE)); + + if (non_null_vals == ((ib_uint64_t)(-1))) { + fields[SYS_STATS_NON_NULL_VALS]->set_null(); + } else { + OK(fields[SYS_STATS_NON_NULL_VALS]->store(longlong(non_null_vals), TRUE)); + fields[SYS_STATS_NON_NULL_VALS]->set_notnull(); + } + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.innodb_sys_stats table. +@return 0 on success */ +static +int +i_s_sys_stats_fill_table( +/*=====================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + COND* cond) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("i_s_sys_stats_fill_table"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_STATS); + + while (rec) { + const char* err_msg; + index_id_t index_id; + ulint key_cols; + ib_uint64_t diff_vals; + ib_uint64_t non_null_vals; + + /* Extract necessary information from a SYS_FOREIGN_COLS row */ + err_msg = dict_process_sys_stats_rec( + heap, rec, &index_id, &key_cols, &diff_vals, &non_null_vals); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + i_s_dict_fill_sys_stats( + thd, index_id, key_cols, diff_vals, non_null_vals, + tables->table); + } else { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_stats +@return 0 on success */ +static +int +innodb_sys_stats_init( +/*========================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_sys_stats_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_sys_stats_fields_info; + schema->fill_table = i_s_sys_stats_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_SYS_STATS"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, "Percona"), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "XtraDB SYS_STATS table"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_sys_stats_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* reserved for dependency checking */ + /* void* */ + STRUCT_FLD(__reserved1, NULL), + + /* flags for plugin */ + /* unsigned long */ + STRUCT_FLD(flags, 0UL) +}; + /*********************************************************************** */ static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] = @@ -3677,3 +3897,349 @@ /* unsigned long */ STRUCT_FLD(flags, 0UL), }; + +/*********************************************************************** +*/ +static ST_FIELD_INFO i_s_innodb_table_stats_info[] = +{ + {STRUCT_FLD(field_name, "table_schema"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "table_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "rows"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "clust_size"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "other_size"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "modified"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +static ST_FIELD_INFO i_s_innodb_index_stats_info[] = +{ + {STRUCT_FLD(field_name, "table_schema"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "table_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "fields"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "rows_per_key"), + STRUCT_FLD(field_length, 256), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_total_pages"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_leaf_pages"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +static +int +i_s_innodb_table_stats_fill( +/*========================*/ + THD* thd, + TABLE_LIST* tables, + COND* cond) +{ + TABLE* i_s_table = (TABLE *) tables->table; + int status = 0; + dict_table_t* table; + + DBUG_ENTER("i_s_innodb_table_stats_fill"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + mutex_enter(&(dict_sys->mutex)); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + char buf[NAME_LEN * 2 + 2]; + char* ptr; + + if (table->stat_clustered_index_size == 0) { + table = UT_LIST_GET_NEXT(table_LRU, table); + continue; + } + + buf[NAME_LEN * 2 + 1] = 0; + strncpy(buf, table->name, NAME_LEN * 2 + 1); + ptr = strchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + ++ptr; + } else { + ptr = buf; + } + + field_store_string(i_s_table->field[0], buf); + field_store_string(i_s_table->field[1], ptr); + i_s_table->field[2]->store(table->stat_n_rows); + i_s_table->field[3]->store(table->stat_clustered_index_size); + i_s_table->field[4]->store(table->stat_sum_of_other_index_sizes); + i_s_table->field[5]->store(table->stat_modified_counter); + + if (schema_table_store_record(thd, i_s_table)) { + status = 1; + break; + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + mutex_exit(&(dict_sys->mutex)); + + DBUG_RETURN(status); +} + +static +int +i_s_innodb_index_stats_fill( +/*========================*/ + THD* thd, + TABLE_LIST* tables, + COND* cond) +{ + TABLE* i_s_table = (TABLE *) tables->table; + int status = 0; + dict_table_t* table; + dict_index_t* index; + + DBUG_ENTER("i_s_innodb_index_stats_fill"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + mutex_enter(&(dict_sys->mutex)); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + if (table->stat_clustered_index_size == 0) { + table = UT_LIST_GET_NEXT(table_LRU, table); + continue; + } + + ib_int64_t n_rows = table->stat_n_rows; + + if (n_rows < 0) { + n_rows = 0; + } + + index = dict_table_get_first_index(table); + + while (index) { + char buff[256+1]; + char row_per_keys[256+1]; + char buf[NAME_LEN * 2 + 2]; + char* ptr; + ulint i; + + buf[NAME_LEN * 2 + 1] = 0; + strncpy(buf, table->name, NAME_LEN * 2 + 1); + ptr = strchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + ++ptr; + } else { + ptr = buf; + } + + field_store_string(i_s_table->field[0], buf); + field_store_string(i_s_table->field[1], ptr); + field_store_string(i_s_table->field[2], index->name); + i_s_table->field[3]->store(index->n_uniq); + + row_per_keys[0] = '\0'; + + /* It is remained optimistic operation still for now */ + //dict_index_stat_mutex_enter(index); + if (index->stat_n_diff_key_vals) { + for (i = 1; i <= index->n_uniq; i++) { + ib_int64_t rec_per_key; + if (index->stat_n_diff_key_vals[i]) { + rec_per_key = n_rows / index->stat_n_diff_key_vals[i]; + } else { + rec_per_key = n_rows; + } + ut_snprintf(buff, 256, (i == index->n_uniq)?"%llu":"%llu, ", + rec_per_key); + strncat(row_per_keys, buff, 256 - strlen(row_per_keys)); + } + } + //dict_index_stat_mutex_exit(index); + + field_store_string(i_s_table->field[4], row_per_keys); + + i_s_table->field[5]->store(index->stat_index_size); + i_s_table->field[6]->store(index->stat_n_leaf_pages); + + if (schema_table_store_record(thd, i_s_table)) { + status = 1; + break; + } + + index = dict_table_get_next_index(index); + } + + if (status == 1) { + break; + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + mutex_exit(&(dict_sys->mutex)); + + DBUG_RETURN(status); +} + +static +int +i_s_innodb_table_stats_init( +/*========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_table_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_table_stats_info; + schema->fill_table = i_s_innodb_table_stats_fill; + + DBUG_RETURN(0); +} + +static +int +i_s_innodb_index_stats_init( +/*========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_index_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_index_stats_info; + schema->fill_table = i_s_innodb_index_stats_fill; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_table_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_TABLE_STATS"), + STRUCT_FLD(author, "Percona"), + STRUCT_FLD(descr, "InnoDB table statistics in memory"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_table_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL) +}; + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_index_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_INDEX_STATS"), + STRUCT_FLD(author, "Percona"), + STRUCT_FLD(descr, "InnoDB index statistics in memory"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_index_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL) +}; --- a/storage/innobase/handler/i_s.h +++ b/storage/innobase/handler/i_s.h @@ -43,5 +43,8 @@ extern struct st_mysql_plugin i_s_innodb_sys_foreign; extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols; extern struct st_mysql_plugin i_s_innodb_rseg; +extern struct st_mysql_plugin i_s_innodb_sys_stats; +extern struct st_mysql_plugin i_s_innodb_table_stats; +extern struct st_mysql_plugin i_s_innodb_index_stats; #endif /* i_s_h */ --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -104,6 +104,7 @@ #define DICT_COLUMNS_ID 2 #define DICT_INDEXES_ID 3 #define DICT_FIELDS_ID 4 +#define DICT_STATS_ID 6 /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID 5 @@ -131,10 +132,13 @@ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */ #define DICT_HDR_FIELDS 48 /* Root of the index field index tree */ +#define DICT_HDR_STATS 52 /* Root of the stats tree */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ + +#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */ /*-------------------------------------------------------------*/ /* The field numbers in the SYS_TABLES clustered index */ @@ -146,11 +150,16 @@ #define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_NAME_FIELD 4 +#define DICT_SYS_STATS_DIFF_VALS_FIELD 4 +#define DICT_SYS_STATS_NON_NULL_VALS_FIELD 5 + /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is updated */ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256 +#define DICT_HDR_XTRADB_FLAG 0x5854524144425F31ULL /* "XTRADB_1" */ + #ifndef UNIV_NONINL #include "dict0boot.ic" #endif --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -53,6 +53,14 @@ dict_index_t* index, /*!< in: index to create, built as a memory data structure */ mem_heap_t* heap); /*!< in: heap where created */ +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap); /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -62,6 +70,13 @@ /*===================*/ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr); +/***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -170,6 +185,7 @@ ins_node_t* field_def; /* child node which does the inserts of the field definitions; the row to be inserted is built by the parent node */ + ins_node_t* stats_def; commit_node_t* commit_node; /* child node which performs a commit after a successful index creation */ @@ -180,6 +196,7 @@ dict_table_t* table; /*!< table which owns the index */ dtuple_t* ind_row;/* index definition row built */ ulint field_no;/* next field definition to insert */ + ulint stats_no; mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ }; @@ -189,6 +206,7 @@ #define INDEX_CREATE_INDEX_TREE 3 #define INDEX_COMMIT_WORK 4 #define INDEX_ADD_TO_CACHE 5 +#define INDEX_BUILD_STATS_COLS 6 #ifndef UNIV_NONINL #include "dict0crea.ic" --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1126,10 +1126,18 @@ dict_update_statistics( /*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool only_calc_if_missing_stats);/*!< in: only + ibool only_calc_if_missing_stats, /*!< in: only update/recalc the stats if they have not been initialized yet, otherwise do nothing */ + ibool sync); +/*********************************************************************//** +*/ +UNIV_INTERN +ibool +dict_is_older_statistics( +/*=====================*/ + dict_index_t* index); /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN @@ -1244,6 +1252,7 @@ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_stats; /*!< SYS_STATS table */ }; #endif /* !UNIV_HOTBACKUP */ --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -41,6 +41,7 @@ SYS_FIELDS, SYS_FOREIGN, SYS_FOREIGN_COLS, + SYS_STATS, /* This must be last item. Defines the number of system tables. */ SYS_NUM_SYSTEM_TABLES @@ -327,6 +328,20 @@ const char** ref_col_name, /*!< out: referenced column name in referenced table */ ulint* pos); /*!< out: column position */ +/********************************************************************//** +This function parses a SYS_STATS record and extract necessary +information from the record and return to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_stats_rec( +/*=============================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_STATS rec */ + index_id_t* index_id, /*!< out: INDEX_ID */ + ulint* key_cols, /*!< out: KEY_COLS */ + ib_uint64_t* diff_vals, /*!< out: DIFF_VALS */ + ib_uint64_t* non_null_vals); /*!< out: NON_NULL_VALS */ #ifndef UNIV_NONINL #include "dict0load.ic" #endif --- a/storage/innobase/include/que0que.h +++ b/storage/innobase/include/que0que.h @@ -492,6 +492,8 @@ #define QUE_NODE_CALL 31 #define QUE_NODE_EXIT 32 +#define QUE_NODE_INSERT_STATS 34 + /* Query thread states */ #define QUE_THR_RUNNING 1 #define QUE_THR_PROCEDURE_WAIT 2 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -387,6 +387,22 @@ then checked for not being too large. */ /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx); +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -216,6 +216,9 @@ extern ibool srv_innodb_status; extern unsigned long long srv_stats_sample_pages; +extern ulint srv_stats_auto_update; +extern ulint srv_stats_update_need_lock; +extern ibool srv_use_sys_stats_table; extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; --- a/storage/innobase/que/que0que.c +++ b/storage/innobase/que/que0que.c @@ -621,11 +621,21 @@ que_graph_free_recursive(cre_ind->ind_def); que_graph_free_recursive(cre_ind->field_def); + if (srv_use_sys_stats_table) + que_graph_free_recursive(cre_ind->stats_def); que_graph_free_recursive(cre_ind->commit_node); mem_heap_free(cre_ind->heap); break; + case QUE_NODE_INSERT_STATS: + cre_ind = node; + + que_graph_free_recursive(cre_ind->stats_def); + que_graph_free_recursive(cre_ind->commit_node); + + mem_heap_free(cre_ind->heap); + break; case QUE_NODE_PROC: que_graph_free_stat_list(((proc_node_t*)node)->stat_list); @@ -1138,6 +1148,8 @@ str = "CREATE TABLE"; } else if (type == QUE_NODE_CREATE_INDEX) { str = "CREATE INDEX"; + } else if (type == QUE_NODE_INSERT_STATS) { + str = "INSERT TO SYS_STATS"; } else if (type == QUE_NODE_FOR) { str = "FOR LOOP"; } else if (type == QUE_NODE_RETURN) { @@ -1255,6 +1267,8 @@ thr = dict_create_table_step(thr); } else if (type == QUE_NODE_CREATE_INDEX) { thr = dict_create_index_step(thr); + } else if (type == QUE_NODE_INSERT_STATS) { + thr = dict_insert_stats_step(thr); } else if (type == QUE_NODE_ROW_PRINTF) { thr = row_printf_step(thr); } else { --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -2018,6 +2018,8 @@ } #ifdef UNIV_DEBUG + if (!srv_use_sys_stats_table + || index != UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { page_t* page = btr_cur_get_page(&cursor); rec_t* first_rec = page_rec_get_next( --- a/storage/innobase/row/row0merge.c +++ b/storage/innobase/row/row0merge.c @@ -2019,6 +2019,8 @@ "UPDATE SYS_INDEXES SET NAME=CONCAT('" TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" "COMMIT WORK;\n" + /* Drop the statistics of the index. */ + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. */ --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -922,6 +922,9 @@ table->stat_modified_counter = counter + 1; + if (!srv_stats_auto_update) + return; + /* Calculate new statistics if 1 / 16 of table has been modified since the last time a statistics batch was run, or if stat_modified_counter > 2 000 000 000 (to avoid wrap-around). @@ -932,7 +935,7 @@ || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { dict_update_statistics(table, FALSE /* update even if stats - are initialized */); + are initialized */, TRUE); } } @@ -2077,6 +2080,71 @@ } /*********************************************************************//** +*/ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, + trx_t* trx) +{ + ind_node_t* node; + mem_heap_t* heap; + que_thr_t* thr; + ulint err; + + //ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "try to insert rows to SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + heap = mem_heap_create(512); + + node = ind_insert_stats_graph_create(index, heap); + + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + trx->op_info = ""; + + return((int) err); +} + +/*********************************************************************//** +*/ +UNIV_INTERN +int +row_delete_stats_for_mysql( +/*=============================*/ + dict_index_t* index, + trx_t* trx) +{ + pars_info_t* info = pars_info_create(); + + trx->op_info = "delete rows from SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + pars_info_add_ull_literal(info, "indexid", index->id); + + return((int) que_eval_sql(info, + "PROCEDURE DELETE_STATISTICS_PROC () IS\n" + "BEGIN\n" + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" + "END;\n" + , TRUE, trx)); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. @@ -3001,7 +3069,7 @@ dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); dict_update_statistics(table, FALSE /* update even if stats are - initialized */); + initialized */, TRUE); trx_commit_for_mysql(trx); @@ -3312,6 +3380,8 @@ " IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" " ELSE\n" + " DELETE FROM SYS_STATS\n" + " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_FIELDS\n" " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_INDEXES\n" --- a/storage/innobase/row/row0row.c +++ b/storage/innobase/row/row0row.c @@ -364,6 +364,14 @@ rec_len = rec_offs_n_fields(offsets); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + if (rec_len < dict_index_get_n_fields(index)) { + /* the new record should be extended */ + rec_len = dict_index_get_n_fields(index); + } + } + entry = dtuple_create(heap, rec_len); dtuple_set_n_fields_cmp(entry, @@ -375,6 +383,14 @@ for (i = 0; i < rec_len; i++) { dfield = dtuple_get_nth_field(entry, i); + + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield_set_null(dfield); + continue; + } + field = rec_get_nth_field(rec, offsets, i, &len); dfield_set_data(dfield, field, len); --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -439,6 +439,12 @@ 0); } + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && upd_field->field_no >= rec_offs_n_fields(offsets)) { + return(TRUE); + } + old_len = rec_offs_nth_size(offsets, upd_field->field_no); if (rec_offs_comp(offsets) @@ -879,6 +885,18 @@ for (i = 0; i < dtuple_get_n_fields(entry); i++) { + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes) + && i >= rec_offs_n_fields(offsets)) { + dfield = dtuple_get_nth_field(entry, i); + + upd_field = upd_get_nth_field(update, n_diff); + dfield_copy(&(upd_field->new_val), dfield); + upd_field_set_field_no(upd_field, i, index, trx); + n_diff++; + goto skip_compare; + } + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -400,6 +400,9 @@ /* When estimating number of different key values in an index, sample this many index pages */ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; +UNIV_INTERN ulint srv_stats_auto_update = 1; +UNIV_INTERN ulint srv_stats_update_need_lock = 1; +UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE; --- a/storage/innobase/trx/trx0rec.c +++ b/storage/innobase/trx/trx0rec.c @@ -669,14 +669,27 @@ /* Save to the undo log the old values of the columns to be updated. */ if (update) { + ulint extended = 0; + if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - ptr += mach_write_compressed(ptr, upd_get_n_fields(update)); + if (srv_use_sys_stats_table + && index == UT_LIST_GET_FIRST(dict_sys->sys_stats->indexes)) { + for (i = 0; i < upd_get_n_fields(update); i++) { + ulint pos = upd_get_nth_field(update, i)->field_no; + + if (pos >= rec_offs_n_fields(offsets)) { + extended++; + } + } + } + + ptr += mach_write_compressed(ptr, upd_get_n_fields(update) - extended); - for (i = 0; i < upd_get_n_fields(update); i++) { + for (i = 0; i < upd_get_n_fields(update) - extended; i++) { ulint pos = upd_get_nth_field(update, i)->field_no; --- /dev/null +++ b/mysql-test/r/percona_innodb_use_sys_stats_table.result @@ -0,0 +1,3 @@ +show variables like 'innodb_use_sys_stats%'; +Variable_name Value +innodb_use_sys_stats_table ON --- /dev/null +++ b/mysql-test/t/percona_innodb_use_sys_stats_table-master.opt @@ -0,0 +1 @@ +--innodb_use_sys_stats_table --- /dev/null +++ b/mysql-test/t/percona_innodb_use_sys_stats_table.test @@ -0,0 +1,2 @@ +--source include/have_innodb.inc +show variables like 'innodb_use_sys_stats%';