# name : innodb_stats.patch # introduced : 11 or before # maintainer : Yasufumi # #!!! notice !!! # Any small change to this file in the main branch # should be done or reviewed by the maintainer! diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c --- a/storage/innobase/btr/btr0cur.c 2010-12-03 15:49:59.165212710 +0900 +++ b/storage/innobase/btr/btr0cur.c 2010-12-03 17:19:24.834126874 +0900 @@ -1010,6 +1010,107 @@ } } +/**********************************************************************//** +Positions a cursor at a randomly chosen position within a B-tree +after the given path +@return TRUE if the position is at the first page, in which case the cursor + points to the first record, for use by the caller. */ +UNIV_INTERN +ibool +btr_cur_open_at_rnd_pos_after_path( +/*====================*/ + dict_index_t* index, /*!< in: index */ + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ + btr_path_t* first_rec_path, /*!< in: path to follow, one slot per tree level starting at the root; slot->nth_rec is the record position used on each level */ + btr_cur_t* cursor, /*!< in/out: B-tree cursor */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_cur_t* page_cursor; + btr_path_t* slot; + ibool is_first_rec = TRUE; + ulint page_no; + ulint space; + ulint zip_size; + ulint height; + rec_t* node_ptr; + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + if (latch_mode == BTR_MODIFY_TREE) { + mtr_x_lock(dict_index_get_lock(index), mtr); + } else { + mtr_s_lock(dict_index_get_lock(index), mtr); + } + + page_cursor = btr_cur_get_page_cur(cursor); + cursor->index = index; + + space = dict_index_get_space(index); + zip_size = dict_table_zip_size(index->table); + page_no = dict_index_get_page(index); + + height = ULINT_UNDEFINED; + slot = first_rec_path; + + for (;;) { + buf_block_t* block; + page_t* page; + + block = buf_page_get_gen(space, zip_size, page_no, + RW_NO_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, mtr); + page = buf_block_get_frame(block); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height == 
ULINT_UNDEFINED) { + /* We are in the root node */ + + height = btr_page_get_level(page, mtr); + } + + if (height == 0) { + btr_cur_latch_leaves(page, space, zip_size, page_no, + latch_mode, cursor, mtr); + } + + if (is_first_rec && slot->nth_rec != ULINT_UNDEFINED) { + if (height == 0) { + /* must open the first rec */ + page_cur_open_on_nth_user_rec(block, page_cursor, slot->nth_rec); + } else { + is_first_rec = page_cur_open_on_rnd_user_rec_after_nth(block, + page_cursor, slot->nth_rec); + } + } else { + is_first_rec = FALSE; + page_cur_open_on_rnd_user_rec(block, page_cursor); + } + + if (height == 0) { + break; + } + + ut_ad(height > 0); + + height--; + slot++; + + node_ptr = page_cur_get_rec(page_cursor); + offsets = rec_get_offsets(node_ptr, cursor->index, offsets, + ULINT_UNDEFINED, &heap); + /* Go to the child node */ + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); + } + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return (is_first_rec); +} + /*==================== B-TREE INSERT =========================*/ /*************************************************************//** @@ -3479,6 +3580,154 @@ } /*******************************************************************//** +Estimates the number of pages which have not null value of the key of n_cols. 
+@return estimated number of pages */ +UNIV_INTERN +ulint +btr_estimate_n_pages_not_null( +/*=========================*/ + dict_index_t* index, /*!< in: index */ + ulint n_cols, /*!< in: The cols should be not null */ + btr_path_t* path1) /*!< in: path1[BTR_PATH_ARRAY_N_SLOTS] */ +{ + dtuple_t* tuple1; + btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS]; + btr_cur_t cursor; + btr_path_t* slot1; + btr_path_t* slot2; + ibool diverged; + ibool diverged_lot; + ulint divergence_level; + ulint n_pages; + ulint i; + mtr_t mtr; + mem_heap_t* heap; + + heap = mem_heap_create(n_cols * sizeof(dfield_t) + + sizeof(dtuple_t)); + + /* make tuple1 (NULL,NULL,,,) from n_cols */ + tuple1 = dtuple_create(heap, n_cols); + dict_index_copy_types(tuple1, index, n_cols); + + for (i = 0; i < n_cols; i++) { + dfield_set_null(dtuple_get_nth_field(tuple1, i)); + } + + mtr_start(&mtr); + + cursor.path_arr = path1; + + btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, 0, __FILE__, __LINE__, &mtr); + + mtr_commit(&mtr); + + + + mtr_start(&mtr); + + cursor.path_arr = path2; + + btr_cur_open_at_index_side(FALSE, index, + BTR_SEARCH_LEAF | BTR_ESTIMATE, + &cursor, &mtr); + + mtr_commit(&mtr); + + mem_heap_free(heap); + + /* We have the path information for the range in path1 and path2 */ + + n_pages = 1; + diverged = FALSE; /* This becomes true when the path is not + the same any more */ + diverged_lot = FALSE; /* This becomes true when the paths are + not the same or adjacent any more */ + divergence_level = 1000000; /* This is the level where paths diverged + a lot */ + for (i = 0; ; i++) { + ut_ad(i < BTR_PATH_ARRAY_N_SLOTS); + + slot1 = path1 + i; + slot2 = path2 + i; + + if ((slot1 + 1)->nth_rec == ULINT_UNDEFINED + || (slot2 + 1)->nth_rec == ULINT_UNDEFINED) { + + if (i > divergence_level + 1) { + /* In trees whose height is > 1 our algorithm + tends to underestimate: multiply the estimate + by 2: */ + + n_pages = n_pages * 2; + } + + /* Do not 
estimate the number of rows in the range + to over 1 / 2 of the estimated rows in the whole + table */ + + if (n_pages > index->stat_n_leaf_pages / 2) { + n_pages = index->stat_n_leaf_pages / 2; + + /* If there are just 0 or 1 rows in the table, + then we estimate all rows are in the range */ + + if (n_pages == 0) { + n_pages = index->stat_n_leaf_pages; + } + } + + return(n_pages); + } + + if (!diverged && slot1->nth_rec != slot2->nth_rec) { + + diverged = TRUE; + + if (slot1->nth_rec < slot2->nth_rec) { + n_pages = slot2->nth_rec - slot1->nth_rec; + + if (n_pages > 1) { + diverged_lot = TRUE; + divergence_level = i; + } + } else { + /* Maybe the tree has changed between + searches */ + + return(10); + } + + } else if (diverged && !diverged_lot) { + + if (slot1->nth_rec < slot1->n_recs + || slot2->nth_rec > 1) { + + diverged_lot = TRUE; + divergence_level = i; + + n_pages = 0; + + if (slot1->nth_rec < slot1->n_recs) { + n_pages += slot1->n_recs + - slot1->nth_rec; + } + + if (slot2->nth_rec > 1) { + n_pages += slot2->nth_rec - 1; + } + } + } else if (diverged_lot) { + + n_pages = (n_pages * (slot1->n_recs + slot2->n_recs)) + / 2; + } + } +} + +/*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). The estimates are stored in the array index->stat_n_diff_key_vals. 
*/ @@ -3507,18 +3756,38 @@ ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE]; ulint* offsets_rec = offsets_rec_; ulint* offsets_next_rec= offsets_next_rec_; + ulint stats_method = srv_stats_method; + btr_path_t first_rec_path[BTR_PATH_ARRAY_N_SLOTS]; + ulint effective_pages; /* effective leaf pages */ rec_offs_init(offsets_rec_); rec_offs_init(offsets_next_rec_); n_cols = dict_index_get_n_unique(index); + if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { + /* estimate effective pages and path for the first effective record */ + /* TODO: make it work also for n_cols > 1. */ + effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path); + + if (!effective_pages) { + for (j = 0; j <= n_cols; j++) { + index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages; + } + return; + } else if (effective_pages > index->stat_n_leaf_pages) { + effective_pages = index->stat_n_leaf_pages; + } + } else { + effective_pages = index->stat_n_leaf_pages; + } + n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t)); /* It makes no sense to test more pages than are contained in the index, thus we lower the number if it is too high */ - if (srv_stats_sample_pages > index->stat_index_size) { - if (index->stat_index_size > 0) { - n_sample_pages = index->stat_index_size; + if (srv_stats_sample_pages > effective_pages) { + if (effective_pages > 0) { + n_sample_pages = effective_pages; } else { n_sample_pages = 1; } @@ -3530,9 +3799,15 @@ for (i = 0; i < n_sample_pages; i++) { rec_t* supremum; + ibool is_first_page = TRUE; mtr_start(&mtr); + if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { + is_first_page = btr_cur_open_at_rnd_pos_after_path(index, BTR_SEARCH_LEAF, + first_rec_path, &cursor, &mtr); + } else { btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); + } /* Count the number of different key values for each prefix of the key on this index page. 
If the prefix does not determine @@ -3543,7 +3818,13 @@ page = btr_cur_get_page(&cursor); supremum = page_get_supremum_rec(page); + if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) { + /* the cursor should be the first record of the page. */ + /* Counting should be started from here. */ + rec = btr_cur_get_rec(&cursor); + } else { rec = page_rec_get_next(page_get_infimum_rec(page)); + } if (rec != supremum) { not_empty_flag = 1; @@ -3552,7 +3833,8 @@ } while (rec != supremum) { - rec_t* next_rec = page_rec_get_next(rec); + rec_t* next_rec; + next_rec = page_rec_get_next(rec); if (next_rec == supremum) { break; } @@ -3566,7 +3848,10 @@ cmp_rec_rec_with_match(rec, next_rec, offsets_rec, offsets_next_rec, index, &matched_fields, - &matched_bytes); + &matched_bytes, + (stats_method==SRV_STATS_METHOD_NULLS_NOT_EQUAL) ? + SRV_STATS_METHOD_NULLS_NOT_EQUAL : + SRV_STATS_METHOD_NULLS_EQUAL); for (j = matched_fields + 1; j <= n_cols; j++) { /* We add one if this index record has @@ -3627,7 +3912,7 @@ for (j = 0; j <= n_cols; j++) { index->stat_n_diff_key_vals[j] = ((n_diff[j] - * (ib_int64_t)index->stat_n_leaf_pages + * (ib_int64_t)effective_pages + n_sample_pages - 1 + total_external_size + not_empty_flag) @@ -3642,7 +3927,7 @@ different key values, or even more. Let us try to approximate that: */ - add_on = index->stat_n_leaf_pages + add_on = effective_pages / (10 * (n_sample_pages + total_external_size)); @@ -3651,6 +3936,15 @@ } index->stat_n_diff_key_vals[j] += add_on; + + if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) { + /* index->stat_n_diff_key_vals[k] is used for calc rec_per_key, + as "stats.records / index->stat_n_diff_key_vals[x]". + So it should be adjusted to the value which is based on whole of the index. 
*/ + index->stat_n_diff_key_vals[j] = + index->stat_n_diff_key_vals[j] * (ib_int64_t)index->stat_n_leaf_pages + / (ib_int64_t)effective_pages; + } } mem_free(n_diff); diff -ruN a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c --- a/storage/innobase/dict/dict0boot.c 2010-12-03 15:48:03.034036843 +0900 +++ b/storage/innobase/dict/dict0boot.c 2010-12-03 17:19:24.835112632 +0900 @@ -266,6 +266,29 @@ /* Get the dictionary header */ dict_hdr = dict_hdr_get(&mtr); + if (mach_read_from_8(dict_hdr + DICT_HDR_XTRADB_MARK) + != DICT_HDR_XTRADB_FLAG) { + /* not extended yet by XtraDB, need to be extended */ + ulint root_page_no; + + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_STATS_ID, + dict_ind_redundant, &mtr); + if (root_page_no == FIL_NULL) { + fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n"); + srv_use_sys_stats_table = FALSE; + } else { + mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no, + MLOG_4BYTES, &mtr); + mlog_write_ull(dict_hdr + DICT_HDR_XTRADB_MARK, + DICT_HDR_XTRADB_FLAG, &mtr); + } + mtr_commit(&mtr); + /* restart mtr */ + mtr_start(&mtr); + dict_hdr = dict_hdr_get(&mtr); + } + /* Because we only write new row ids to disk-based data structure (dictionary header) when it is divisible by DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover @@ -425,7 +448,7 @@ table->id = DICT_FIELDS_ID; dict_table_add_to_cache(table, heap); dict_sys->sys_fields = table; - mem_heap_free(heap); + mem_heap_empty(heap); index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", DICT_HDR_SPACE, @@ -442,6 +465,41 @@ FALSE); ut_a(error == DB_SUCCESS); + /*-------------------------*/ + table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0); + table->n_mysql_handles_opened = 1; /* for pin */ + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "DIFF_VALS", 
DATA_BINARY, 0, 0); + + /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */ +#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2 +#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2" +#endif + + table->id = DICT_STATS_ID; + dict_table_add_to_cache(table, heap); + dict_sys->sys_stats = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_STATS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "KEY_COLS", 0); + + index->id = DICT_STATS_ID; + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_STATS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mem_heap_free(heap); + mtr_commit(&mtr); /*-------------------------*/ @@ -455,6 +513,7 @@ dict_load_sys_table(dict_sys->sys_columns); dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); + dict_load_sys_table(dict_sys->sys_stats); mutex_exit(&(dict_sys->mutex)); } diff -ruN a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c --- a/storage/innobase/dict/dict0crea.c 2010-12-03 15:48:03.036081059 +0900 +++ b/storage/innobase/dict/dict0crea.c 2010-12-03 17:19:24.836964976 +0900 @@ -508,6 +508,51 @@ } /*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_STATS system table. 
+@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_stats_tuple( +/*========================*/ + const dict_index_t* index, /*!< in: index whose id is stored in INDEX_ID */ + ulint i, /*!< in: value stored in KEY_COLS */ + mem_heap_t* heap) /*!< in: heap from which the tuple and its field buffers are allocated */ +{ + dict_table_t* sys_stats; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(index); + ut_ad(heap); + + sys_stats = dict_sys->sys_stats; + + entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_stats); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, index->id); + dfield_set_data(dfield, ptr, 8); + /* 1: KEY_COLS -----------------------*/ + dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/); + ptr = mem_heap_alloc(heap, 4); + mach_write_to_4(ptr, i); + dfield_set_data(dfield, ptr, 4); + /* 2: DIFF_VALS (field 4 in the stored record) */ + dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/); + ptr = mem_heap_alloc(heap, 8); + mach_write_to_8(ptr, 0); /* initial value is 0 */ + dfield_set_data(dfield, ptr, 8); + + return(entry); +} + +/*****************************************************************//** Creates the tuple with which the index entry is searched for writing the index tree root page number, if such a tree is created. @return the tuple for search */ @@ -617,6 +662,27 @@ } /***************************************************************//** +Builds a row for storing stats to insert. +@return DB_SUCCESS */ +static +ulint +dict_build_stats_def_step( +/*======================*/ + ind_node_t* node) /*!< in: index node; reads node->index, node->stats_no and node->heap, and sets the new row of node->stats_def */ +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); + + ins_node_set_new_row(node->stats_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** Creates an index tree for the index if it is not a member of a cluster. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static @@ -937,6 +1003,49 @@ dict_sys->sys_fields, heap); node->field_def->common.parent = node; + if (srv_use_sys_stats_table) { /* the SYS_STATS insert node is only created when the option is on */ + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + } else { + node->stats_def = NULL; + } + + node->commit_node = commit_node_create(heap); + node->commit_node->common.parent = node; + + return(node); +} + +/*********************************************************************//** +Creates an index node of type QUE_NODE_INSERT_STATS that inserts SYS_STATS rows for an index. @return the node, allocated from heap */ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, /*!< in: index stored in node->index */ + mem_heap_t* heap) /*!< in: heap where the node and its sub-nodes are created */ +{ + ind_node_t* node; + + node = mem_heap_alloc(heap, sizeof(ind_node_t)); + + node->common.type = QUE_NODE_INSERT_STATS; + + node->index = index; + + node->state = INDEX_BUILD_STATS_COLS; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); /* private heap; dict_build_stats_def_step() builds the rows from node->heap */ + + node->ind_def = NULL; + node->field_def = NULL; + + node->stats_def = ins_node_create(INS_DIRECT, + dict_sys->sys_stats, heap); + node->stats_def->common.parent = node; + node->stats_no = 0; + node->commit_node = commit_node_create(heap); node->commit_node->common.parent = node; @@ -1087,6 +1196,7 @@ node->state = INDEX_BUILD_FIELD_DEF; node->field_no = 0; + node->stats_no = 0; thr->run_node = node->ind_def; @@ -1132,7 +1242,31 @@ goto function_exit; } - node->state = INDEX_CREATE_INDEX_TREE; + if (srv_use_sys_stats_table + && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { + node->state = INDEX_BUILD_STATS_COLS; + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } + } + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { /* one SYS_STATS row per stats_no value 0..n_unique */ + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; /* run the insert node next; this step is re-entered afterwards */ + + return(thr); + } else { + node->state = INDEX_CREATE_INDEX_TREE; + } } if (node->state == 
INDEX_CREATE_INDEX_TREE) { @@ -1178,6 +1312,66 @@ return(NULL); } + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/****************************************************************//** +Query graph step for a QUE_NODE_INSERT_STATS node: builds one SYS_STATS row per call and hands it to node->stats_def. @return query thread to run next, or NULL when trx->error_state is not DB_SUCCESS */ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + ind_node_t* node; + ulint err = DB_ERROR; /* NOTE(review): still DB_ERROR when all rows are done and control falls through to function_exit, so NULL is returned on that path too -- confirm this is intended */ + trx_t* trx; + + ut_ad(thr); + + trx = thr_get_trx(thr); + + node = thr->run_node; + + if (thr->prev_node == que_node_get_parent(node)) { /* entered from the parent: start building rows */ + node->state = INDEX_BUILD_STATS_COLS; + } + + if (node->state == INDEX_BUILD_STATS_COLS) { + if (node->stats_no <= dict_index_get_n_unique(node->index)) { /* one SYS_STATS row per stats_no value 0..n_unique */ + + err = dict_build_stats_def_step(node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->stats_no++; + + thr->run_node = node->stats_def; + + return(thr); + } else { + node->state = INDEX_COMMIT_WORK; + } + } + + if (node->state == INDEX_COMMIT_WORK) { + + /* do not commit transaction here for now */ + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + } else { + return(NULL); + } + thr->run_node = que_node_get_parent(node); return(thr); diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c --- a/storage/innobase/dict/dict0dict.c 2010-12-03 15:48:03.040222428 +0900 +++ b/storage/innobase/dict/dict0dict.c 2010-12-03 17:19:24.841947690 +0900 @@ -756,7 +756,7 @@ print an error message and return without doing anything. */ dict_update_statistics(table, TRUE /* only update stats - if they have not been initialized */); + if they have not been initialized */, FALSE); } return(table); @@ -4304,6 +4304,240 @@ } /*********************************************************************//** +functions to use SYS_STATS system table. 
*/ +static +ibool +dict_reload_statistics( +/*===================*/ + dict_table_t* table, /*!< in/out: table whose index statistics are filled in from SYS_STATS */ + ulint* sum_of_index_sizes) /*!< in/out: incremented by the BTR_TOTAL_SIZE of each index visited */ +{ + dict_index_t* index; + ulint size; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + if (index == NULL) { + /* Table definition is corrupt */ + + return(FALSE); + } + + heap = mem_heap_create(1000); + + while (index) { + size = btr_get_size(index, BTR_TOTAL_SIZE); + + index->stat_index_size = size; + + *sum_of_index_sizes += size; + + size = btr_get_size(index, BTR_N_LEAF_PAGES); + + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } + + index->stat_n_leaf_pages = size; + +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); /* search tuple holding only INDEX_ID */ + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)) + != index->id) { + /* not found: even a single missing row is not allowed */ + fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)" + " not found in SYS_STATS\n", + index->table_name, index->name, i, n_cols); + btr_pcur_close(&pcur); + 
mtr_commit(&mtr); + mem_heap_free(heap); + return(FALSE); + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); + + ut_a(i == key_cols); + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + stat_n_diff_key_vals_tmp[i] = mach_read_from_8(field); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + for (i = 0; i <= n_cols; i++) { + index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i]; + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); + return(TRUE); +} + +static +void +dict_store_statistics( +/*==================*/ + dict_table_t* table) +{ + dict_index_t* index; + mem_heap_t* heap; + + index = dict_table_get_first_index(table); + + ut_a(index); + + heap = mem_heap_create(1000); + + while (index) { +/*===========================================*/ +{ + dict_table_t* sys_stats; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + ulint key_cols; + ulint n_cols; + ulint rests; + const rec_t* rec; + const byte* field; + ulint len; + ib_int64_t* stat_n_diff_key_vals_tmp; + byte* buf; + ulint i; + mtr_t mtr; + + n_cols = dict_index_get_n_unique(index); + stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t)); + + for (i = 0; i <= n_cols; i++) { + stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i]; + } + + sys_stats = dict_sys->sys_stats; + sys_index = UT_LIST_GET_FIRST(sys_stats->indexes); + ut_a(!dict_table_is_comp(sys_stats)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = mem_heap_alloc(heap, 8); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + mtr_start(&mtr); + + 
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_MODIFY_LEAF, &pcur, &mtr); + rests = n_cols + 1; /* rows still to be written; decremented for each row updated */ + for (i = 0; i <= n_cols; i++) { + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len)) + != index->id) { + /* not found */ + + + break; + } + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + field = rec_get_nth_field_old(rec, 1, &len); + ut_a(len == 4); + + key_cols = mach_read_from_4(field); /* NOTE(review): key_cols is read from the row and indexes stat_n_diff_key_vals_tmp (n_cols + 1 entries) below without a range check -- confirm a row can never hold a larger value */ + + field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len); + ut_a(len == 8); + + mlog_write_ull((byte*)field, stat_n_diff_key_vals_tmp[key_cols], &mtr); /* written at the address of the record's DIFF_VALS field */ + + rests--; + +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (rests) { /* fewer rows were updated than n_cols + 1 */ + fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries" + " of %s/%s to SYS_STATS system table.\n", + rests, index->table_name, index->name); + } +} +/*===========================================*/ + + index = dict_table_get_next_index(index); + } + + mem_heap_free(heap); +} + +/*********************************************************************//** Calculates new estimates for table and index statistics. The statistics are used in query optimization. 
*/ UNIV_INTERN @@ -4311,10 +4545,11 @@ dict_update_statistics( /*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool only_calc_if_missing_stats)/*!< in: only + ibool only_calc_if_missing_stats,/*!< in: only update/recalc the stats if they have not been initialized yet, otherwise do nothing */ + ibool sync) /*!< in: TRUE if must update SYS_STATS */ { dict_index_t* index; ulint sum_of_index_sizes = 0; @@ -4331,6 +4566,27 @@ return; } + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) { + dict_table_stats_lock(table, RW_X_LATCH); + + /* reload statistics from SYS_STATS table */ + if (dict_reload_statistics(table, &sum_of_index_sizes)) { + /* success */ +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: reload_statistics succeeded for %s.\n", + table->name); +#endif + goto end; + } + + dict_table_stats_unlock(table, RW_X_LATCH); + } +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n", + table->name); +#endif + sum_of_index_sizes = 0; + /* Find out the sizes of the indexes and how many different values for the key they approximately have */ @@ -4391,6 +4647,11 @@ index = dict_table_get_next_index(index); } while (index); + if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) { + /* store statistics to SYS_STATS table */ + dict_store_statistics(table); + } +end: index = dict_table_get_first_index(table); table->stat_n_rows = index->stat_n_diff_key_vals[ @@ -4485,7 +4746,8 @@ ut_ad(mutex_own(&(dict_sys->mutex))); - dict_update_statistics(table, FALSE /* update even if initialized */); + if (srv_stats_auto_update) + dict_update_statistics(table, FALSE /* update even if initialized */, FALSE); dict_table_stats_lock(table, RW_S_LATCH); diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c --- a/storage/innobase/dict/dict0load.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/dict/dict0load.c 
2010-12-03 17:19:24.845947460 +0900 @@ -50,7 +50,8 @@ "SYS_COLUMNS", "SYS_FIELDS", "SYS_FOREIGN", - "SYS_FOREIGN_COLS" + "SYS_FOREIGN_COLS", + "SYS_STATS" }; /****************************************************************//** Compare the name of an index column. @@ -343,12 +344,13 @@ } if ((status & DICT_TABLE_UPDATE_STATS) + && srv_stats_auto_update && dict_table_get_first_index(*table)) { /* Update statistics if DICT_TABLE_UPDATE_STATS is set */ dict_update_statistics(*table, FALSE /* update even if - initialized */); + initialized */, FALSE); } return(NULL); @@ -582,6 +584,61 @@ //#endif /* FOREIGN_NOT_USED */ /********************************************************************//** +This function parses a SYS_STATS record and extract necessary +information from the record and return to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_stats_rec( +/*=============================*/ + mem_heap_t* heap __attribute__((unused)), /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_STATS rec */ + index_id_t* index_id, /*!< out: INDEX_ID */ + ulint* key_cols, /*!< out: KEY_COLS */ + ib_uint64_t* diff_vals) /*!< out: DIFF_VALS */ +{ + ulint len; + const byte* field; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_STATS"); + } + + if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) != 5)) { + return("wrong number of columns in SYS_STATS record"); + } + + field = rec_get_nth_field_old(rec, 0/*INDEX_ID*/, &len); + if (UNIV_UNLIKELY(len != 8)) { +err_len: + return("incorrect column length in SYS_STATS"); + } + *index_id = mach_read_from_8(field); + + field = rec_get_nth_field_old(rec, 1/*KEY_COLS*/, &len); + if (UNIV_UNLIKELY(len != 4)) { + goto err_len; + } + *key_cols = mach_read_from_4(field); + + rec_get_nth_field_offs_old(rec, 2/*DB_TRX_ID*/, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + 
rec_get_nth_field_offs_old(rec, 3/*DB_ROLL_PTR*/, &len); + if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old(rec, 4/*DIFF_VALS*/, &len); + if (UNIV_UNLIKELY(len != 8)) { + goto err_len; + } + *diff_vals = mach_read_from_8(field); + + return(NULL); +} +/********************************************************************//** Determine the flags of a table described in SYS_TABLES. @return compressed page size in kilobytes; or 0 if the tablespace is uncompressed, ULINT_UNDEFINED on error */ diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc --- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:17:03.665960357 +0900 +++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:22:21.586939783 +0900 @@ -187,6 +187,7 @@ static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; +static my_bool innobase_use_sys_stats_table = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -2388,6 +2389,8 @@ goto error; } + srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table; + /* -------------- Log files ---------------------------*/ /* The default dir for log files is the datadir of MySQL */ @@ -5192,6 +5195,10 @@ error = row_insert_for_mysql((byte*) record, prebuilt); +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + /* Handle duplicate key errors */ if (auto_inc_used) { ulint err; @@ -5528,6 +5535,10 @@ } } +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + innodb_srv_conc_exit_innodb(trx); error = convert_error_code_to_mysql(error, @@ -5581,6 +5592,10 @@ error = row_update_for_mysql((byte*) record, prebuilt); +#ifdef EXTENDED_FOR_USERSTAT + if (error == DB_SUCCESS) rows_changed++; +#endif + innodb_srv_conc_exit_innodb(trx); error = convert_error_code_to_mysql( @@ -5899,6 +5914,11 @@ case 
DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index >= 0 && active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_KEY_NOT_FOUND; @@ -6108,6 +6128,11 @@ case DB_SUCCESS: error = 0; table->status = 0; +#ifdef EXTENDED_FOR_USERSTAT + rows_read++; + if (active_index >= 0 && active_index < MAX_KEY) + index_rows_read[active_index]++; +#endif break; case DB_RECORD_NOT_FOUND: error = HA_ERR_END_OF_FILE; @@ -7999,11 +8024,31 @@ /* In sql_show we call with this flag: update then statistics so that they are up-to-date */ + if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) + && called_from_analyze) { + /* If the indexes on the table don't have enough rows in SYS_STATS system table, */ + /* they need to be created. */ + dict_index_t* index; + + prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics"; + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + + for (index = dict_table_get_first_index(ib_table); + index != NULL; + index = dict_table_get_next_index(index)) { + row_insert_stats_for_mysql(index, prebuilt->trx); + innobase_commit_low(prebuilt->trx); + } + + ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED); + } + prebuilt->trx->op_info = "updating table statistics"; dict_update_statistics(ib_table, FALSE /* update even if stats - are initialized */); + are initialized */, called_from_analyze); prebuilt->trx->op_info = "returning various info to MySQL"; } @@ -8081,7 +8126,7 @@ are asked by MySQL to avoid locking. Another reason to avoid the call is that it uses quite a lot of CPU. See Bug#38185. 
*/ - if (flag & HA_STATUS_NO_LOCK + if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock || !(flag & HA_STATUS_VARIABLE_EXTRA)) { /* We do not update delete_length if no locking is requested so the "old" value can @@ -11281,6 +11326,45 @@ "The number of index pages to sample when calculating statistics (default 8)", NULL, NULL, 8, 1, ~0ULL, 0); +const char *innobase_stats_method_names[]= +{ + "nulls_equal", + "nulls_unequal", + "nulls_ignored", + NullS +}; +TYPELIB innobase_stats_method_typelib= +{ + array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib", + innobase_stats_method_names, NULL +}; +static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method, + PLUGIN_VAR_RQCMDARG, + "Specifies how InnoDB index statistics collection code should treat NULLs. " + "Possible values are the same as for 'myisam_stats_method'. " + "This is a startup parameter.", + NULL, NULL, 0, &innobase_stats_method_typelib); + +static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update, + PLUGIN_VAR_RQCMDARG, + "Enable/Disable InnoDB's auto update statistics of indexes. " + "(except for ANALYZE TABLE command) 0:disable 1:enable", + NULL, NULL, 1, 0, 1, 0); + +static MYSQL_SYSVAR_ULONG(stats_update_need_lock, srv_stats_update_need_lock, + PLUGIN_VAR_RQCMDARG, + "Enable/Disable InnoDB's update statistics which needs to lock dictionary. " + "e.g. Data_free.", + NULL, NULL, 1, 0, 1, 0); + +static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Enable to use SYS_STATS system table to store statistics statically, " + "And avoids to calculate statistics at every first open of the tables. " + "This option may make the opportunities of update statistics less. " + "So you should use ANALYZE TABLE command intentionally.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, PLUGIN_VAR_OPCMDARG, "Enable InnoDB adaptive hash index (enabled by default).
" @@ -11604,6 +11688,10 @@ MYSQL_SYSVAR(overwrite_relay_log_info), MYSQL_SYSVAR(rollback_on_timeout), MYSQL_SYSVAR(stats_on_metadata), + MYSQL_SYSVAR(stats_method), + MYSQL_SYSVAR(stats_auto_update), + MYSQL_SYSVAR(stats_update_need_lock), + MYSQL_SYSVAR(use_sys_stats_table), MYSQL_SYSVAR(stats_sample_pages), MYSQL_SYSVAR(adaptive_hash_index), MYSQL_SYSVAR(replication_delay), @@ -11672,7 +11760,10 @@ i_s_innodb_sys_columns, i_s_innodb_sys_fields, i_s_innodb_sys_foreign, -i_s_innodb_sys_foreign_cols +i_s_innodb_sys_foreign_cols, +i_s_innodb_sys_stats, +i_s_innodb_table_stats, +i_s_innodb_index_stats mysql_declare_plugin_end; /** @brief Initialize the default value of innodb_commit_concurrency. diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc --- a/storage/innobase/handler/i_s.cc 2010-12-03 17:17:03.666956117 +0900 +++ b/storage/innobase/handler/i_s.cc 2010-12-03 17:19:24.880964526 +0900 @@ -49,6 +49,7 @@ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ #include "trx0rseg.h" /* for trx_rseg_struct */ #include "trx0sys.h" /* for trx_sys */ +#include "dict0dict.h" /* for dict_sys */ } static const char plugin_author[] = "Innobase Oy"; @@ -3457,6 +3458,203 @@ STRUCT_FLD(__reserved1, NULL) }; +/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_stats */ +static ST_FIELD_INFO innodb_sys_stats_fields_info[] = +{ +#define SYS_STATS_INDEX_ID 0 + {STRUCT_FLD(field_name, "INDEX_ID"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define SYS_STATS_KEY_COLS 1 + {STRUCT_FLD(field_name, "KEY_COLS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + 
+#define SYS_STATS_DIFF_VALS 2 + {STRUCT_FLD(field_name, "DIFF_VALS"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; +/**********************************************************************//** +Function to fill information_schema.innodb_sys_stats +@return 0 on success */ +static +int +i_s_dict_fill_sys_stats( +/*====================*/ + THD* thd, /*!< in: thread */ + index_id_t index_id, /*!< in: INDEX_ID */ + ulint key_cols, /*!< in: KEY_COLS */ + ib_uint64_t diff_vals, /*!< in: DIFF_VALS */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + + DBUG_ENTER("i_s_dict_fill_sys_stats"); + + fields = table_to_fill->field; + + OK(fields[SYS_STATS_INDEX_ID]->store(longlong(index_id), TRUE)); + + OK(fields[SYS_STATS_KEY_COLS]->store(key_cols)); + + OK(fields[SYS_STATS_DIFF_VALS]->store(longlong(diff_vals), TRUE)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.innodb_sys_stats table. 
+@return 0 on success */ +static +int +i_s_sys_stats_fill_table( +/*=====================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + COND* cond) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("i_s_sys_stats_fill_table"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_STATS); + + while (rec) { + const char* err_msg; + index_id_t index_id; + ulint key_cols; + ib_uint64_t diff_vals; + + /* Extract necessary information from a SYS_STATS row */ + err_msg = dict_process_sys_stats_rec( + heap, rec, &index_id, &key_cols, &diff_vals); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + i_s_dict_fill_sys_stats( + thd, index_id, key_cols, diff_vals, + tables->table); + } else { + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, + "%s", err_msg); /* err_msg is data, never the format string */ + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_stats +@return 0 on success */ +static +int +innodb_sys_stats_init( +/*========================*/ + void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_sys_stats_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_sys_stats_fields_info; + schema->fill_table = i_s_sys_stats_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value)
*/ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_SYS_STATS"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, plugin_author), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "XtraDB SYS_STATS table"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_sys_stats_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* reserved for dependency checking */ + /* void* */ + STRUCT_FLD(__reserved1, NULL) +}; + /*********************************************************************** */ static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] = @@ -3619,3 +3817,347 @@ /* void* */ STRUCT_FLD(__reserved1, NULL) }; + +/*********************************************************************** +*/ +static ST_FIELD_INFO i_s_innodb_table_stats_info[] = +{ + {STRUCT_FLD(field_name, "table_schema"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "table_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, 
SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "rows"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "clust_size"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "other_size"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "modified"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +static ST_FIELD_INFO i_s_innodb_index_stats_info[] = +{ + {STRUCT_FLD(field_name, "table_schema"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "table_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_name"), + STRUCT_FLD(field_length, NAME_LEN), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, 
SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "fields"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "rows_per_key"), + STRUCT_FLD(field_length, 256), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_total_pages"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + {STRUCT_FLD(field_name, "index_leaf_pages"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +static +int +i_s_innodb_table_stats_fill( +/*========================*/ + THD* thd, + TABLE_LIST* tables, + COND* cond) +{ + TABLE* i_s_table = (TABLE *) tables->table; + int status = 0; + dict_table_t* table; + + DBUG_ENTER("i_s_innodb_table_stats_fill"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + mutex_enter(&(dict_sys->mutex)); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + char buf[NAME_LEN * 2 + 2]; + char* ptr; + + if (table->stat_clustered_index_size == 0) { + table = UT_LIST_GET_NEXT(table_LRU, table); + continue; + } + + buf[NAME_LEN * 2 + 1] = 0; + strncpy(buf, table->name, NAME_LEN * 2 + 1); + ptr = strchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + ++ptr; + } else { + ptr = buf; + } + + 
field_store_string(i_s_table->field[0], buf); + field_store_string(i_s_table->field[1], ptr); + i_s_table->field[2]->store(table->stat_n_rows); + i_s_table->field[3]->store(table->stat_clustered_index_size); + i_s_table->field[4]->store(table->stat_sum_of_other_index_sizes); + i_s_table->field[5]->store(table->stat_modified_counter); + + if (schema_table_store_record(thd, i_s_table)) { + status = 1; + break; + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + mutex_exit(&(dict_sys->mutex)); + + DBUG_RETURN(status); +} + +static +int +i_s_innodb_index_stats_fill( +/*========================*/ + THD* thd, + TABLE_LIST* tables, + COND* cond) +{ + TABLE* i_s_table = (TABLE *) tables->table; + int status = 0; + dict_table_t* table; + dict_index_t* index; + + DBUG_ENTER("i_s_innodb_index_stats_fill"); + + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(0); + } + + mutex_enter(&(dict_sys->mutex)); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + if (table->stat_clustered_index_size == 0) { + table = UT_LIST_GET_NEXT(table_LRU, table); + continue; + } + + ib_int64_t n_rows = table->stat_n_rows; + + if (n_rows < 0) { + n_rows = 0; + } + + index = dict_table_get_first_index(table); + + while (index) { + char buff[256+1]; + char row_per_keys[256+1]; + char buf[NAME_LEN * 2 + 2]; + char* ptr; + ulint i; + + buf[NAME_LEN * 2 + 1] = 0; + strncpy(buf, table->name, NAME_LEN * 2 + 1); + ptr = strchr(buf, '/'); + if (ptr) { + *ptr = '\0'; + ++ptr; + } else { + ptr = buf; + } + + field_store_string(i_s_table->field[0], buf); + field_store_string(i_s_table->field[1], ptr); + field_store_string(i_s_table->field[2], index->name); + i_s_table->field[3]->store(index->n_uniq); + + row_per_keys[0] = '\0'; + + /* It is remained optimistic operation still for now */ + //dict_index_stat_mutex_enter(index); + if (index->stat_n_diff_key_vals) { + for (i = 1; i <= index->n_uniq; i++) { + ib_int64_t rec_per_key; + if 
(index->stat_n_diff_key_vals[i]) { + rec_per_key = n_rows / index->stat_n_diff_key_vals[i]; + } else { + rec_per_key = n_rows; + } + ut_snprintf(buff, 256, (i == index->n_uniq)?"%llu":"%llu, ", + rec_per_key); + strncat(row_per_keys, buff, 256 - strlen(row_per_keys)); + } + } + //dict_index_stat_mutex_exit(index); + + field_store_string(i_s_table->field[4], row_per_keys); + + i_s_table->field[5]->store(index->stat_index_size); + i_s_table->field[6]->store(index->stat_n_leaf_pages); + + if (schema_table_store_record(thd, i_s_table)) { + status = 1; + break; + } + + index = dict_table_get_next_index(index); + } + + if (status == 1) { + break; + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + mutex_exit(&(dict_sys->mutex)); + + DBUG_RETURN(status); +} + +static +int +i_s_innodb_table_stats_init( +/*========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_table_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_table_stats_info; + schema->fill_table = i_s_innodb_table_stats_fill; + + DBUG_RETURN(0); +} + +static +int +i_s_innodb_index_stats_init( +/*========================*/ + void* p) +{ + DBUG_ENTER("i_s_innodb_index_stats_init"); + ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = i_s_innodb_index_stats_info; + schema->fill_table = i_s_innodb_index_stats_fill; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_innodb_table_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_TABLE_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB table statistics in memory"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_table_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; + +UNIV_INTERN struct st_mysql_plugin 
i_s_innodb_index_stats = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "INNODB_INDEX_STATS"), + STRUCT_FLD(author, plugin_author), + STRUCT_FLD(descr, "InnoDB index statistics in memory"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_innodb_index_stats_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, 0x0100 /* 1.0 */), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL) +}; diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h --- a/storage/innobase/handler/i_s.h 2010-12-03 17:17:03.668953884 +0900 +++ b/storage/innobase/handler/i_s.h 2010-12-03 17:19:24.882947826 +0900 @@ -41,5 +41,8 @@ extern struct st_mysql_plugin i_s_innodb_sys_foreign; extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols; extern struct st_mysql_plugin i_s_innodb_rseg; +extern struct st_mysql_plugin i_s_innodb_sys_stats; +extern struct st_mysql_plugin i_s_innodb_table_stats; +extern struct st_mysql_plugin i_s_innodb_index_stats; #endif /* i_s_h */ diff -ruN a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h --- a/storage/innobase/include/dict0boot.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/dict0boot.h 2010-12-03 17:19:24.885947372 +0900 @@ -104,6 +104,7 @@ #define DICT_COLUMNS_ID 2 #define DICT_INDEXES_ID 3 #define DICT_FIELDS_ID 4 +#define DICT_STATS_ID 6 /* The following is a secondary index on SYS_TABLES */ #define DICT_TABLE_IDS_ID 5 @@ -131,10 +132,13 @@ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */ #define DICT_HDR_FIELDS 48 /* Root of the index field index tree */ +#define DICT_HDR_STATS 52 /* Root of the stats tree */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ + +#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */ 
/*-------------------------------------------------------------*/ /* The field number of the page number field in the sys_indexes table @@ -144,11 +148,15 @@ #define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_NAME_FIELD 4 +#define DICT_SYS_STATS_DIFF_VALS_FIELD 4 + /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is updated */ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256 +#define DICT_HDR_XTRADB_FLAG 0x5854524144425F31ULL /* "XTRADB_1" */ + #ifndef UNIV_NONINL #include "dict0boot.ic" #endif diff -ruN a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h --- a/storage/innobase/include/dict0crea.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/dict0crea.h 2010-12-03 17:19:24.886949643 +0900 @@ -53,6 +53,14 @@ dict_index_t* index, /*!< in: index to create, built as a memory data structure */ mem_heap_t* heap); /*!< in: heap where created */ +/*********************************************************************//** +*/ +UNIV_INTERN +ind_node_t* +ind_insert_stats_graph_create( +/*==========================*/ + dict_index_t* index, + mem_heap_t* heap); /***********************************************************//** Creates a table. This is a high-level function used in SQL execution graphs. @return query thread to run next or NULL */ @@ -62,6 +70,13 @@ /*===================*/ que_thr_t* thr); /*!< in: query thread */ /***********************************************************//** +*/ +UNIV_INTERN +que_thr_t* +dict_insert_stats_step( +/*===================*/ + que_thr_t* thr); +/***********************************************************//** Creates an index. This is a high-level function used in SQL execution graphs. 
@return query thread to run next or NULL */ @@ -170,6 +185,7 @@ ins_node_t* field_def; /* child node which does the inserts of the field definitions; the row to be inserted is built by the parent node */ + ins_node_t* stats_def; commit_node_t* commit_node; /* child node which performs a commit after a successful index creation */ @@ -180,6 +196,7 @@ dict_table_t* table; /*!< table which owns the index */ dtuple_t* ind_row;/* index definition row built */ ulint field_no;/* next field definition to insert */ + ulint stats_no; mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ }; @@ -189,6 +206,7 @@ #define INDEX_CREATE_INDEX_TREE 3 #define INDEX_COMMIT_WORK 4 #define INDEX_ADD_TO_CACHE 5 +#define INDEX_BUILD_STATS_COLS 6 #ifndef UNIV_NONINL #include "dict0crea.ic" diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h --- a/storage/innobase/include/dict0dict.h 2010-12-03 15:48:03.073024387 +0900 +++ b/storage/innobase/include/dict0dict.h 2010-12-03 17:19:24.888965622 +0900 @@ -1084,10 +1084,11 @@ dict_update_statistics( /*===================*/ dict_table_t* table, /*!< in/out: table */ - ibool only_calc_if_missing_stats);/*!< in: only + ibool only_calc_if_missing_stats, /*!< in: only update/recalc the stats if they have not been initialized yet, otherwise do nothing */ + ibool sync); /********************************************************************//** Reserves the dictionary system mutex for MySQL. 
*/ UNIV_INTERN @@ -1202,6 +1203,7 @@ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + dict_table_t* sys_stats; /*!< SYS_STATS table */ }; #endif /* !UNIV_HOTBACKUP */ diff -ruN a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h --- a/storage/innobase/include/dict0load.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/dict0load.h 2010-12-03 17:19:24.889947481 +0900 @@ -41,6 +41,7 @@ SYS_FIELDS, SYS_FOREIGN, SYS_FOREIGN_COLS, + SYS_STATS, /* This must be last item. Defines the number of system tables. */ SYS_NUM_SYSTEM_TABLES @@ -319,6 +320,19 @@ const char** ref_col_name, /*!< out: referenced column name in referenced table */ ulint* pos); /*!< out: column position */ +/********************************************************************//** +This function parses a SYS_STATS record and extract necessary +information from the record and return to caller. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_stats_rec( +/*=============================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_STATS rec */ + index_id_t* index_id, /*!< out: INDEX_ID */ + ulint* key_cols, /*!< out: KEY_COLS */ + ib_uint64_t* diff_vals); /*!< out: DIFF_VALS */ #ifndef UNIV_NONINL #include "dict0load.ic" #endif diff -ruN a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h --- a/storage/innobase/include/page0cur.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/page0cur.h 2010-12-03 17:19:24.891954511 +0900 @@ -293,6 +293,22 @@ /*==========================*/ buf_block_t* block, /*!< in: page */ page_cur_t* cursor);/*!< out: page cursor */ + +UNIV_INTERN +void +page_cur_open_on_nth_user_rec( +/*==========================*/ + buf_block_t* block, /*!< in: page */ + page_cur_t* cursor, /*!< out: page cursor */ + ulint nth); + +UNIV_INTERN +ibool +page_cur_open_on_rnd_user_rec_after_nth( +/*==========================*/ + buf_block_t* block, /*!< in: page */ + page_cur_t* cursor, /*!< out: page cursor */ + ulint nth); #endif /* !UNIV_HOTBACKUP */ /***********************************************************//** Parses a log record of a record insert on a page. 
diff -ruN a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h --- a/storage/innobase/include/que0que.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/que0que.h 2010-12-03 17:19:24.892947946 +0900 @@ -492,6 +492,8 @@ #define QUE_NODE_CALL 31 #define QUE_NODE_EXIT 32 +#define QUE_NODE_INSERT_STATS 34 + /* Query thread states */ #define QUE_THR_RUNNING 1 #define QUE_THR_PROCEDURE_WAIT 2 diff -ruN a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h --- a/storage/innobase/include/rem0cmp.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/rem0cmp.h 2010-12-03 17:19:24.893953395 +0900 @@ -169,10 +169,11 @@ matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes);/*!< in/out: number of already matched + ulint* matched_bytes, /*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ + ulint stats_method); /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared. diff -ruN a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic --- a/storage/innobase/include/rem0cmp.ic 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/rem0cmp.ic 2010-12-03 17:19:24.902983425 +0900 @@ -87,5 +87,5 @@ ulint match_b = 0; return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); + &match_f, &match_b, 0)); } diff -ruN a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h --- a/storage/innobase/include/row0mysql.h 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/include/row0mysql.h 2010-12-03 17:19:24.904973020 +0900 @@ -387,6 +387,14 @@ then checked for not being too large. 
*/ /*********************************************************************//** +Runs an insert graph for SYS_STATS on behalf of index (defined in row0mysql.c). @return trx->error_state after the run, cast to int */ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, /*!< in: index */ + trx_t* trx); /*!< in/out: transaction */ +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created. diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h --- a/storage/innobase/include/srv0srv.h 2010-12-03 15:53:54.622036720 +0900 +++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:19:24.906953188 +0900 @@ -209,6 +209,13 @@ extern ibool srv_innodb_status; extern unsigned long long srv_stats_sample_pages; +extern ulint srv_stats_method; +#define SRV_STATS_METHOD_NULLS_EQUAL 0 +#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1 +#define SRV_STATS_METHOD_IGNORE_NULLS 2 +extern ulint srv_stats_auto_update; +extern ulint srv_stats_update_need_lock; +extern ibool srv_use_sys_stats_table; extern ibool srv_use_doublewrite_buf; extern ibool srv_use_checksums; diff -ruN a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c --- a/storage/innobase/page/page0cur.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/page/page0cur.c 2010-12-03 17:19:24.908973357 +0900 @@ -564,6 +564,74 @@ } while (rnd--); } +UNIV_INTERN +void +page_cur_open_on_nth_user_rec( +/*==========================*/ + buf_block_t* block, /*!< in: page */ + page_cur_t* cursor, /*!< out: page cursor */ + ulint nth) /*!< in: 1-based ordinal of the user record to open on; values beyond the last record are clamped to it. NOTE(review): nth == 0 wraps on the decrement below (assuming ulint is unsigned) and also lands on the last record - confirm callers never pass 0 */ +{ + ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); + + page_cur_set_before_first(block, cursor); + + if (UNIV_UNLIKELY(n_recs == 0)) { + + return; + } + + nth--; + + if (nth >= n_recs) { + nth = n_recs - 1; + } + + do { + page_cur_move_to_next(cursor); + } while (nth--); +} + +UNIV_INTERN +ibool +page_cur_open_on_rnd_user_rec_after_nth( +/*==========================*/ +
buf_block_t* block, /*!< in: page */ + page_cur_t* cursor, /*!< out: page cursor */ + ulint nth) +{ + ulint rnd; + ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); + ibool ret; + + page_cur_set_before_first(block, cursor); + + if (UNIV_UNLIKELY(n_recs == 0)) { + + return (FALSE); + } + + nth--; + + if (nth >= n_recs) { + nth = n_recs - 1; + } + + rnd = (ulint) (nth + page_cur_lcg_prng() % (n_recs - nth)); + + if (rnd == nth) { + ret = TRUE; + } else { + ret = FALSE; + } + + do { + page_cur_move_to_next(cursor); + } while (rnd--); + + return (ret); +} + /***********************************************************//** Writes the log record of a record insert on a page. */ static diff -ruN a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c --- a/storage/innobase/que/que0que.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/que/que0que.c 2010-12-03 17:19:24.910953422 +0900 @@ -621,11 +621,21 @@ que_graph_free_recursive(cre_ind->ind_def); que_graph_free_recursive(cre_ind->field_def); + if (srv_use_sys_stats_table) + que_graph_free_recursive(cre_ind->stats_def); que_graph_free_recursive(cre_ind->commit_node); mem_heap_free(cre_ind->heap); break; + case QUE_NODE_INSERT_STATS: + cre_ind = node; + + que_graph_free_recursive(cre_ind->stats_def); + que_graph_free_recursive(cre_ind->commit_node); + + mem_heap_free(cre_ind->heap); + break; case QUE_NODE_PROC: que_graph_free_stat_list(((proc_node_t*)node)->stat_list); @@ -1138,6 +1148,8 @@ str = "CREATE TABLE"; } else if (type == QUE_NODE_CREATE_INDEX) { str = "CREATE INDEX"; + } else if (type == QUE_NODE_INSERT_STATS) { + str = "INSERT TO SYS_STATS"; } else if (type == QUE_NODE_FOR) { str = "FOR LOOP"; } else if (type == QUE_NODE_RETURN) { @@ -1255,6 +1267,8 @@ thr = dict_create_table_step(thr); } else if (type == QUE_NODE_CREATE_INDEX) { thr = dict_create_index_step(thr); + } else if (type == QUE_NODE_INSERT_STATS) { + thr = dict_insert_stats_step(thr); } else if (type == 
QUE_NODE_ROW_PRINTF) { thr = row_printf_step(thr); } else { diff -ruN a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c --- a/storage/innobase/rem/rem0cmp.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/rem/rem0cmp.c 2010-12-03 17:19:24.911953579 +0900 @@ -866,10 +866,11 @@ matched fields; when the function returns, contains the value the for current comparison */ - ulint* matched_bytes) /*!< in/out: number of already matched + ulint* matched_bytes, /*!< in/out: number of already matched bytes within the first field not completely matched; when the function returns, contains the value for the current comparison */ + ulint stats_method) { ulint rec1_n_fields; /* the number of fields in rec */ ulint rec1_f_len; /* length of current field in rec */ @@ -962,7 +963,11 @@ if (rec1_f_len == rec2_f_len) { - goto next_field; + if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) { + goto next_field; + } else { + ret = -1; + } } else if (rec2_f_len == UNIV_SQL_NULL) { diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c --- a/storage/innobase/row/row0merge.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/row/row0merge.c 2010-12-03 17:19:24.914955391 +0900 @@ -2020,6 +2020,8 @@ "UPDATE SYS_INDEXES SET NAME=CONCAT('" TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n" "COMMIT WORK;\n" + /* Drop the statistics of the index. */ + "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n" /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. 
*/ diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c --- a/storage/innobase/row/row0mysql.c 2010-11-03 07:01:13.000000000 +0900 +++ b/storage/innobase/row/row0mysql.c 2010-12-03 17:19:24.918953476 +0900 @@ -921,6 +921,9 @@ table->stat_modified_counter = counter + 1; + if (!srv_stats_auto_update) + return; + /* Calculate new statistics if 1 / 16 of table has been modified since the last time a statistics batch was run, or if stat_modified_counter > 2 000 000 000 (to avoid wrap-around). @@ -931,7 +934,7 @@ || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) { dict_update_statistics(table, FALSE /* update even if stats - are initialized */); + are initialized */, TRUE); } } @@ -2103,6 +2106,45 @@ } /*********************************************************************//** +Builds the graph from ind_insert_stats_graph_create() for index and runs it in trx. @return trx->error_state after the run, cast to int */ +UNIV_INTERN +int +row_insert_stats_for_mysql( +/*=======================*/ + dict_index_t* index, /*!< in: index passed to ind_insert_stats_graph_create() */ + trx_t* trx) /*!< in/out: transaction; started if not yet started, error_state reset to DB_SUCCESS before the run */ +{ + ind_node_t* node; + mem_heap_t* heap; + que_thr_t* thr; + ulint err; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "try to insert rows to SYS_STATS"; + + trx_start_if_not_started(trx); + trx->error_state = DB_SUCCESS; + + heap = mem_heap_create(512); + + node = ind_insert_stats_graph_create(index, heap); + + thr = pars_complete_graph_for_exec(node, trx, heap); + + ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); + que_run_threads(thr); + + err = trx->error_state; + + que_graph_free((que_t*) que_node_get_parent(thr)); + + trx->op_info = ""; + + return((int) err); +} + +/*********************************************************************//** Scans a table create SQL string and adds to the data dictionary the foreign key constraints declared in the string. This function should be called after the indexes for a table have been created.
@@ -3022,7 +3064,7 @@ dict_table_autoinc_initialize(table, 1); dict_table_autoinc_unlock(table); dict_update_statistics(table, FALSE /* update even if stats are - initialized */); + initialized */, TRUE); trx_commit_for_mysql(trx); @@ -3324,6 +3366,8 @@ " IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" " ELSE\n" + " DELETE FROM SYS_STATS\n" + " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_FIELDS\n" " WHERE INDEX_ID = index_id;\n" " DELETE FROM SYS_INDEXES\n" diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c --- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:53:54.625288512 +0900 +++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:19:24.922953561 +0900 @@ -397,6 +397,10 @@ /* When estimating number of different key values in an index, sample this many index pages */ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; +UNIV_INTERN ulint srv_stats_method = 0; +UNIV_INTERN ulint srv_stats_auto_update = 1; +UNIV_INTERN ulint srv_stats_update_need_lock = 1; +UNIV_INTERN ibool srv_use_sys_stats_table = FALSE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; UNIV_INTERN ibool srv_use_checksums = TRUE;