--- /dev/null
+# name : bug580324.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
+--- a/sql/sql_base.cc 2011-01-11 21:35:26.000000000 +0300
++++ b/sql/sql_base.cc 2011-01-11 21:42:02.000000000 +0300
+@@ -251,8 +251,12 @@
+ const TABLE_LIST *table_list,
+ bool tmp_table)
+ {
+- uint key_length= (uint) (strmov(strmov(key, table_list->db)+1,
+- table_list->table_name)-key)+1;
++ char *db_end= strnmov(key, table_list->db, MAX_DBKEY_LENGTH - 2);
++ *db_end++= '\0';
++ char *table_end= strnmov(db_end, table_list->table_name,
++ key + MAX_DBKEY_LENGTH - 1 - db_end);
++ *table_end++= '\0';
++ uint key_length= (uint) (table_end-key);
+ if (tmp_table)
+ {
+ int4store(key + key_length, thd->server_id);
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2011-01-11 21:35:26.000000000 +0300
++++ b/sql/sql_parse.cc 2011-01-11 21:48:50.000000000 +0300
+@@ -1110,11 +1110,18 @@
+ break;
+ #else
+ {
+- char *fields, *packet_end= packet + packet_length, *arg_end;
++ char *fields;
++ char *packet_end= packet + packet_length;
++ char *wildcard;
+ /* Locked closure of all tables */
+ TABLE_LIST table_list;
++ char table_name_buff[NAME_LEN+1];
+ LEX_STRING table_name;
++ uint dummy_errors;
+ LEX_STRING db;
++
++ table_name.str= table_name_buff;
++ table_name.length= 0;
+ /*
+ SHOW statements should not add the used tables to the list of tables
+ used in a transaction.
+@@ -1127,24 +1134,23 @@
+ /*
+ We have name + wildcard in packet, separated by endzero
+ */
+- arg_end= strend(packet);
+- uint arg_length= arg_end - packet;
+-
+- /* Check given table name length. */
+- if (arg_length >= packet_length || arg_length > NAME_LEN)
++ wildcard= strend(packet);
++ table_name.length= wildcard - packet;
++ wildcard++;
++ uint query_length= (uint) (packet_end - wildcard); // Don't count end \0
++ if (table_name.length > NAME_LEN || query_length > NAME_LEN)
+ {
+ my_message(ER_UNKNOWN_COM_ERROR, ER(ER_UNKNOWN_COM_ERROR), MYF(0));
+ break;
+ }
+- thd->convert_string(&table_name, system_charset_info,
+- packet, arg_length, thd->charset());
+- if (check_table_name(table_name.str, table_name.length, FALSE))
+- {
+- /* this is OK due to convert_string() null-terminating the string */
+- my_error(ER_WRONG_TABLE_NAME, MYF(0), table_name.str);
++ table_name.length= copy_and_convert(table_name.str,
++ sizeof(table_name_buff)-1,
++ system_charset_info,
++ packet, table_name.length,
++ thd->charset(), &dummy_errors);
++ table_name.str[table_name.length]= '\0';
++ if (!(fields= (char *) thd->memdup(wildcard, query_length + 1)))
+ break;
+- }
+- packet= arg_end + 1;
+ mysql_reset_thd_for_next_command(thd);
+ lex_start(thd);
+ /* Must be before we init the table list. */
+@@ -1169,9 +1175,6 @@
+ table_list.schema_table= schema_table;
+ }
+
+- uint query_length= (uint) (packet_end - packet); // Don't count end \0
+- if (!(fields= (char *) thd->memdup(packet, query_length + 1)))
+- break;
+ thd->set_query(fields, query_length);
+ general_log_print(thd, command, "%s %s", table_list.table_name, fields);
+
+diff -ruN a/strings/ctype-utf8.c b/strings/ctype-utf8.c
+--- a/strings/ctype-utf8.c 2010-12-03 20:58:26.000000000 +0300
++++ b/strings/ctype-utf8.c 2011-01-11 21:42:02.000000000 +0300
+@@ -4212,6 +4212,10 @@
+ {
+ int code;
+ char hex[]= "0123456789abcdef";
++
++ if (s >= e)
++ return MY_CS_TOOSMALL;
++
+ if (wc < 128 && filename_safe_char[wc])
+ {
+ *s= (uchar) wc;
--- /dev/null
+# name : control_online_alter_index.patch
+# introduced : 12
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/handler.h 2010-12-03 13:51:04.727293058 +0900
+@@ -194,6 +194,18 @@
+ #define HA_ONLINE_DROP_UNIQUE_INDEX (1L << 9) /*drop uniq. online*/
+ #define HA_ONLINE_ADD_PK_INDEX (1L << 10)/*add prim. online*/
+ #define HA_ONLINE_DROP_PK_INDEX (1L << 11)/*drop prim. online*/
++
++/*
++  Union of the twelve HA_ONLINE_* add/drop index flags (index, unique index
++  and primary key, each with its _NO_WRITES counterpart).
++*/
++#define HA_ONLINE_ALTER_INDEX_MASK \
++  (HA_ONLINE_ADD_INDEX | HA_ONLINE_ADD_INDEX_NO_WRITES \
++   | HA_ONLINE_DROP_INDEX | HA_ONLINE_DROP_INDEX_NO_WRITES \
++   | HA_ONLINE_ADD_UNIQUE_INDEX | HA_ONLINE_ADD_UNIQUE_INDEX_NO_WRITES \
++   | HA_ONLINE_DROP_UNIQUE_INDEX | HA_ONLINE_DROP_UNIQUE_INDEX_NO_WRITES \
++   | HA_ONLINE_ADD_PK_INDEX | HA_ONLINE_ADD_PK_INDEX_NO_WRITES \
++   | HA_ONLINE_DROP_PK_INDEX | HA_ONLINE_DROP_PK_INDEX_NO_WRITES)
+ /*
+ HA_PARTITION_FUNCTION_SUPPORTED indicates that the function is
+ supported at all.
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h 2010-12-02 20:31:56.200956501 +0900
++++ b/sql/sql_class.h 2010-12-03 13:51:04.744953174 +0900
+@@ -481,6 +481,8 @@
+ my_bool engine_condition_pushdown;
+ my_bool keep_files_on_create;
+
++ my_bool online_alter_index;
++
+ my_bool old_alter_table;
+ my_bool old_passwords;
+ my_bool big_tables;
+diff -ruN a/sql/sql_partition.cc b/sql/sql_partition.cc
+--- a/sql/sql_partition.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_partition.cc 2010-12-03 13:59:56.444039002 +0900
+@@ -4635,7 +4635,12 @@
+ alter_info->num_parts= curr_part_no - new_part_no;
+ }
+ }
+- if (!(flags= new_table->file->alter_table_flags(alter_info->flags)))
++ flags= new_table->file->alter_table_flags(alter_info->flags);
++ if (!thd->variables.online_alter_index)
++ {
++ flags&= ~((uint)HA_ONLINE_ALTER_INDEX_MASK);
++ }
++ if (!flags)
+ {
+ my_error(ER_PARTITION_FUNCTION_FAILURE, MYF(0));
+ goto err;
+diff -ruN a/sql/sql_table.cc b/sql/sql_table.cc
+--- a/sql/sql_table.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_table.cc 2010-12-03 13:51:04.768955495 +0900
+@@ -6107,6 +6107,10 @@
+ uint *idx_end_p;
+
+ alter_flags= table->file->alter_table_flags(alter_info->flags);
++ if (!thd->variables.online_alter_index)
++ {
++ alter_flags&= ~((ulong)HA_ONLINE_ALTER_INDEX_MASK);
++ }
+ DBUG_PRINT("info", ("alter_flags: %lu", alter_flags));
+ /* Check dropped indexes. */
+ for (idx_p= index_drop_buffer, idx_end_p= idx_p + index_drop_count;
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2010-12-02 21:23:05.569356468 +0900
++++ b/sql/sys_vars.cc 2010-12-03 14:05:28.857356603 +0900
+@@ -2124,6 +2124,18 @@
+ GLOBAL_VAR(opt_optimizer_fix),
+ NO_CMD_LINE, DEFAULT(TRUE));
+
++/**
++  Session variable "fast_index_creation", stored in online_alter_index.
++  When it is off, the ALTER TABLE code clears HA_ONLINE_ALTER_INDEX_MASK
++  from the flags returned by the handler's alter_table_flags().
++*/
++static Sys_var_mybool Sys_fast_index_creation(
++  "fast_index_creation",
++  "If disabled, suppresses online operations for indexes of ALTER TABLE "
++  "(e.g. fast index creation of InnoDB Plugin) for the session.",
++  SESSION_VAR(online_alter_index),
++  NO_CMD_LINE,
++  DEFAULT(TRUE));
++
+ /** propagates changes to the relevant flag of @@optimizer_switch */
+ static bool fix_engine_condition_pushdown(sys_var *self, THD *thd,
+ enum_var_type type)
--- /dev/null
+# name : error_pad.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/extra/comp_err.c b/extra/comp_err.c
+--- a/extra/comp_err.c 2010-08-03 17:24:24.000000000 +0000
++++ b/extra/comp_err.c 2010-09-14 16:49:28.000000000 +0000
+@@ -30,11 +30,12 @@
+ #include <assert.h>
+ #include <my_dir.h>
+
+-#define MAX_ROWS 1000
++#define MAX_ROWS 5000
+ #define HEADER_LENGTH 32 /* Length of header in errmsg.sys */
+ #define DEFAULT_CHARSET_DIR "../sql/share/charsets"
+ #define ER_PREFIX "ER_"
+ #define WARN_PREFIX "WARN_"
++#define PADD_PREFIX "PADD_"
+ static char *OUTFILE= (char*) "errmsg.sys";
+ static char *HEADERFILE= (char*) "mysqld_error.h";
+ static char *NAMEFILE= (char*) "mysqld_ername.h";
+@@ -89,6 +90,7 @@
+ const char *sql_code1; /* sql state */
+ const char *sql_code2; /* ODBC state */
+ struct errors *next_error; /* Pointer to next error */
++ my_bool is_padding; /* If true - padd this er_name while er_code != d_code*/
+ DYNAMIC_ARRAY msg; /* All language texts for this error */
+ };
+
+@@ -127,6 +129,7 @@
+
+
+ static struct languages *parse_charset_string(char *str);
++static struct errors *parse_padd_string(char *ptr, int er_count);
+ static struct errors *parse_error_string(char *ptr, int er_count);
+ static struct message *parse_message_string(struct message *new_message,
+ char *str);
+@@ -252,6 +255,11 @@
+
+ for (tmp_error= error_head; tmp_error; tmp_error= tmp_error->next_error)
+ {
++ if (tmp_error->is_padding)
++ {
++ er_last= tmp_error->d_code;
++ continue;
++ }
+ /*
+ generating mysqld_error.h
+ fprintf() will automatically add \r on windows
+@@ -344,12 +352,29 @@
+ "language\n", tmp_error->er_name, tmp_lang->lang_short_name);
+ goto err;
+ }
+- if (copy_rows(to, tmp->text, row_nr, start_pos))
++ if (tmp_error->is_padding)
+ {
+- fprintf(stderr, "Failed to copy rows to %s\n", outfile);
+- goto err;
++ uint padd_to= tmp_error->d_code;
++ char* padd_message= tmp->text;
++ while ((row_nr+er_offset) < padd_to)
++ {
++ if (copy_rows(to, padd_message,row_nr,start_pos))
++ {
++ fprintf(stderr, "Failed to copy rows to %s\n", outfile);
++ goto err;
++ }
++ row_nr++;
++ }
++ }
++ else
++ {
++ if (copy_rows(to, tmp->text, row_nr, start_pos))
++ {
++ fprintf(stderr, "Failed to copy rows to %s\n", outfile);
++ goto err;
++ }
++ row_nr++;
+ }
+- row_nr++;
+ }
+
+ /* continue with header of the errmsg.sys file */
+@@ -500,14 +525,26 @@
+ DBUG_RETURN(0);
+ continue;
+ }
+- if (is_prefix(str, ER_PREFIX) || is_prefix(str, WARN_PREFIX))
++ if (is_prefix(str, ER_PREFIX) || is_prefix(str, WARN_PREFIX) || is_prefix(str, PADD_PREFIX))
+ {
+- if (!(current_error= parse_error_string(str, rcount)))
++ if (is_prefix(str, PADD_PREFIX))
+ {
+- fprintf(stderr, "Failed to parse the error name string\n");
+- DBUG_RETURN(0);
++ if (!(current_error= parse_padd_string(str, rcount)))
++ {
++ fprintf(stderr, "Failed to parse the error padd string\n");
++ DBUG_RETURN(0);
++ }
++ rcount= current_error->d_code - er_offset; /* Count number of unique errors */
++ }
++ else
++ {
++ if (!(current_error= parse_error_string(str, rcount)))
++ {
++ fprintf(stderr, "Failed to parse the error name string\n");
++ DBUG_RETURN(0);
++ }
++ rcount++; /* Count number of unique errors */
+ }
+- rcount++; /* Count number of unique errors */
+
+ /* add error to the list */
+ *tail_error= current_error;
+@@ -848,78 +885,129 @@
+ DBUG_RETURN(new_message);
+ }
+
++/*
++ Allocate a new errors element and fill it in from the given values;
++ returns the pointer to the errors struct, or 0 on failure
++*/
++
++static struct errors* create_new_error(my_bool is_padding, char *er_name, int d_code, const char *sql_code1, const char *sql_code2)
++{
++ struct errors *new_error;
++ DBUG_ENTER("create_new_error");
++ /* create a new element */
++ new_error= (struct errors *) my_malloc(sizeof(*new_error), MYF(MY_WME));
++ if (!new_error)
++   DBUG_RETURN(0); /* OOM: do not touch new_error->msg below */
++ if (my_init_dynamic_array(&new_error->msg, sizeof(struct message), 0, 0))
++ DBUG_RETURN(0); /* OOM: Fatal error */
++ new_error->is_padding= is_padding;
++ DBUG_PRINT("info", ("is_padding: %s", (is_padding ? "true" : "false")));
++ new_error->er_name= er_name;
++ DBUG_PRINT("info", ("er_name: %s", er_name));
++ new_error->d_code= d_code;
++ DBUG_PRINT("info", ("d_code: %d", d_code));
++ new_error->sql_code1= sql_code1;
++ DBUG_PRINT("info", ("sql_code1: %s", sql_code1));
++ new_error->sql_code2= sql_code2;
++ DBUG_PRINT("info", ("sql_code2: %s", sql_code2));
++ DBUG_RETURN(new_error);
++}
+
+ /*
+- Parsing the string with error name and codes; returns the pointer to
++ Parsing the string with padd syntax (name + error to pad); returns the pointer to
+ the errors struct
+ */
+
+-static struct errors *parse_error_string(char *str, int er_count)
++static struct errors *parse_padd_string(char* str, int er_count)
+ {
+- struct errors *new_error;
++ char *er_name;
++ uint d_code;
++ char *start;
+- DBUG_ENTER("parse_error_string");
++ DBUG_ENTER("parse_padd_string");
+ DBUG_PRINT("enter", ("str: %s", str));
+
+- /* create a new element */
+- new_error= (struct errors *) my_malloc(sizeof(*new_error), MYF(MY_WME));
++ start= str;
++ str= skip_delimiters(str);
+
+- if (my_init_dynamic_array(&new_error->msg, sizeof(struct message), 0, 0))
++ /* getting the error name */
++
++ if (!(er_name= get_word(&str)))
+ DBUG_RETURN(0); /* OOM: Fatal error */
+
+- /* getting the error name */
+ str= skip_delimiters(str);
+
+- if (!(new_error->er_name= get_word(&str)))
++ if (!(d_code= parse_error_offset(start)))
++ {
++ fprintf(stderr, "Failed to parse the error padd string '%s' '%s' (d_code doesn't parse)!\n",er_name,str);
++ DBUG_RETURN(0);
++ }
++ if (d_code < (uint)(er_offset + er_count))
++ {
++ fprintf(stderr, "Error to padding less current error number!\n");
++ DBUG_RETURN(0);
++ }
++ DBUG_RETURN(create_new_error(TRUE,er_name,d_code,empty_string,empty_string));
++}
++
++/*
++ Parsing the string with error name and codes; returns the pointer to
++ the errors struct
++*/
++
++static struct errors *parse_error_string(char *str, int er_count)
++{
++ char *er_name;
++ int d_code;
++ const char *sql_code1= empty_string;
++ const char *sql_code2= empty_string;
++ DBUG_ENTER("parse_error_string");
++ DBUG_PRINT("enter", ("str: %s", str));
++
++ str= skip_delimiters(str);
++
++ /* getting the error name */
++
++ if (!(er_name= get_word(&str)))
+ DBUG_RETURN(0); /* OOM: Fatal error */
+- DBUG_PRINT("info", ("er_name: %s", new_error->er_name));
+
+ str= skip_delimiters(str);
+
+ /* getting the code1 */
+-
+- new_error->d_code= er_offset + er_count;
+- DBUG_PRINT("info", ("d_code: %d", new_error->d_code));
++ d_code= er_offset + er_count;
+
+ str= skip_delimiters(str);
+
+ /* if we reached EOL => no more codes, but this can happen */
+ if (!*str)
+ {
+- new_error->sql_code1= empty_string;
+- new_error->sql_code2= empty_string;
+ DBUG_PRINT("info", ("str: %s", str));
+- DBUG_RETURN(new_error);
++ goto complete_create;
+ }
+-
+ /* getting the sql_code 1 */
+-
+- if (!(new_error->sql_code1= get_word(&str)))
++ if (!(sql_code1= get_word(&str)))
+ DBUG_RETURN(0); /* OOM: Fatal error */
+- DBUG_PRINT("info", ("sql_code1: %s", new_error->sql_code1));
+
+ str= skip_delimiters(str);
+
+ /* if we reached EOL => no more codes, but this can happen */
+ if (!*str)
+ {
+- new_error->sql_code2= empty_string;
+ DBUG_PRINT("info", ("str: %s", str));
+- DBUG_RETURN(new_error);
++ goto complete_create;
+ }
+-
+ /* getting the sql_code 2 */
+- if (!(new_error->sql_code2= get_word(&str)))
++ if (!(sql_code2= get_word(&str)))
+ DBUG_RETURN(0); /* OOM: Fatal error */
+- DBUG_PRINT("info", ("sql_code2: %s", new_error->sql_code2));
+
+ str= skip_delimiters(str);
++
+ if (*str)
+ {
+ fprintf(stderr, "The error line did not end with sql/odbc code!");
+ DBUG_RETURN(0);
+ }
+-
+- DBUG_RETURN(new_error);
++complete_create:
++ DBUG_RETURN(create_new_error(FALSE,er_name,d_code,sql_code1,sql_code2));
+ }
+
+
--- /dev/null
+# name : innodb_adaptive_hash_index_num.patch
+# introduced : XtraDB on 5.5 (-13?)
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
+--- a/storage/innobase/btr/btr0btr.c 2010-12-04 15:52:23.355483176 +0900
++++ b/storage/innobase/btr/btr0btr.c 2010-12-04 16:12:48.639514256 +0900
+@@ -954,7 +954,7 @@
+ }
+ ut_a(block);
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, NULL);
+
+ header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+ #ifdef UNIV_BTR_DEBUG
+@@ -1023,7 +1023,7 @@
+
+ #ifndef UNIV_HOTBACKUP
+ if (UNIV_LIKELY(!recovery)) {
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+ }
+
+ block->check_index_page_at_flush = TRUE;
+@@ -1188,7 +1188,7 @@
+ ut_a(!page_zip || page_zip_validate(page_zip, page));
+ #endif /* UNIV_ZIP_DEBUG */
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ /* Recreate the page: note that global data on page (possible
+ segment headers, next page-field, etc.) is preserved intact */
+@@ -2497,7 +2497,7 @@
+ mem_heap_free(heap);
+ }
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ /* Make the father empty */
+ btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+@@ -2720,7 +2720,7 @@
+ goto err_exit;
+ }
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ /* Remove the page from the level list */
+ btr_level_list_remove(space, zip_size, page, mtr);
+@@ -2761,7 +2761,7 @@
+ goto err_exit;
+ }
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ #ifdef UNIV_BTR_DEBUG
+ if (UNIV_LIKELY_NULL(merge_page_zip)) {
+@@ -2875,7 +2875,7 @@
+ ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
+
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ btr_page_get_father(index, block, mtr, &cursor);
+ father = btr_cur_get_block(&cursor);
+@@ -2980,7 +2980,7 @@
+
+ page = buf_block_get_frame(block);
+ ut_a(page_is_comp(merge_page) == page_is_comp(page));
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
+
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c 2010-12-04 15:52:23.359513820 +0900
++++ b/storage/innobase/btr/btr0cur.c 2010-12-04 16:12:48.643551837 +0900
+@@ -486,7 +486,7 @@
+ #ifdef UNIV_SEARCH_PERF_STAT
+ info->n_searches++;
+ #endif
+- if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
++ if (rw_lock_get_writer(btr_search_get_latch(cursor->index->id)) == RW_LOCK_NOT_LOCKED
+ && latch_mode <= BTR_MODIFY_LEAF
+ && info->last_hash_succ
+ && !estimate
+@@ -522,7 +522,7 @@
+
+ if (has_search_latch) {
+ /* Release possible search latch to obey latching order */
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(cursor->index->id));
+ }
+
+ /* Store the position of the tree latch we push to mtr so that we
+@@ -844,7 +844,7 @@
+
+ if (has_search_latch) {
+
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(cursor->index->id));
+ }
+ }
+
+@@ -2059,7 +2059,7 @@
+ btr_search_update_hash_on_delete(cursor);
+ }
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+ }
+
+ if (!(flags & BTR_KEEP_SYS_FLAG)) {
+@@ -2073,7 +2073,7 @@
+ row_upd_rec_in_place(rec, index, offsets, update, page_zip);
+
+ if (block->is_hashed) {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ }
+
+ if (page_zip && !dict_index_is_clust(index)
+@@ -2857,7 +2857,7 @@
+ block = btr_cur_get_block(cursor);
+
+ if (block->is_hashed) {
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index->id));
+ }
+
+ page_zip = buf_block_get_page_zip(block);
+@@ -2872,7 +2872,7 @@
+ }
+
+ if (block->is_hashed) {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+ }
+
+ btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+@@ -3003,13 +3003,13 @@
+ == dict_table_is_comp(cursor->index->table));
+
+ if (block->is_hashed) {
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+ }
+
+ btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
+
+ if (block->is_hashed) {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ }
+
+ btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c 2010-12-04 15:52:23.387513429 +0900
++++ b/storage/innobase/btr/btr0sea.c 2010-12-04 16:14:51.721884049 +0900
+@@ -48,6 +48,8 @@
+ UNIV_INTERN char btr_search_enabled = TRUE;
+ UNIV_INTERN ibool btr_search_fully_disabled = FALSE;
+
++UNIV_INTERN ulint btr_search_index_num = 1;
++
+ /** Mutex protecting btr_search_enabled */
+ static mutex_t btr_search_enabled_mutex;
+
+@@ -79,7 +81,9 @@
+
+ /* We will allocate the latch from dynamic memory to get it to the
+ same DRAM page as other hotspot semaphores */
+-UNIV_INTERN rw_lock_t* btr_search_latch_temp;
++//UNIV_INTERN rw_lock_t* btr_search_latch_temp;
++
++UNIV_INTERN rw_lock_t** btr_search_latch_part;
+
+ /** padding to prevent other memory update hotspots from residing on
+ the same memory cache line */
+@@ -131,18 +135,19 @@
+ will not guarantee success. */
+ static
+ void
+-btr_search_check_free_space_in_heap(void)
++btr_search_check_free_space_in_heap(
+ /*=====================================*/
++ index_id_t key)
+ {
+ hash_table_t* table;
+ mem_heap_t* heap;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(key);
+
+ heap = table->heap;
+
+@@ -153,7 +158,7 @@
+ if (heap->free_block == NULL) {
+ buf_block_t* block = buf_block_alloc(NULL, 0);
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(key));
+
+ if (heap->free_block == NULL) {
+ heap->free_block = block;
+@@ -161,7 +166,7 @@
+ buf_block_free(block);
+ }
+
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(key));
+ }
+ }
+
+@@ -173,19 +178,30 @@
+ /*==================*/
+ ulint hash_size) /*!< in: hash index hash table size */
+ {
++ ulint i;
+ /* We allocate the search latch from dynamic memory:
+ see above at the global variable definition */
+
+- btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
++ //btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
+
+- rw_lock_create(btr_search_latch_key, &btr_search_latch,
+- SYNC_SEARCH_SYS);
++ //rw_lock_create(btr_search_latch_key, &btr_search_latch,
++ // SYNC_SEARCH_SYS);
+ mutex_create(btr_search_enabled_mutex_key,
+ &btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
+
+ btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));
+
+- btr_search_sys->hash_index = ha_create(hash_size, 0, 0);
++ /* btr_search_index_num should be <= 32. (bits of trx->has_search_latch) */
++ btr_search_latch_part = mem_alloc(sizeof(rw_lock_t*) * btr_search_index_num);
++ btr_search_sys->hash_index = mem_alloc(sizeof(hash_table_t*) * btr_search_index_num);
++ for (i = 0; i < btr_search_index_num; i++) {
++ btr_search_latch_part[i] = mem_alloc(sizeof(rw_lock_t));
++
++ rw_lock_create(btr_search_latch_key,
++ btr_search_latch_part[i], SYNC_SEARCH_SYS);
++
++ btr_search_sys->hash_index[i] = ha_create(hash_size, 0, 0);
++ }
+ }
+
+ /*****************************************************************//**
+@@ -195,11 +211,20 @@
+ btr_search_sys_free(void)
+ /*=====================*/
+ {
+- rw_lock_free(&btr_search_latch);
+- mem_free(btr_search_latch_temp);
+- btr_search_latch_temp = NULL;
+- mem_heap_free(btr_search_sys->hash_index->heap);
+- hash_table_free(btr_search_sys->hash_index);
++ ulint i;
++
++ for (i = 0; i < btr_search_index_num; i++) {
++ mem_heap_free(btr_search_sys->hash_index[i]->heap);
++ hash_table_free(btr_search_sys->hash_index[i]);
++
++ rw_lock_free(btr_search_latch_part[i]);
++
++ mem_free(btr_search_latch_part[i]);
++ }
++ /* The loop freed only the elements; free the two arrays themselves too. */
++ mem_free(btr_search_sys->hash_index);
++ mem_free(btr_search_latch_part);
++ btr_search_latch_part = NULL;
+ mem_free(btr_search_sys);
+ btr_search_sys = NULL;
+ }
+@@ -212,7 +237,7 @@
+ /*====================*/
+ {
+ mutex_enter(&btr_search_enabled_mutex);
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+
+ /* Disable access to hash index, also tell ha_insert_for_fold()
+ stop adding new nodes to hash index, but still allow updating
+@@ -230,7 +255,7 @@
+ /* btr_search_enabled_mutex should guarantee this. */
+ ut_ad(!btr_search_enabled);
+
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ mutex_exit(&btr_search_enabled_mutex);
+ }
+
+@@ -242,12 +267,12 @@
+ /*====================*/
+ {
+ mutex_enter(&btr_search_enabled_mutex);
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+
+ btr_search_enabled = TRUE;
+ btr_search_fully_disabled = FALSE;
+
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ mutex_exit(&btr_search_enabled_mutex);
+ }
+
+@@ -300,20 +325,21 @@
+ ulint
+ btr_search_info_get_ref_count(
+ /*==========================*/
+- btr_search_t* info) /*!< in: search info. */
++ btr_search_t* info, /*!< in: search info. */
++ index_id_t key)
+ {
+ ulint ret;
+
+ ut_ad(info);
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(key), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(key));
+ ret = info->ref_count;
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(key));
+
+ return(ret);
+ }
+@@ -334,8 +360,8 @@
+ int cmp;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ index = cursor->index;
+@@ -453,8 +479,8 @@
+ /*!< in: cursor */
+ {
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&block->lock, RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -538,7 +564,7 @@
+
+ ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -578,10 +604,10 @@
+ mem_heap_free(heap);
+ }
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- ha_insert_for_fold(btr_search_sys->hash_index, fold,
++ ha_insert_for_fold(btr_search_get_hash_index(cursor->index->id), fold,
+ block, rec);
+ }
+ }
+@@ -601,8 +627,8 @@
+ ulint* params2;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ block = btr_cur_get_block(cursor);
+@@ -623,7 +649,7 @@
+
+ if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
+
+- btr_search_check_free_space_in_heap();
++ btr_search_check_free_space_in_heap(cursor->index->id);
+ }
+
+ if (cursor->flag == BTR_CUR_HASH_FAIL) {
+@@ -633,11 +659,11 @@
+ btr_search_n_hash_fail++;
+ #endif /* UNIV_SEARCH_PERF_STAT */
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+
+ btr_search_update_hash_ref(info, block, cursor);
+
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ }
+
+ if (build_index) {
+@@ -881,17 +907,17 @@
+ cursor->flag = BTR_CUR_HASH;
+
+ if (UNIV_LIKELY(!has_search_latch)) {
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(index_id));
+
+ if (UNIV_UNLIKELY(!btr_search_enabled)) {
+ goto failure_unlock;
+ }
+ }
+
+- ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
+- ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
++ ut_ad(rw_lock_get_writer(btr_search_get_latch(index_id)) != RW_LOCK_EX);
++ ut_ad(rw_lock_get_reader_count(btr_search_get_latch(index_id)) > 0);
+
+- rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
++ rec = ha_search_and_get_data(btr_search_get_hash_index(index_id), fold);
+
+ if (UNIV_UNLIKELY(!rec)) {
+ goto failure_unlock;
+@@ -909,7 +935,7 @@
+ goto failure_unlock;
+ }
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index_id));
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
+ }
+@@ -1006,7 +1032,7 @@
+ /*-------------------------------------------*/
+ failure_unlock:
+ if (UNIV_LIKELY(!has_search_latch)) {
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index_id));
+ }
+ failure:
+ cursor->flag = BTR_CUR_HASH_FAIL;
+@@ -1029,10 +1055,11 @@
+ void
+ btr_search_drop_page_hash_index(
+ /*============================*/
+- buf_block_t* block) /*!< in: block containing index page,
++ buf_block_t* block, /*!< in: block containing index page,
+ s- or x-latched, or an index page
+ for which we know that
+ block->buf_fix_count == 0 */
++ dict_index_t* index_in)
+ {
+ hash_table_t* table;
+ ulint n_fields;
+@@ -1051,22 +1078,60 @@
+ ulint* offsets;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ if (index_in) {
++ ut_ad(!rw_lock_own(btr_search_get_latch(index_in->id), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(index_in->id), RW_LOCK_EX));
++ }
+ #endif /* UNIV_SYNC_DEBUG */
+
+ retry:
+- rw_lock_s_lock(&btr_search_latch);
++ if (index_in) {
++ index = index_in;
++ rw_lock_s_lock(btr_search_get_latch(index->id));
++ } else if (btr_search_index_num > 1) {
++ rw_lock_t* btr_search_latch;
++
++ /* FIXME: This may be optimistic implementation still. */
++ btr_search_latch = (rw_lock_t*)(block->btr_search_latch);
++ if (UNIV_LIKELY(!btr_search_latch)) {
++ if (block->is_hashed) {
++ goto retry;
++ }
++ return;
++ }
++ rw_lock_s_lock(btr_search_latch);
++ if (UNIV_LIKELY(btr_search_latch != block->btr_search_latch)) {
++ rw_lock_s_unlock(btr_search_latch);
++ goto retry;
++ }
++ if (UNIV_LIKELY(!block->is_hashed)) {
++ rw_lock_s_unlock(btr_search_latch);
++ return;
++ }
++ index = block->index;
++ ut_a(btr_search_latch == btr_search_get_latch(index->id));
++ } else {
++ /* btr_search_index_num == 1 */
++ /* btr_search_latch is only one and able to obtain
++ before evaluating block->is_hashed. */
++ rw_lock_s_lock(btr_search_latch_part[0]);
++ if (UNIV_LIKELY(!block->is_hashed)) {
++ rw_lock_s_unlock(btr_search_latch_part[0]);
++ return;
++ }
++ index = block->index;
++ }
++
+ page = block->frame;
+
+ if (UNIV_LIKELY(!block->is_hashed)) {
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+ return;
+ }
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(index->id);
+
+ #ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+@@ -1076,14 +1141,14 @@
+
+ n_fields = block->curr_n_fields;
+ n_bytes = block->curr_n_bytes;
+- index = block->index;
++ ut_a(index == block->index);
+ ut_a(!dict_index_is_ibuf(index));
+
+ /* NOTE: The fields of block must not be accessed after
+ releasing btr_search_latch, as the index page might only
+ be s-latched! */
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+ ut_a(n_fields + n_bytes > 0);
+
+@@ -1133,7 +1198,7 @@
+ mem_heap_free(heap);
+ }
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index->id));
+
+ if (UNIV_UNLIKELY(!block->is_hashed)) {
+ /* Someone else has meanwhile dropped the hash index */
+@@ -1149,7 +1214,7 @@
+ /* Someone else has meanwhile built a new hash index on the
+ page, with different parameters */
+
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+
+ mem_free(folds);
+ goto retry;
+@@ -1165,6 +1230,7 @@
+
+ block->is_hashed = FALSE;
+ block->index = NULL;
++ block->btr_search_latch = NULL;
+
+ cleanup:
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+@@ -1177,14 +1243,14 @@
+ "InnoDB: the hash index to a page of %s,"
+ " still %lu hash nodes remain.\n",
+ index->name, (ulong) block->n_pointers);
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+
+ btr_search_validate();
+ } else {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+ }
+ #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+
+ mem_free(folds);
+@@ -1215,10 +1281,10 @@
+ mem_heap_t* heap = NULL;
+ ulint* offsets;
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index->id));
+ //buf_pool_mutex_enter_all();
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(index->id);
+
+ for (j = 0; j < srv_buf_pool_instances; j++) {
+ buf_pool_t* buf_pool;
+@@ -1291,6 +1357,7 @@
+
+ block->is_hashed = FALSE;
+ block->index = NULL;
++ block->btr_search_latch = NULL;
+
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ if (UNIV_UNLIKELY(block->n_pointers)) {
+@@ -1313,7 +1380,7 @@
+ }
+
+ //buf_pool_mutex_exit_all();
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+@@ -1360,7 +1427,7 @@
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, NULL);
+ }
+
+ mtr_commit(&mtr);
+@@ -1402,26 +1469,26 @@
+ ut_ad(index);
+ ut_a(!dict_index_is_ibuf(index));
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(index->id);
+ page = buf_block_get_frame(block);
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
+ ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+ || rw_lock_own(&(block->lock), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(index->id));
+
+ if (block->is_hashed && ((block->curr_n_fields != n_fields)
+ || (block->curr_n_bytes != n_bytes)
+ || (block->curr_left_side != left_side))) {
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+ } else {
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+ }
+
+ n_recs = page_get_n_recs(page);
+@@ -1515,9 +1582,9 @@
+ fold = next_fold;
+ }
+
+- btr_search_check_free_space_in_heap();
++ btr_search_check_free_space_in_heap(index->id);
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index->id));
+
+ if (UNIV_UNLIKELY(btr_search_fully_disabled)) {
+ goto exit_func;
+@@ -1545,6 +1612,7 @@
+ block->curr_n_bytes = n_bytes;
+ block->curr_left_side = left_side;
+ block->index = index;
++ block->btr_search_latch = btr_search_get_latch(index->id);
+
+ for (i = 0; i < n_cached; i++) {
+
+@@ -1552,7 +1620,7 @@
+ }
+
+ exit_func:
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index->id));
+
+ mem_free(folds);
+ mem_free(recs);
+@@ -1591,13 +1659,13 @@
+ ut_a(!(new_block->is_hashed || block->is_hashed)
+ || !dict_index_is_ibuf(index));
+
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(index->id));
+
+ if (new_block->is_hashed) {
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+
+ return;
+ }
+@@ -1612,7 +1680,7 @@
+ new_block->n_bytes = block->curr_n_bytes;
+ new_block->left_side = left_side;
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+ ut_a(n_fields + n_bytes > 0);
+
+@@ -1624,7 +1692,7 @@
+ return;
+ }
+
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+ }
+
+ /********************************************************************//**
+@@ -1663,7 +1731,7 @@
+ ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
+ ut_a(!dict_index_is_ibuf(cursor->index));
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(cursor->index->id);
+
+ index_id = cursor->index->id;
+ fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_,
+@@ -1672,11 +1740,11 @@
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+
+ ha_search_and_delete_if_found(table, fold, rec);
+
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ }
+
+ /********************************************************************//**
+@@ -1710,21 +1778,21 @@
+ ut_a(block->index == cursor->index);
+ ut_a(!dict_index_is_ibuf(cursor->index));
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(cursor->index->id));
+
+ if ((cursor->flag == BTR_CUR_HASH)
+ && (cursor->n_fields == block->curr_n_fields)
+ && (cursor->n_bytes == block->curr_n_bytes)
+ && !block->curr_left_side) {
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(cursor->index->id);
+
+ ha_search_and_update_if_found(table, cursor->fold, rec,
+ block, page_rec_get_next(rec));
+
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+ } else {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(cursor->index->id));
+
+ btr_search_update_hash_on_insert(cursor);
+ }
+@@ -1759,9 +1827,9 @@
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+- table = btr_search_sys->hash_index;
++ table = btr_search_get_hash_index(cursor->index->id);
+
+- btr_search_check_free_space_in_heap();
++ btr_search_check_free_space_in_heap(cursor->index->id);
+
+ rec = btr_cur_get_rec(cursor);
+
+@@ -1806,7 +1874,7 @@
+ } else {
+ if (left_side) {
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index_id));
+
+ locked = TRUE;
+
+@@ -1820,7 +1888,7 @@
+
+ if (!locked) {
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index_id));
+
+ locked = TRUE;
+ }
+@@ -1838,7 +1906,7 @@
+ if (!left_side) {
+
+ if (!locked) {
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index_id));
+
+ locked = TRUE;
+ }
+@@ -1853,7 +1921,7 @@
+
+ if (!locked) {
+
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(index_id));
+
+ locked = TRUE;
+ }
+@@ -1876,7 +1944,7 @@
+ mem_heap_free(heap);
+ }
+ if (locked) {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(index_id));
+ }
+ }
+
+@@ -1892,7 +1960,7 @@
+ ha_node_t* node;
+ ulint n_page_dumps = 0;
+ ibool ok = TRUE;
+- ulint i;
++ ulint i,j;
+ ulint cell_count;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+@@ -1904,23 +1972,25 @@
+
+ rec_offs_init(offsets_);
+
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+ buf_pool_page_hash_x_lock_all();
+
+- cell_count = hash_get_n_cells(btr_search_sys->hash_index);
++ for (j = 0; j < btr_search_index_num; j++) {
++
++ cell_count = hash_get_n_cells(btr_search_sys->hash_index[j]);
+
+ for (i = 0; i < cell_count; i++) {
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if ((i != 0) && ((i % chunk_size) == 0)) {
+ buf_pool_page_hash_x_unlock_all();
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ os_thread_yield();
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+ buf_pool_page_hash_x_lock_all();
+ }
+
+- node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
++ node = hash_get_nth_cell(btr_search_sys->hash_index[j], i)->node;
+
+ for (; node != NULL; node = node->next) {
+ const buf_block_t* block
+@@ -2029,19 +2099,21 @@
+ give other queries a chance to run. */
+ if (i != 0) {
+ buf_pool_page_hash_x_unlock_all();
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ os_thread_yield();
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+ buf_pool_page_hash_x_lock_all();
+ }
+
+- if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
++ if (!ha_validate(btr_search_sys->hash_index[j], i, end_index)) {
+ ok = FALSE;
+ }
+ }
+
++ } /*for (j = 0; j < btr_search_index_num; j++)*/
++
+ buf_pool_page_hash_x_unlock_all();
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-04 15:55:21.351597052 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-04 16:12:48.654550708 +0900
+@@ -949,6 +949,7 @@
+
+ block->check_index_page_at_flush = FALSE;
+ block->index = NULL;
++ block->btr_search_latch = NULL;
+
+ block->is_hashed = FALSE;
+
+@@ -1477,7 +1478,7 @@
+ /* To follow the latching order, we
+ have to release btr_search_latch
+ before acquiring block->latch. */
+- rw_lock_x_unlock(&btr_search_latch);
++ btr_search_x_unlock_all();
+ /* When we release the search latch,
+ we must rescan all blocks, because
+ some may become hashed again. */
+@@ -1508,11 +1509,11 @@
+ anything. block->is_hashed can only
+ be set on uncompressed file pages. */
+
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, NULL);
+
+ rw_lock_x_unlock(&block->lock);
+
+- rw_lock_x_lock(&btr_search_latch);
++ btr_search_x_lock_all();
+
+ ut_ad(!btr_search_enabled);
+ }
+@@ -1531,7 +1532,11 @@
+ ibool released_search_latch;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ulint j;
++
++ for (j = 0; j < btr_search_index_num; j++) {
++ ut_ad(rw_lock_own(btr_search_latch_part[j], RW_LOCK_EX));
++ }
+ #endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!btr_search_enabled);
+
+@@ -2635,6 +2640,7 @@
+ {
+ block->check_index_page_at_flush = FALSE;
+ block->index = NULL;
++ block->btr_search_latch = NULL;
+
+ block->n_hash_helps = 0;
+ block->is_hashed = FALSE;
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c 2010-12-04 15:35:29.137347521 +0900
++++ b/storage/innobase/buf/buf0lru.c 2010-12-04 16:12:48.658550840 +0900
+@@ -1775,7 +1775,7 @@
+
+ UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+- btr_search_drop_page_hash_index((buf_block_t*) bpage);
++ btr_search_drop_page_hash_index((buf_block_t*) bpage, NULL);
+ UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
+ UNIV_PAGE_SIZE);
+
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c 2010-12-04 15:52:23.398513916 +0900
++++ b/storage/innobase/dict/dict0dict.c 2010-12-04 16:12:48.662550715 +0900
+@@ -1802,7 +1802,7 @@
+ zero. */
+
+ for (;;) {
+- ulint ref_count = btr_search_info_get_ref_count(info);
++ ulint ref_count = btr_search_info_get_ref_count(info, index->id);
+ if (ref_count == 0) {
+ break;
+ }
+diff -ruN a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c
+--- a/storage/innobase/ha/ha0ha.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ha/ha0ha.c 2010-12-04 16:12:48.665593752 +0900
+@@ -102,7 +102,8 @@
+ ut_ad(table);
+ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
++ /* cannot identify which btr_search_latch_part[i] applies for now */
++ //ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ #ifndef UNIV_HOTBACKUP
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 16:12:20.185850734 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 16:12:48.674552412 +0900
+@@ -11611,6 +11611,11 @@
+ "Disable with --skip-innodb-adaptive-hash-index.",
+ NULL, innodb_adaptive_hash_index_update, TRUE);
+
++static MYSQL_SYSVAR_ULONG(adaptive_hash_index_partitions, btr_search_index_num,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "Number of InnoDB adaptive hash index partitions (default 1: disable partitioning)",
++ NULL, NULL, 1, 1, 32, 0);
++
+ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
+ PLUGIN_VAR_RQCMDARG,
+ "Replication thread delay (ms) on the slave server if "
+@@ -11963,6 +11968,7 @@
+ MYSQL_SYSVAR(use_sys_stats_table),
+ MYSQL_SYSVAR(stats_sample_pages),
+ MYSQL_SYSVAR(adaptive_hash_index),
++ MYSQL_SYSVAR(adaptive_hash_index_partitions),
+ MYSQL_SYSVAR(replication_delay),
+ MYSQL_SYSVAR(status_file),
+ MYSQL_SYSVAR(strict_mode),
+diff -ruN a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
+--- a/storage/innobase/include/btr0sea.h 2010-12-03 15:48:03.070987226 +0900
++++ b/storage/innobase/include/btr0sea.h 2010-12-04 16:12:48.707551382 +0900
+@@ -85,7 +85,8 @@
+ ulint
+ btr_search_info_get_ref_count(
+ /*==========================*/
+- btr_search_t* info); /*!< in: search info. */
++ btr_search_t* info, /*!< in: search info. */
++ index_id_t key);
+ /*********************************************************************//**
+ Updates the search info. */
+ UNIV_INLINE
+@@ -136,10 +137,11 @@
+ void
+ btr_search_drop_page_hash_index(
+ /*============================*/
+- buf_block_t* block); /*!< in: block containing index page,
++ buf_block_t* block, /*!< in: block containing index page,
+ s- or x-latched, or an index page
+ for which we know that
+ block->buf_fix_count == 0 */
++ dict_index_t* index_in);
+ /************************************************************************
+ Drops a page hash index based on index */
+ UNIV_INTERN
+@@ -199,10 +201,47 @@
+ # define btr_search_validate() TRUE
+ #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+
++/********************************************************************//**
++New functions to control the split (partitioned) btr_search_index; defined in btr0sea.ic */
++UNIV_INLINE /* @return hash table at hash_index[key % btr_search_index_num] */
++hash_table_t*
++btr_search_get_hash_index(
++/*======================*/
++ index_id_t key); /*!< in: partitioning key, typically index->id */
++
++UNIV_INLINE /* @return latch at btr_search_latch_part[key % btr_search_index_num] */
++rw_lock_t*
++btr_search_get_latch(
++/*=================*/
++ index_id_t key); /*!< in: partitioning key, typically index->id */
++
++UNIV_INLINE /* x-locks every latch in btr_search_latch_part[] */
++void
++btr_search_x_lock_all(void);
++/*========================*/
++
++UNIV_INLINE /* releases the x-lock on every latch in btr_search_latch_part[] */
++void
++btr_search_x_unlock_all(void);
++/*==========================*/
++
++UNIV_INLINE /* s-locks every latch in btr_search_latch_part[] */
++void
++btr_search_s_lock_all(void);
++/*========================*/
++
++UNIV_INLINE /* releases the s-lock on every latch in btr_search_latch_part[] */
++void
++btr_search_s_unlock_all(void);
++/*==========================*/
++
++
+ /** Flag: has the search system been enabled?
+ Protected by btr_search_latch and btr_search_enabled_mutex. */
+ extern char btr_search_enabled;
+
++extern ulint btr_search_index_num;
++
+ /** Flag: whether the search system has completed its disabling process,
+ It is set to TRUE right after buf_pool_drop_hash_index() in
+ btr_search_disable(), indicating hash index entries are cleaned up.
+@@ -269,7 +308,7 @@
+
+ /** The hash index system */
+ struct btr_search_sys_struct{
+- hash_table_t* hash_index; /*!< the adaptive hash index,
++ hash_table_t** hash_index; /*!< the adaptive hash index,
+ mapping dtuple_fold values
+ to rec_t pointers on index pages */
+ };
+@@ -290,10 +329,12 @@
+
+ Bear in mind (3) and (4) when using the hash index.
+ */
+-extern rw_lock_t* btr_search_latch_temp;
++//extern rw_lock_t* btr_search_latch_temp;
++
++extern rw_lock_t** btr_search_latch_part;
+
+ /** The latch protecting the adaptive search system */
+-#define btr_search_latch (*btr_search_latch_temp)
++//#define btr_search_latch (*btr_search_latch_temp)
+
+ #ifdef UNIV_SEARCH_PERF_STAT
+ /** Number of successful adaptive hash index lookups */
+diff -ruN a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
+--- a/storage/innobase/include/btr0sea.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0sea.ic 2010-12-04 16:12:48.709511202 +0900
+@@ -62,8 +62,8 @@
+ btr_search_t* info;
+
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
++ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
++ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ info = btr_search_get_info(index);
+@@ -82,3 +82,72 @@
+
+ btr_search_info_update_slow(info, cursor);
+ }
++
++/*********************************************************************//**
++New functions to control the split btr_search_index. This one returns the hash table partition for key. */
++UNIV_INLINE
++hash_table_t*
++btr_search_get_hash_index(
++/*======================*/
++ index_id_t key) /*!< in: partitioning key, typically index->id */
++{
++ return(btr_search_sys->hash_index[key % btr_search_index_num]); /* partition number = key modulo partition count */
++}
++
++UNIV_INLINE /* Returns the rw-lock of the partition that key maps to. */
++rw_lock_t*
++btr_search_get_latch(
++/*=================*/
++ index_id_t key) /*!< in: partitioning key, typically index->id */
++{
++ return(btr_search_latch_part[key % btr_search_index_num]); /* same modulo mapping as btr_search_get_hash_index() */
++}
++
++UNIV_INLINE /* Acquires the x-lock on every latch in btr_search_latch_part[]. */
++void
++btr_search_x_lock_all(void)
++/*=======================*/
++{
++ ulint i; /* partition number */
++
++ for (i = 0; i < btr_search_index_num; i++) { /* ascending partition order */
++ rw_lock_x_lock(btr_search_latch_part[i]);
++ }
++}
++
++UNIV_INLINE /* Releases the x-lock on every latch in btr_search_latch_part[]. */
++void
++btr_search_x_unlock_all(void)
++/*==========================*/
++{
++ ulint i; /* partition number */
++
++ for (i = 0; i < btr_search_index_num; i++) { /* ascending partition order */
++ rw_lock_x_unlock(btr_search_latch_part[i]);
++ }
++}
++
++UNIV_INLINE /* Acquires the s-lock on every latch in btr_search_latch_part[]. */
++void
++btr_search_s_lock_all(void)
++/*=======================*/
++{
++ ulint i; /* partition number */
++
++ for (i = 0; i < btr_search_index_num; i++) { /* ascending partition order */
++ rw_lock_s_lock(btr_search_latch_part[i]);
++ }
++}
++
++UNIV_INLINE /* Releases the s-lock on every latch in btr_search_latch_part[]. */
++void
++btr_search_s_unlock_all(void)
++/*=========================*/
++{
++ ulint i; /* partition number */
++
++ for (i = 0; i < btr_search_index_num; i++) { /* ascending partition order */
++ rw_lock_s_unlock(btr_search_latch_part[i]);
++ }
++}
++
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-15 19:00:07.713604580 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-15 20:58:03.546839883 +0900
+@@ -1473,7 +1473,7 @@
+ pointers in the adaptive hash index
+ pointing to this frame */
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+- unsigned is_hashed:1; /*!< TRUE if hash index has
++ volatile unsigned is_hashed:1; /*!< TRUE if hash index has
+ already been built on this
+ page; note that it does not
+ guarantee that the index is
+@@ -1487,6 +1487,7 @@
+ unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
+ dict_index_t* index; /*!< Index for which the adaptive
+ hash index has been created. */
++ volatile rw_lock_t* btr_search_latch;
+ /* @} */
+ # ifdef UNIV_SYNC_DEBUG
+ /** @name Debug fields */
+diff -ruN a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
+--- a/storage/innobase/include/row0upd.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/row0upd.ic 2010-12-04 16:12:48.710551113 +0900
+@@ -158,7 +158,7 @@
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ #ifdef UNIV_SYNC_DEBUG
+- if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
++ if (!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX)) {
+ ut_ad(!buf_block_align(rec)->is_hashed);
+ }
+ #endif /* UNIV_SYNC_DEBUG */
+diff -ruN a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
+--- a/storage/innobase/page/page0page.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0page.c 2010-12-04 16:12:48.712550963 +0900
+@@ -218,7 +218,7 @@
+ const ibool is_hashed = block->is_hashed;
+
+ if (is_hashed) {
+- rw_lock_x_lock(&btr_search_latch);
++ rw_lock_x_lock(btr_search_get_latch(block->index->id));
+ }
+
+ ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+@@ -244,7 +244,7 @@
+
+ #ifndef UNIV_HOTBACKUP
+ if (is_hashed) {
+- rw_lock_x_unlock(&btr_search_latch);
++ rw_lock_x_unlock(btr_search_get_latch(block->index->id));
+ }
+ #endif /* !UNIV_HOTBACKUP */
+ }
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-12-04 15:57:13.061494433 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 16:12:48.716470334 +0900
+@@ -4445,7 +4445,7 @@
+
+ #ifndef UNIV_HOTBACKUP
+ temp_block = buf_block_alloc(buf_pool, 0);
+- btr_search_drop_page_hash_index(block);
++ btr_search_drop_page_hash_index(block, index);
+ block->check_index_page_at_flush = TRUE;
+ #else /* !UNIV_HOTBACKUP */
+ ut_ad(block == back_block1);
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c 2010-12-04 16:09:53.204513572 +0900
++++ b/storage/innobase/row/row0sel.c 2010-12-04 16:12:48.722551273 +0900
+@@ -1210,7 +1210,7 @@
+ ut_ad(plan->unique_search);
+ ut_ad(!plan->must_get_clust);
+ #ifdef UNIV_SYNC_DEBUG
+- ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
++ ut_ad(rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
+ #endif /* UNIV_SYNC_DEBUG */
+
+ row_sel_open_pcur(plan, TRUE, mtr);
+@@ -1381,10 +1381,10 @@
+ && !plan->must_get_clust
+ && !plan->table->big_rows) {
+ if (!search_latch_locked) {
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_lock(btr_search_get_latch(index->id));
+
+ search_latch_locked = TRUE;
+- } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
++ } else if (rw_lock_get_writer(btr_search_get_latch(index->id)) == RW_LOCK_WAIT_EX) {
+
+ /* There is an x-latch request waiting: release the
+ s-latch for a moment; as an s-latch here is often
+@@ -1393,8 +1393,8 @@
+ from acquiring an s-latch for a long time, lowering
+ performance significantly in multiprocessors. */
+
+- rw_lock_s_unlock(&btr_search_latch);
+- rw_lock_s_lock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
++ rw_lock_s_lock(btr_search_get_latch(index->id));
+ }
+
+ found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
+@@ -1417,7 +1417,7 @@
+ }
+
+ if (search_latch_locked) {
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+
+ search_latch_locked = FALSE;
+ }
+@@ -1993,7 +1993,7 @@
+
+ func_exit:
+ if (search_latch_locked) {
+- rw_lock_s_unlock(&btr_search_latch);
++ rw_lock_s_unlock(btr_search_get_latch(index->id));
+ }
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+@@ -3356,6 +3356,8 @@
+ /* if the returned record was locked and we did a semi-consistent
+ read (fetch the newest committed version), then this is set to
+ TRUE */
++ ulint i;
++ ulint should_release;
+ #ifdef UNIV_SEARCH_DEBUG
+ ulint cnt = 0;
+ #endif /* UNIV_SEARCH_DEBUG */
+@@ -3441,18 +3443,32 @@
+ /* PHASE 0: Release a possible s-latch we are holding on the
+ adaptive hash index latch if there is someone waiting behind */
+
+- if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
+- && trx->has_search_latch) {
++ should_release = 0;
++ for (i = 0; i < btr_search_index_num; i++) {
++ if ((trx->has_search_latch & ((ulint)1 << i))
++ && rw_lock_get_writer(btr_search_latch_part[i])
++ != RW_LOCK_NOT_LOCKED) {
++ should_release |= ((ulint)1 << i);
++ }
++ }
++
++ if (should_release) {
+
+ /* There is an x-latch request on the adaptive hash index:
+ release the s-latch to reduce starvation and wait for
+ BTR_SEA_TIMEOUT rounds before trying to keep it again over
+ calls from MySQL */
+
+- rw_lock_s_unlock(&btr_search_latch);
+- trx->has_search_latch = FALSE;
++ for (i = 0; i < btr_search_index_num; i++) {
++ if (should_release & ((ulint)1 << i)) {
++ rw_lock_s_unlock(btr_search_latch_part[i]);
++ trx->has_search_latch &= ~((ulint)1 << i);
++ }
++ }
+
++ if (!trx->has_search_latch) {
+ trx->search_latch_timeout = BTR_SEA_TIMEOUT;
++ }
+ }
+
+ /* Reset the new record lock info if srv_locks_unsafe_for_binlog
+@@ -3603,9 +3619,11 @@
+ hash index semaphore! */
+
+ #ifndef UNIV_SEARCH_DEBUG
+- if (!trx->has_search_latch) {
+- rw_lock_s_lock(&btr_search_latch);
+- trx->has_search_latch = TRUE;
++ if (!(trx->has_search_latch
++ & ((ulint)1 << (index->id % btr_search_index_num)))) {
++ rw_lock_s_lock(btr_search_get_latch(index->id));
++ trx->has_search_latch |=
++ (ulint)1 << (index->id % btr_search_index_num);
+ }
+ #endif
+ switch (row_sel_try_search_shortcut_for_mysql(
+@@ -3666,7 +3684,11 @@
+
+ trx->search_latch_timeout--;
+
+- rw_lock_s_unlock(&btr_search_latch);
++ for (i = 0; i < btr_search_index_num; i++) {
++ if (trx->has_search_latch & ((ulint)1 << i)) {
++ rw_lock_s_unlock(btr_search_latch_part[i]);
++ }
++ }
+ trx->has_search_latch = FALSE;
+ }
+
+@@ -3690,7 +3712,12 @@
+ /* PHASE 3: Open or restore index cursor position */
+
+ if (trx->has_search_latch) {
+- rw_lock_s_unlock(&btr_search_latch);
++
++ for (i = 0; i < btr_search_index_num; i++) {
++ if (trx->has_search_latch & ((ulint)1 << i)) {
++ rw_lock_s_unlock(btr_search_latch_part[i]);
++ }
++ }
+ trx->has_search_latch = FALSE;
+ }
+
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 16:12:20.231484679 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 16:12:48.726551018 +0900
+@@ -2008,7 +2008,9 @@
+ "-------------------------------------\n", file);
+ ibuf_print(file);
+
+- ha_print_info(file, btr_search_sys->hash_index);
++ for (i = 0; i < btr_search_index_num; i++) {
++ ha_print_info(file, btr_search_get_hash_index((index_id_t)i));
++ }
+
+ fprintf(file,
+ "%.2f hash searches/s, %.2f non-hash searches/s\n",
+@@ -2033,14 +2035,15 @@
+ ut_total_allocated_memory,
+ mem_pool_get_reserved(mem_comm_pool));
+ /* Calcurate reserved memories */
+- if (btr_search_sys && btr_search_sys->hash_index->heap) {
+- btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++ if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
++ btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
+ } else {
+ btr_search_sys_subtotal = 0;
+- for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
+- btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++ for (i=0; i < btr_search_sys->hash_index[0]->n_mutexes; i++) {
++ btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index[0]->heaps[i]);
+ }
+ }
++ btr_search_sys_subtotal *= btr_search_index_num;
+
+ lock_sys_subtotal = 0;
+ if (trx_sys) {
+@@ -2067,10 +2070,10 @@
+ " Threads %lu \t(%lu + %lu)\n",
+
+ (ulong) (btr_search_sys
+- ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++ ? (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t)) : 0)
+ + btr_search_sys_subtotal,
+ (ulong) (btr_search_sys
+- ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++ ? (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t)) : 0),
+ (ulong) btr_search_sys_subtotal,
+
+ (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c 2010-12-03 17:36:44.300986571 +0900
++++ b/storage/innobase/sync/sync0sync.c 2010-12-04 16:12:48.729513564 +0900
+@@ -1183,7 +1183,6 @@
+ case SYNC_ANY_LATCH:
+ case SYNC_FILE_FORMAT_TAG:
+ case SYNC_DOUBLEWRITE:
+- case SYNC_SEARCH_SYS:
+ case SYNC_SEARCH_SYS_CONF:
+ case SYNC_TRX_LOCK_HEAP:
+ case SYNC_KERNEL:
+@@ -1204,6 +1203,7 @@
+ ut_error;
+ }
+ break;
++ case SYNC_SEARCH_SYS:
+ case SYNC_BUF_LRU_LIST:
+ case SYNC_BUF_FLUSH_LIST:
+ case SYNC_BUF_PAGE_HASH:
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-12-03 17:49:11.623953784 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-04 16:12:48.731513275 +0900
+@@ -266,8 +266,14 @@
+ /*=================================*/
+ trx_t* trx) /*!< in: transaction */
+ {
++ ulint i;
++
+ if (trx->has_search_latch) {
+- rw_lock_s_unlock(&btr_search_latch);
++ for (i = 0; i < btr_search_index_num; i++) {
++ if (trx->has_search_latch & ((ulint)1 << i)) {
++ rw_lock_s_unlock(btr_search_latch_part[i]);
++ }
++ }
+
+ trx->has_search_latch = FALSE;
+ }
--- /dev/null
+# name : innodb_admin_command_base.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:32:15.624039043 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:32:35.424957827 +0900
+@@ -11775,7 +11775,8 @@
+ i_s_innodb_sys_foreign_cols,
+ i_s_innodb_sys_stats,
+ i_s_innodb_table_stats,
+-i_s_innodb_index_stats
++i_s_innodb_index_stats,
++i_s_innodb_admin_command
+ mysql_declare_plugin_end;
+
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 17:30:16.299955549 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 17:32:35.425989972 +0900
+@@ -4162,3 +4162,139 @@
+ STRUCT_FLD(system_vars, NULL),
+ STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Column layout used for the XTRADB_ADMIN_COMMAND table: a single string column. */
++static ST_FIELD_INFO i_s_innodb_admin_command_info[] =
++{
++ {STRUCT_FLD(field_name, "result_message"), /* field[0]: text written by the fill function */
++ STRUCT_FLD(field_length, 1024),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++#ifndef INNODB_COMPATIBILITY_HOOKS
++#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
++#endif
++
++extern "C" {
++char **thd_query(MYSQL_THD thd);
++}
++
++static /* Scans the current statement text for an XTRA_* token and stores one result_message row. */
++int /* @return 0 when the row was stored or access was denied, 1 if storing the row failed */
++i_s_innodb_admin_command_fill(
++/*==========================*/
++ THD* thd, /*!< in: thread whose statement text is scanned */
++ TABLE_LIST* tables, /*!< in/out: tables->table receives the row */
++ COND* cond) /*!< in: condition (not referenced in this function) */
++{
++ TABLE* i_s_table = (TABLE *) tables->table;
++ char** query_str;
++ char* ptr;
++ char quote = '\0'; /* currently open quote character, '\0' when outside quotes */
++ const char* command_head = "XTRA_"; /* prefix searched for, case-insensitively */
++
++ DBUG_ENTER("i_s_innodb_admin_command_fill");
++
++ /* if the PROCESS_ACL check reports failure, return with no row and no error */
++ if (check_global_access(thd, PROCESS_ACL)) {
++ DBUG_RETURN(0);
++ }
++
++ if(thd_sql_command(thd) != SQLCOM_SELECT) {
++ field_store_string(i_s_table->field[0],
++ "SELECT command is only accepted.");
++ goto end_func;
++ }
++
++ query_str = thd_query(thd);
++ ptr = *query_str; /* NOTE(review): assumes the query text pointer is non-NULL here - confirm */
++
++ for (; *ptr; ptr++) { /* stop at the first unquoted occurrence of command_head */
++ if (*ptr == quote) { /* closing quote (never true while quote is '\0', since *ptr != '\0') */
++ quote = '\0';
++ } else if (quote) { /* inside a quoted section: skip the character */
++ } else if (*ptr == '`' || *ptr == '"') { /* only ` and " open a quoted section; ' does not */
++ quote = *ptr;
++ } else {
++ long i;
++ for (i = 0; command_head[i]; i++) { /* a '\0' in ptr mismatches, so this never reads past the terminator */
++ if (toupper((int)(unsigned char)(ptr[i]))
++ != toupper((int)(unsigned char)
++ (command_head[i]))) {
++ goto nomatch;
++ }
++ }
++ break; /* whole prefix matched; ptr points at it */
++nomatch:
++ ;
++ }
++ }
++
++ if (!*ptr) { /* reached the end of the statement without a match */
++ field_store_string(i_s_table->field[0],
++ "No XTRA_* command in the SQL statement."
++ " Please add /*!XTRA_xxxx*/ to the SQL.");
++ goto end_func;
++ }
++
++ if (!strncasecmp("XTRA_HELLO", ptr, 10)) { /* compares 10 chars only, so longer tokens with this prefix also match */
++ /* Example command XTRA_HELLO: log to stderr and answer "Hello!" */
++
++ ut_print_timestamp(stderr);
++ fprintf(stderr, " InnoDB: administration command test for XtraDB"
++ " 'XTRA_HELLO' was detected.\n");
++
++ field_store_string(i_s_table->field[0],
++ "Hello!");
++ goto end_func;
++ }
++
++ field_store_string(i_s_table->field[0],
++ "Undefined XTRA_* command.");
++ goto end_func;
++
++end_func: /* every path that set field[0] ends here to store the single row */
++ if (schema_table_store_record(thd, i_s_table)) {
++ DBUG_RETURN(1);
++ } else {
++ DBUG_RETURN(0);
++ }
++}
++/* Plugin init for XTRADB_ADMIN_COMMAND: sets the column list and the fill callback. Always returns 0. */
++static
++int
++i_s_innodb_admin_command_init(
++/*==========================*/
++ void* p) /*!< in/out: table schema object (cast to ST_SCHEMA_TABLE*) */
++{
++ DBUG_ENTER("i_s_innodb_admin_command_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_admin_command_info;
++ schema->fill_table = i_s_innodb_admin_command_fill;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the XTRADB_ADMIN_COMMAND information schema table. */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_admin_command =
++{
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++ STRUCT_FLD(info, &i_s_info),
++ STRUCT_FLD(name, "XTRADB_ADMIN_COMMAND"),
++ STRUCT_FLD(author, plugin_author),
++ STRUCT_FLD(descr, "XtraDB specific command acceptor"),
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++ STRUCT_FLD(init, i_s_innodb_admin_command_init),
++ STRUCT_FLD(deinit, i_s_common_deinit),
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++ STRUCT_FLD(status_vars, NULL),
++ STRUCT_FLD(system_vars, NULL),
++ STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h 2010-12-03 17:30:16.301987692 +0900
++++ b/storage/innobase/handler/i_s.h 2010-12-03 17:32:35.426954555 +0900
+@@ -44,5 +44,6 @@
+ extern struct st_mysql_plugin i_s_innodb_sys_stats;
+ extern struct st_mysql_plugin i_s_innodb_table_stats;
+ extern struct st_mysql_plugin i_s_innodb_index_stats;
++extern struct st_mysql_plugin i_s_innodb_admin_command;
+
+ #endif /* i_s_h */
--- /dev/null
+# name : innodb_buffer_pool_pages_i_s.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-04 20:20:44.595483291 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-06 19:28:04.055227506 +0900
+@@ -4514,6 +4514,36 @@
+ mutex_exit(block_mutex);
+ }
+
++/********************************************************************//**
++Returns the n_block'th control block of a buffer pool, counting through its chunks in order. @return pointer to the block */
++UNIV_INTERN
++buf_block_t*
++buf_page_from_array(
++/*================*/
++ buf_pool_t* buf_pool, /*!< in: buffer pool */
++ ulint n_block) /*!< in: block position, asserted < buf_pool->curr_size */
++{
++ ulint n_chunks, offset;
++ buf_chunk_t* chunk;
++
++ ut_a(n_block < buf_pool->curr_size);
++
++ chunk = buf_pool->chunks;
++ offset = n_block; /* position relative to the current chunk */
++
++ for (n_chunks = buf_pool->n_chunks; n_chunks--; chunk++) {
++ if (offset < chunk->size) {
++ return(&chunk->blocks[offset]);
++ }
++
++ offset -= chunk->size; /* skip past this chunk's blocks */
++ }
++
++ ut_error; /* reached only if no chunk contained the position */
++
++ return(NULL);
++}
++
+ /*********************************************************************//**
+ Asserts that all file pages in the buffer are in a replaceable state.
+ @return TRUE */
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 20:20:44.614551139 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-06 19:23:47.622195800 +0900
+@@ -12042,6 +12042,9 @@
+ i_s_innodb_sys_stats,
+ i_s_innodb_table_stats,
+ i_s_innodb_index_stats,
++i_s_innodb_buffer_pool_pages,
++i_s_innodb_buffer_pool_pages_index,
++i_s_innodb_buffer_pool_pages_blob,
+ i_s_innodb_admin_command
+ mysql_declare_plugin_end;
+
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-04 19:46:39.786513783 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-06 19:28:52.270226921 +0900
+@@ -51,6 +51,7 @@
+ #include "trx0sys.h" /* for trx_sys */
+ #include "dict0dict.h" /* for dict_sys */
+ #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
++#include "btr0btr.h" /* for btr_page_get_index_id */
+ }
+
+ static const char plugin_author[] = "Innobase Oy";
+@@ -4329,3 +4330,701 @@
+ STRUCT_FLD(system_vars, NULL),
+ STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Column definitions for the INNODB_BUFFER_POOL_PAGES table, in the order the fill function stores them. */
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_fields_info[] =
++{
++ {STRUCT_FLD(field_name, "page_type"),
++ STRUCT_FLD(field_length, 64),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++/* Column definitions for the INNODB_BUFFER_POOL_PAGES_INDEX table, in the order the fill function stores them. */
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_index_fields_info[] =
++{
++ {STRUCT_FLD(field_name, "index_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "n_recs"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "data_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "hashed"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "access_time"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "modified"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "dirty"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "old"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++/* Column definitions for the INNODB_BUFFER_POOL_PAGES_BLOB table, in the order the fill function stores them. */
++static ST_FIELD_INFO i_s_innodb_buffer_pool_pages_blob_fields_info[] =
++{
++ {STRUCT_FLD(field_name, "space_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "compressed"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "part_len"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "next_page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "lru_position"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fix_count"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "flush_type"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages with one row per buffer pool block. */
++static
++int
++i_s_innodb_buffer_pool_pages_fill(
++/*================*/
++ /* out: 0 on success, 1 on failure */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++ ulint i;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_fill");
++
++ /* deny access to non-superusers */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ for (i = 0; i < srv_buf_pool_instances && status == 0; i++) { /* stop scanning further instances once a row failed to store */
++ ulint n_block;
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++
++ buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++ for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++ buf_block_t* block = buf_page_from_array(buf_pool, n_block);
++ const buf_frame_t* frame = block->frame;
++
++ char page_type[64];
++
++ switch(fil_page_get_type(frame))
++ {
++ case FIL_PAGE_INDEX:
++ strcpy(page_type, "index");
++ break;
++ case FIL_PAGE_UNDO_LOG:
++ strcpy(page_type, "undo_log");
++ break;
++ case FIL_PAGE_INODE:
++ strcpy(page_type, "inode");
++ break;
++ case FIL_PAGE_IBUF_FREE_LIST:
++ strcpy(page_type, "ibuf_free_list");
++ break;
++ case FIL_PAGE_TYPE_ALLOCATED:
++ strcpy(page_type, "allocated");
++ break;
++ case FIL_PAGE_IBUF_BITMAP:
++ strcpy(page_type, "bitmap");
++ break;
++ case FIL_PAGE_TYPE_SYS:
++ strcpy(page_type, "sys");
++ break;
++ case FIL_PAGE_TYPE_TRX_SYS:
++ strcpy(page_type, "trx_sys");
++ break;
++ case FIL_PAGE_TYPE_FSP_HDR:
++ strcpy(page_type, "fsp_hdr");
++ break;
++ case FIL_PAGE_TYPE_XDES:
++ strcpy(page_type, "xdes");
++ break;
++ case FIL_PAGE_TYPE_BLOB:
++ strcpy(page_type, "blob");
++ break;
++ case FIL_PAGE_TYPE_ZBLOB:
++ strcpy(page_type, "zblob");
++ break;
++ case FIL_PAGE_TYPE_ZBLOB2:
++ strcpy(page_type, "zblob2");
++ break;
++ default:
++ sprintf(page_type, "unknown (type=%lu)", (unsigned long) fil_page_get_type(frame)); /* the type is unsigned; cast so it matches %lu on every platform */
++ }
++
++ field_store_string(table->field[0], page_type);
++ table->field[1]->store(block->page.space);
++ table->field[2]->store(block->page.offset);
++ table->field[3]->store(0); /* lru_position: always stored as 0 */
++ table->field[4]->store(block->page.buf_fix_count);
++ table->field[5]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves this instance; the outer loop then ends via status */
++ }
++
++ }
++
++ buf_pool_mutex_exit(buf_pool);
++ }
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_index with one row per FIL_PAGE_INDEX block. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_fill(
++/*================*/
++ /* out: 0 on success, 1 on failure */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++ ulint i;
++ index_id_t index_id;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_fill");
++
++ /* deny access to non-superusers */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ for (i = 0; i < srv_buf_pool_instances && status == 0; i++) { /* stop scanning further instances once a row failed to store */
++ ulint n_block;
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++
++ buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++ for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++ buf_block_t* block = buf_page_from_array(buf_pool, n_block);
++ const buf_frame_t* frame = block->frame;
++
++ if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
++ index_id = btr_page_get_index_id(frame);
++ table->field[0]->store(index_id);
++ table->field[1]->store(block->page.space);
++ table->field[2]->store(block->page.offset);
++ table->field[3]->store(page_get_n_recs(frame));
++ table->field[4]->store(page_get_data_size(frame));
++ table->field[5]->store(block->is_hashed);
++ table->field[6]->store(block->page.access_time);
++ table->field[7]->store(block->page.newest_modification != 0);
++ table->field[8]->store(block->page.oldest_modification != 0);
++ table->field[9]->store(block->page.old);
++ table->field[10]->store(0); /* lru_position: always stored as 0 */
++ table->field[11]->store(block->page.buf_fix_count);
++ table->field[12]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves this instance; the outer loop then ends via status */
++ }
++ }
++ }
++
++ buf_pool_mutex_exit(buf_pool);
++ }
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Fill the dynamic table information_schema.innodb_buffer_pool_pages_blob with one row per FIL_PAGE_TYPE_BLOB block. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_fill(
++/*================*/
++ /* out: 0 on success, 1 on failure */
++ THD* thd, /* in: thread */
++ TABLE_LIST* tables, /* in/out: tables to fill */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0;
++ ulint i;
++
++ ulint part_len;
++ ulint next_page_no;
++
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_fill");
++
++ /* deny access to non-superusers */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++
++ for (i = 0; i < srv_buf_pool_instances && status == 0; i++) { /* stop scanning further instances once a row failed to store */
++ ulint n_block;
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++
++ buf_pool_mutex_enter(buf_pool); /* held for the whole scan of this instance */
++
++ for (n_block = 0; n_block < buf_pool->curr_size; n_block++) {
++ buf_block_t* block = buf_page_from_array(buf_pool, n_block);
++ page_zip_des_t* block_page_zip = buf_block_get_page_zip(block);
++ const buf_frame_t* frame = block->frame;
++
++ if (fil_page_get_type(frame) == FIL_PAGE_TYPE_BLOB) {
++
++ if (UNIV_LIKELY_NULL(block_page_zip)) {
++ part_len = 0; /* hmm, can't figure it out */
++
++ next_page_no = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_NEXT);
++ } else {
++ part_len = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_DATA
++ + 0 /*BTR_BLOB_HDR_PART_LEN*/);
++
++ next_page_no = mach_read_from_4(
++ buf_block_get_frame(block)
++ + FIL_PAGE_DATA
++ + 4 /*BTR_BLOB_HDR_NEXT_PAGE_NO*/);
++ }
++
++ table->field[0]->store(block->page.space);
++ table->field[1]->store(block->page.offset);
++ table->field[2]->store(block_page_zip != NULL);
++ table->field[3]->store(part_len);
++
++ if(next_page_no == FIL_NULL)
++ {
++ table->field[4]->store(0); /* no next page: reported as 0 */
++ } else {
++ table->field[4]->store(next_page_no); /* the next page read above, not this page's own offset */
++ }
++
++ table->field[5]->store(0); /* lru_position: always stored as 0 */
++ table->field[6]->store(block->page.buf_fix_count);
++ table->field[7]->store(block->page.flush_type);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1;
++ break; /* leaves this instance; the outer loop then ends via status */
++ }
++
++ }
++ }
++
++ buf_pool_mutex_exit(buf_pool);
++ }
++
++ DBUG_RETURN(status);
++}
++
++/***********************************************************************
++Bind the dynamic table information_schema.innodb_buffer_pool_pages. */
++static
++int
++i_s_innodb_buffer_pool_pages_init(
++/*=========*/
++ /* out: 0 on success */
++ void* p) /* in/out: table schema object */
++{
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_buffer_pool_pages_fields_info; /* column list */
++ schema->fill_table = i_s_innodb_buffer_pool_pages_fill; /* row producer */
++
++ DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Bind the dynamic table information_schema.innodb_buffer_pool_pages_index. */
++static
++int
++i_s_innodb_buffer_pool_pages_index_init(
++/*=========*/
++ /* out: 0 on success */
++ void* p) /* in/out: table schema object */
++{
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_index_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_buffer_pool_pages_index_fields_info; /* column list */
++ schema->fill_table = i_s_innodb_buffer_pool_pages_index_fill; /* row producer */
++
++ DBUG_RETURN(0);
++}
++
++/***********************************************************************
++Bind the dynamic table information_schema.innodb_buffer_pool_pages_blob. */
++static
++int
++i_s_innodb_buffer_pool_pages_blob_init(
++/*=========*/
++ /* out: 0 on success */
++ void* p) /* in/out: table schema object */
++{
++ DBUG_ENTER("i_s_innodb_buffer_pool_pages_blob_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_buffer_pool_pages_blob_fields_info; /* column list */
++ schema->fill_table = i_s_innodb_buffer_pool_pages_blob_fill; /* row producer */
++
++ DBUG_RETURN(0);
++}
++
++/* Plugin descriptor for the INNODB_BUFFER_POOL_PAGES information schema table. */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++/* Plugin descriptor for the INNODB_BUFFER_POOL_PAGES_INDEX information schema table. */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_INDEX"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool index pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_index_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++/* Plugin descriptor for the INNODB_BUFFER_POOL_PAGES_BLOB information schema table. */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_BUFFER_POOL_PAGES_BLOB"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB buffer pool blob pages"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_buffer_pool_pages_blob_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h 2010-12-04 19:46:39.657513849 +0900
++++ b/storage/innobase/handler/i_s.h 2010-12-06 19:23:47.635192988 +0900
+@@ -45,5 +45,8 @@
+ extern struct st_mysql_plugin i_s_innodb_table_stats;
+ extern struct st_mysql_plugin i_s_innodb_index_stats;
+ extern struct st_mysql_plugin i_s_innodb_admin_command;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_index;
++extern struct st_mysql_plugin i_s_innodb_buffer_pool_pages_blob;
+
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-04 19:46:40.197471531 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-06 19:23:47.638195824 +0900
+@@ -1072,6 +1072,14 @@
+ /*===========*/
+ const buf_pool_t* buf_pool) /*!< in: buffer pool */
+ __attribute__((nonnull, const));
++/********************************************************************//**
++Returns the n_block'th control block of a buffer pool. @return pointer to the block */
++UNIV_INTERN
++buf_block_t*
++buf_page_from_array(
++/*================*/
++ buf_pool_t* buf_pool, /*!< in: buffer pool */
++ ulint n_block); /*!< in: block position within the pool */
+ /******************************************************************//**
+ Returns the buffer pool instance given a page instance
+ @return buf_pool */
--- /dev/null
+# name : innodb_buffer_pool_shm.patch
+# introduced : 12
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-04 19:46:39.372513543 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-07 17:56:28.302087851 +0900
+@@ -183,7 +183,7 @@
+ void* buf, /*!< in: buffer frame to deallocate */
+ ibool have_page_hash_mutex)
+ {
+- const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
++ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf_pool, buf);
+ buf_page_t* bpage;
+ buf_block_t* block;
+
+@@ -227,7 +227,7 @@
+ buf_block_t* block) /*!< in: buffer frame to allocate */
+ {
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+- const ulint fold = BUF_POOL_ZIP_FOLD(block);
++ const ulint fold = BUF_POOL_ZIP_FOLD(buf_pool, block);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-06 20:16:21.726195340 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-07 20:40:30.824749814 +0900
+@@ -53,6 +53,10 @@
+ #include "page0zip.h"
+ #include "trx0trx.h"
+ #include "srv0start.h"
++#include "que0que.h"
++#include "read0read.h"
++#include "row0row.h"
++#include "ha_prototypes.h"
+
+ /* prototypes for new functions added to ha_innodb.cc */
+ trx_t* innobase_get_trx();
+@@ -342,6 +346,31 @@
+ was allocated for the frames */
+ buf_block_t* blocks; /*!< array of buffer control blocks */
+ };
++
++/* Buffer pool shared memory segment information */
++typedef struct buf_shm_info_struct buf_shm_info_t;
++
++struct buf_shm_info_struct {
++ char head_str[8]; /* set to BUF_SHM_INFO_HEAD when a new segment is initialized */
++ ulint binary_id; /* id of the running binary, stored when a new segment is initialized */
++ ibool is_new; /* TRUE while initializing; set to FALSE when initialization is finished */
++ ibool clean; /* set to TRUE when the segment is freed (clean shutdown) */
++ ibool reusable; /* reusable */
++ ulint buf_pool_size; /* backup value */
++ ulint page_size; /* backup value */
++ ulint frame_offset; /* offset of the first frame based on chunk->mem */
++ ulint zip_hash_offset;
++ ulint zip_hash_n;
++
++ ulint checksum;
++
++ buf_pool_t buf_pool_backup;
++ buf_chunk_t chunk_backup;
++
++ ib_uint64_t dummy;
++};
++
++#define BUF_SHM_INFO_HEAD "XTRA_SHM"
+ #endif /* !UNIV_HOTBACKUP */
+
+ /********************************************************************//**
+@@ -988,6 +1017,58 @@
+ #endif /* UNIV_SYNC_DEBUG */
+ }
+
++static
++void
++buf_block_reuse(
++/*============*/
++ buf_block_t* block, /*!< in/out: control block to re-initialize */
++ ptrdiff_t frame_offset) /*!< in: byte delta added to the frame and zip.data pointers */
++{
++ /* block_init, except that the existing frame pointer is shifted by frame_offset */
++ block->frame += frame_offset;
++
++ UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);
++
++ block->index = NULL;
++ block->btr_search_latch = NULL;
++
++#ifdef UNIV_DEBUG
++ /* recreate later */
++ block->page.in_page_hash = FALSE;
++ block->page.in_zip_hash = FALSE;
++#endif /* UNIV_DEBUG */
++
++#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++ block->n_pointers = 0;
++#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++
++ if (block->page.zip.data) /* shift the compressed page pointer too, when one is set */
++ block->page.zip.data += frame_offset;
++
++ block->is_hashed = FALSE;
++
++#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
++ /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
++ of buffer block mutex/rwlock with performance schema. If
++ PFS_GROUP_BUFFER_SYNC is defined, skip the registration
++ since buffer block mutex/rwlock will be registered later in
++ pfs_register_buffer_block() */
++
++ mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
++ rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
++#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
++ mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
++ rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
++#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
++
++ ut_ad(rw_lock_validate(&(block->lock)));
++
++#ifdef UNIV_SYNC_DEBUG
++ rw_lock_create(buf_block_debug_latch_key,
++ &block->debug_latch, SYNC_NO_ORDER_CHECK);
++#endif /* UNIV_SYNC_DEBUG */
++}
++
+ /********************************************************************//**
+ Allocates a chunk of buffer frames.
+ @return chunk, or NULL on failure */
+@@ -1001,26 +1082,188 @@
+ {
+ buf_block_t* block;
+ byte* frame;
++ ulint zip_hash_n = 0;
++ ulint zip_hash_mem_size = 0;
++ hash_table_t* zip_hash_tmp = NULL;
+ ulint i;
++ buf_shm_info_t* shm_info = NULL;
+
+ /* Round down to a multiple of page size,
+ although it already should be. */
+ mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
++
++ srv_buffer_pool_shm_is_reused = FALSE;
++
++ if (srv_buffer_pool_shm_key) {
++ /* zip_hash size */
++ zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
++ zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
++ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++ }
++
+ /* Reserve space for the block descriptors. */
+ mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
+ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++ if (srv_buffer_pool_shm_key) {
++ mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
++ + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
++ mem_size += zip_hash_mem_size;
++ }
+
+ chunk->mem_size = mem_size;
++
++ if (srv_buffer_pool_shm_key) {
++ ulint binary_id;
++ ibool is_new;
++
++ ut_a(buf_pool->n_chunks == 1);
++
++ fprintf(stderr,
++ "InnoDB: Notice: The innodb_buffer_pool_shm_key option has been specified.\n"
++ "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
++ "InnoDB: * the mysqld executable between restarts of the server.\n"
++ "InnoDB: * the value of innodb_buffer_pool_size.\n"
++ "InnoDB: * the value of innodb_page_size.\n"
++ "InnoDB: * datafiles created by InnoDB during this session.\n"
++ "InnoDB: Otherwise, data corruption in datafiles may result.\n");
++
++ /* FIXME: this is still only a vague identifier of the binary */
++ binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
++ + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
++ + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
++ + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
++ + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
++ + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
++ + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
++ + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
++ + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
++ + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
++ + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);
++
++ chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);
++
++ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
++ return(NULL);
++ }
++init_again:
++#ifdef UNIV_SET_MEM_TO_ZERO
++ if (is_new) {
++ memset(chunk->mem, '\0', chunk->mem_size);
++ }
++#endif
++ /* for ut_fold_binary_32(), these values should be 32-bit aligned */
++ ut_a(sizeof(buf_shm_info_t) % 4 == 0);
++ ut_a((ulint)chunk->mem % 4 == 0);
++ ut_a(chunk->mem_size % 4 == 0);
++
++ shm_info = chunk->mem;
++
++ zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);
++
++ if (is_new) {
++ strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
++ shm_info->binary_id = binary_id;
++ shm_info->is_new = TRUE; /* changed to FALSE when the initialization is finished */
++ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
++ shm_info->reusable = FALSE; /* changed to TRUE when validation is finished. */
++ shm_info->buf_pool_size = srv_buf_pool_size;
++ shm_info->page_size = srv_page_size;
++ shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
++ shm_info->zip_hash_n = zip_hash_n;
++ } else {
++ ulint checksum;
++
++ if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
++ fprintf(stderr,
++ "InnoDB: Error: The shared memory segment seems not to be for buffer pool.\n");
++ return(NULL);
++ }
++ if (shm_info->binary_id != binary_id) {
++ fprintf(stderr,
++ "InnoDB: Error: The shared memory segment seems not to be for this binary.\n");
++ return(NULL);
++ }
++ if (shm_info->is_new) {
++ fprintf(stderr,
++ "InnoDB: Error: The shared memory was not initialized yet.\n");
++ return(NULL);
++ }
++ if (shm_info->buf_pool_size != srv_buf_pool_size) {
++ fprintf(stderr,
++ "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
++ shm_info->buf_pool_size, srv_buf_pool_size);
++ return(NULL);
++ }
++ if (shm_info->page_size != srv_page_size) {
++ fprintf(stderr,
++ "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
++ shm_info->page_size, srv_page_size);
++ return(NULL);
++ }
++ if (!shm_info->reusable) {
++ fprintf(stderr,
++ "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
++ "InnoDB: The shared memory segment is initialized.\n");
++ is_new = TRUE;
++ goto init_again;
++ }
++ if (!shm_info->clean) {
++ fprintf(stderr,
++ "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
++ "InnoDB: The shared memory segment is initialized.\n");
++ is_new = TRUE;
++ goto init_again;
++ }
++
++ ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
++ ut_a(shm_info->zip_hash_n == zip_hash_n);
++
++ /* check checksum */
++ if (srv_buffer_pool_shm_checksum) {
++ checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
++ chunk->mem_size - sizeof(buf_shm_info_t));
++ } else {
++ checksum = BUF_NO_CHECKSUM_MAGIC;
++ }
++
++ if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
++ && shm_info->checksum != checksum) {
++ fprintf(stderr,
++ "InnoDB: Error: checksum of the shared memory is not match. "
++ "(stored=%lu calculated=%lu)\n",
++ shm_info->checksum, checksum);
++ return(NULL);
++ }
++
++ /* flag to use the segment. */
++ shm_info->clean = FALSE; /* changed to TRUE when free the segment. */
++ }
++
++ /* init zip_hash contents */
++ if (is_new) {
++ hash_create_init(zip_hash_tmp, zip_hash_n);
++ } else {
++ /* adjust offset is done later */
++ hash_create_reuse(zip_hash_tmp);
++
++ srv_buffer_pool_shm_is_reused = TRUE;
++ }
++ } else {
+ chunk->mem = os_mem_alloc_large(&chunk->mem_size);
+
+ if (UNIV_UNLIKELY(chunk->mem == NULL)) {
+
+ return(NULL);
+ }
++ }
+
+ /* Allocate the block descriptors from
+ the start of the memory block. */
++ if (srv_buffer_pool_shm_key) {
++ chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
++ } else {
+ chunk->blocks = chunk->mem;
++ }
+
+ /* Align a pointer to the first frame. Note that when
+ os_large_page_size is smaller than UNIV_PAGE_SIZE,
+@@ -1028,8 +1271,13 @@
+ it is bigger, we may allocate more blocks than requested. */
+
+ frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
++ if (srv_buffer_pool_shm_key) {
++ /* reserve zip_hash space and always subtract 1 for reproducibility */
++ chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
++ } else {
+ chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
+ - (frame != chunk->mem);
++ }
+
+ /* Subtract the space needed for block descriptors. */
+ {
+@@ -1043,6 +1291,98 @@
+ chunk->size = size;
+ }
+
++ if (shm_info && !(shm_info->is_new)) {
++ /* convert the shared memory segment for reuse */
++ ptrdiff_t phys_offset;
++ ptrdiff_t logi_offset;
++ ptrdiff_t blocks_offset;
++ void* previous_frame_address;
++
++ if (chunk->size < shm_info->chunk_backup.size) {
++ fprintf(stderr,
++ "InnoDB: Error: The buffer pool became smaller because of allocated address.\n"
++ "InnoDB: Retrying may avoid this situation.\n");
++ shm_info->clean = TRUE; /* release the flag for retrying */
++ return(NULL);
++ }
++
++ chunk->size = shm_info->chunk_backup.size;
++ phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
++ logi_offset = frame - chunk->blocks[0].frame;
++ previous_frame_address = chunk->blocks[0].frame;
++ blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;
++
++ if (phys_offset || logi_offset || blocks_offset) {
++ fprintf(stderr,
++ "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
++ "InnoDB: Previous frames in address : %p\n"
++ "InnoDB: Previous frames were located : %p\n"
++ "InnoDB: Current frames should be located: %p\n"
++ "InnoDB: Pysical offset : %ld (%#lx)\n"
++ "InnoDB: Logical offset (frames) : %ld (%#lx)\n"
++ "InnoDB: Logical offset (blocks) : %ld (%#lx)\n",
++ (byte*)chunk->mem + shm_info->frame_offset,
++ chunk->blocks[0].frame, frame,
++ phys_offset, phys_offset, logi_offset, logi_offset,
++ blocks_offset, blocks_offset);
++ } else {
++ fprintf(stderr,
++ "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
++ }
++
++ if (phys_offset) {
++ fprintf(stderr,
++ "InnoDB: Aligning physical offset...");
++
++ memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
++ chunk->size * UNIV_PAGE_SIZE);
++
++ fprintf(stderr,
++ " Done.\n");
++ }
++
++ /* buf_block_t */
++ block = chunk->blocks;
++ for (i = chunk->size; i--; ) {
++ buf_block_reuse(block, logi_offset);
++ block++;
++ }
++
++ if (logi_offset || blocks_offset) {
++ fprintf(stderr,
++ "InnoDB: Aligning logical offset...");
++
++
++ /* buf_pool_t buf_pool_backup */
++ UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
++ previous_frame_address, logi_offset, blocks_offset);
++ UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
++ previous_frame_address, logi_offset, blocks_offset);
++ UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
++ previous_frame_address, logi_offset, blocks_offset);
++ if (shm_info->buf_pool_backup.LRU_old)
++ shm_info->buf_pool_backup.LRU_old =
++ (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
++ + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
++ ? logi_offset : blocks_offset));
++
++ UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
++ previous_frame_address, logi_offset, blocks_offset);
++
++ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
++ previous_frame_address, logi_offset, blocks_offset);
++ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
++ UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
++ previous_frame_address, logi_offset, blocks_offset);
++ }
++
++ HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
++ previous_frame_address, logi_offset, blocks_offset);
++
++ fprintf(stderr,
++ " Done.\n");
++ }
++ } else {
+ /* Init block structs and assign frames for them. Then we
+ assign the frames to the first blocks (we already mapped the
+ memory above). */
+@@ -1068,6 +1408,11 @@
+ block++;
+ frame += UNIV_PAGE_SIZE;
+ }
++ }
++
++ if (shm_info) {
++ shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
++ }
+
+ #ifdef PFS_GROUP_BUFFER_SYNC
+ pfs_register_buffer_block(chunk);
+@@ -1249,6 +1594,8 @@
+ UNIV_MEM_UNDESC(block);
+ }
+
++ ut_a(!srv_buffer_pool_shm_key);
++
+ os_mem_free_large(chunk->mem, chunk->mem_size);
+ }
+
+@@ -1289,7 +1636,7 @@
+ ulint instance_no) /*!< in: id of the instance */
+ {
+ ulint i;
+- buf_chunk_t* chunk;
++ buf_chunk_t* chunk = NULL;
+
+ /* 1. Initialize general fields
+ ------------------------------- */
+@@ -1335,7 +1682,10 @@
+ buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
+
+ buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
++ /* zip_hash is allocated to shm when srv_buffer_pool_shm_key is enabled */
++ if (!srv_buffer_pool_shm_key) {
+ buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
++ }
+
+ buf_pool->last_printout_time = ut_time();
+ }
+@@ -1354,6 +1704,86 @@
+
+ /* All fields are initialized by mem_zalloc(). */
+
++ if (chunk && srv_buffer_pool_shm_key) {
++ buf_shm_info_t* shm_info;
++
++ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
++ shm_info = chunk->mem;
++
++ buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);
++
++ if(shm_info->is_new) {
++ shm_info->is_new = FALSE; /* initialization was finished */
++ } else {
++ buf_block_t* block = chunk->blocks;
++ buf_page_t* b;
++
++ /* shm_info->buf_pool_backup should be converted */
++ /* at buf_chunk_init(). So copy simply. */
++ buf_pool->flush_list = shm_info->buf_pool_backup.flush_list;
++ buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock;
++ buf_pool->free = shm_info->buf_pool_backup.free;
++ buf_pool->LRU = shm_info->buf_pool_backup.LRU;
++ buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old;
++ buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len;
++ buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU;
++ buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean;
++ for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
++ buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i];
++ }
++
++ for (i = 0; i < chunk->size; i++, block++) {
++ if (buf_block_get_state(block)
++ == BUF_BLOCK_FILE_PAGE) {
++ ut_d(block->page.in_page_hash = TRUE);
++ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++ buf_page_address_fold(
++ block->page.space,
++ block->page.offset),
++ &block->page);
++ }
++ }
++
++ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
++ ut_ad(!b->in_flush_list);
++ ut_ad(b->in_LRU_list);
++
++ ut_d(b->in_page_hash = TRUE);
++ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++ buf_page_address_fold(b->space, b->offset), b);
++ }
++
++ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
++ ut_ad(b->in_flush_list);
++ ut_ad(b->in_LRU_list);
++
++ switch (buf_page_get_state(b)) {
++ case BUF_BLOCK_ZIP_DIRTY:
++ ut_d(b->in_page_hash = TRUE);
++ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
++ buf_page_address_fold(b->space,
++ b->offset), b);
++ break;
++ case BUF_BLOCK_FILE_PAGE:
++ /* uncompressed page */
++ break;
++ case BUF_BLOCK_ZIP_FREE:
++ case BUF_BLOCK_ZIP_PAGE:
++ case BUF_BLOCK_NOT_USED:
++ case BUF_BLOCK_READY_FOR_USE:
++ case BUF_BLOCK_MEMORY:
++ case BUF_BLOCK_REMOVE_HASH:
++ ut_error;
++ break;
++ }
++ }
++
++
++ }
++ }
++
+ mutex_exit(&buf_pool->LRU_list_mutex);
+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+@@ -1373,6 +1803,42 @@
+ buf_chunk_t* chunk;
+ buf_chunk_t* chunks;
+
++ if (srv_buffer_pool_shm_key) {
++ buf_shm_info_t* shm_info;
++
++ ut_a(buf_pool->n_chunks == 1);
++
++ chunk = buf_pool->chunks;
++ shm_info = chunk->mem;
++ ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
++
++ /* if opened, close shm. */
++ if (!shm_info->clean) {
++ /* Validate that the shared memory segment has no unrecoverable contents. */
++ /* Currently no validation is needed, so the segment is always marked reusable. */
++ shm_info->reusable = TRUE;
++
++ memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
++ memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));
++
++ if (srv_fast_shutdown < 2) {
++ if (srv_buffer_pool_shm_checksum) {
++ shm_info->checksum =
++ ut_fold_binary_32(
++ (byte*)chunk->mem + sizeof(buf_shm_info_t),
++ chunk->mem_size - sizeof(buf_shm_info_t));
++ } else {
++ shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
++ }
++ shm_info->clean = TRUE;
++ }
++
++ fprintf(stderr,
++ "InnoDB: The shared memory was closed.\n");
++ }
++
++ os_shm_free(chunk->mem, chunk->mem_size);
++ } else {
+ chunks = buf_pool->chunks;
+ chunk = chunks + buf_pool->n_chunks;
+
+@@ -1381,10 +1847,13 @@
+ would fail at shutdown. */
+ os_mem_free_large(chunk->mem, chunk->mem_size);
+ }
++ }
+
+ mem_free(buf_pool->chunks);
+ hash_table_free(buf_pool->page_hash);
++ if (!srv_buffer_pool_shm_key) {
+ hash_table_free(buf_pool->zip_hash);
++ }
+ }
+
+ /********************************************************************//**
+@@ -1668,6 +2137,11 @@
+ //buf_pool_mutex_enter(buf_pool);
+ mutex_enter(&buf_pool->LRU_list_mutex);
+
++ if (srv_buffer_pool_shm_key) {
++ /* Cannot support shrink */
++ goto func_done;
++ }
++
+ shrink_again:
+ if (buf_pool->n_chunks <= 1) {
+
+@@ -1848,7 +2322,7 @@
+ zip_hash = hash_create(2 * buf_pool->curr_size);
+
+ HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
+- BUF_POOL_ZIP_FOLD_BPAGE);
++ buf_pool, BUF_POOL_ZIP_FOLD_BPAGE);
+
+ hash_table_free(buf_pool->zip_hash);
+ buf_pool->zip_hash = zip_hash;
+@@ -2130,6 +2604,11 @@
+ ulint change_size;
+ ulint min_change_size = 1048576 * srv_buf_pool_instances;
+
++ if (srv_buffer_pool_shm_key) {
++ /* Cannot support resize */
++ return;
++ }
++
+ buf_pool_mutex_enter_all();
+
+ if (srv_buf_pool_old_size == srv_buf_pool_size) {
+diff -ruN a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c
+--- a/storage/innobase/ha/hash0hash.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ha/hash0hash.c 2010-12-07 16:10:14.937749140 +0900
+@@ -133,6 +133,70 @@
+ }
+
+ /*************************************************************//**
++Computes the bytes needed for a hash_table_t header (rounded up to a multiple of 8) followed by its cell array. @return size in bytes */
++UNIV_INTERN
++ulint
++hash_create_needed(
++/*===============*/
++ ulint n) /*!< in: requested number of cells; the value ut_find_prime(n) is what gets sized */
++{
++ ulint prime;
++ ulint offset;
++
++ prime = ut_find_prime(n);
++
++ offset = (sizeof(hash_table_t) + 7) / 8; /* header size in 8-byte units, rounded up */
++ offset *= 8;
++
++ return(offset + sizeof(hash_cell_t) * prime);
++}
++/* Initializes a hash table in place: header at 'table', cell array right after the 8-byte-rounded header. */
++UNIV_INTERN
++void
++hash_create_init(
++/*=============*/
++ hash_table_t* table, /*!< in/out: memory to initialize; presumably at least hash_create_needed(n) bytes -- confirm at callers */
++ ulint n) /*!< in: requested number of cells; ut_find_prime(n) cells are used */
++{
++ ulint prime;
++ ulint offset;
++
++ prime = ut_find_prime(n);
++
++ offset = (sizeof(hash_table_t) + 7) / 8; /* same rounding as hash_create_needed() */
++ offset *= 8;
++
++ table->array = (hash_cell_t*)(((byte*)table) + offset);
++ table->n_cells = prime;
++# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++ table->adaptive = FALSE;
++# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++ table->n_mutexes = 0; /* no mutexes and no heaps are attached to this table */
++ table->mutexes = NULL;
++ table->heaps = NULL;
++ table->heap = NULL;
++ ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
++
++ /* Initialize the cell array */
++ hash_table_clear(table);
++}
++/* Re-points table->array at the cell array that follows the header; no other field is modified. */
++UNIV_INTERN
++void
++hash_create_reuse(
++/*==============*/
++ hash_table_t* table) /*!< in/out: table previously set up by hash_create_init() (checked via magic_n in debug builds) */
++{
++ ulint offset;
++
++ offset = (sizeof(hash_table_t) + 7) / 8; /* same rounding as hash_create_init() */
++ offset *= 8;
++
++ table->array = (hash_cell_t*)(((byte*)table) + offset); /* the stored pointer may be stale if the memory is now at another address */
++ ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
++}
++
++/*************************************************************//**
+ Frees a hash table. */
+ UNIV_INTERN
+ void
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-06 20:16:21.733263627 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-07 17:56:28.316139830 +0900
+@@ -194,6 +194,7 @@
+ static my_bool innobase_create_status_file = FALSE;
+ static my_bool innobase_stats_on_metadata = TRUE;
+ static my_bool innobase_use_sys_stats_table = FALSE;
++static my_bool innobase_buffer_pool_shm_checksum = TRUE;
+
+
+ static char* internal_innobase_data_file_path = NULL;
+@@ -2620,6 +2621,14 @@
+ srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
+ srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
+
++ if (srv_buffer_pool_shm_key && srv_buf_pool_instances > 1) {
++ fprintf(stderr,
++ "InnoDB: Warning: innodb_buffer_pool_shm_key cannot be used with several innodb_buffer_pool_instances.\n"
++ "InnoDB: innodb_buffer_pool_instances was set to 1.\n");
++ srv_buf_pool_instances = 1;
++ innobase_buffer_pool_instances = 1;
++ }
++
+ srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
+
+ srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+@@ -2636,6 +2645,7 @@
+ srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+ srv_use_checksums = (ibool) innobase_use_checksums;
+ srv_fast_checksum = (ibool) innobase_fast_checksum;
++ srv_buffer_pool_shm_checksum = (ibool) innobase_buffer_pool_shm_checksum;
+
+ #ifdef HAVE_LARGE_PAGES
+ if ((os_use_large_pages = (ibool) my_use_large_pages))
+@@ -11642,6 +11652,16 @@
+ "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
+ NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
+
++static MYSQL_SYSVAR_UINT(buffer_pool_shm_key, srv_buffer_pool_shm_key,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "[experimental] The key value of shared memory segment for the buffer pool. 0 (default) disables the feature.",
++ NULL, NULL, 0, 0, INT_MAX32, 0);
++
++static MYSQL_SYSVAR_BOOL(buffer_pool_shm_checksum, innobase_buffer_pool_shm_checksum,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Enable buffer_pool_shm checksum validation (enabled by default).",
++ NULL, NULL, TRUE);
++
+ static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
+ PLUGIN_VAR_RQCMDARG,
+ "Helps in performance tuning in heavily concurrent environments.",
+@@ -11921,6 +11941,8 @@
+ MYSQL_SYSVAR(autoextend_increment),
+ MYSQL_SYSVAR(buffer_pool_size),
+ MYSQL_SYSVAR(buffer_pool_instances),
++ MYSQL_SYSVAR(buffer_pool_shm_key),
++ MYSQL_SYSVAR(buffer_pool_shm_checksum),
+ MYSQL_SYSVAR(checksums),
+ MYSQL_SYSVAR(fast_checksum),
+ MYSQL_SYSVAR(commit_concurrency),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-06 20:16:21.778264552 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-07 17:56:28.322749380 +0900
+@@ -36,6 +36,7 @@
+ #ifndef UNIV_HOTBACKUP
+ #include "ut0rbt.h"
+ #include "os0proc.h"
++#include "srv0srv.h"
+
+ /** @name Modes for buf_page_get_gen */
+ /* @{ */
+@@ -1520,9 +1521,12 @@
+ /**********************************************************************//**
+ Compute the hash fold value for blocks in buf_pool->zip_hash. */
+ /* @{ */
+-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
++/* With srv_buffer_pool_shm_key set, the fold is the page-sized offset of ptr from the frame of buf_page_from_array(bpool, 0), not the absolute address (presumably so folds survive re-attaching the segment elsewhere). */
++#define BUF_POOL_ZIP_FOLD_PTR(bpool, ptr) (!srv_buffer_pool_shm_key\
++ ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
++ :((ulint) ((byte*)(ptr) - (byte*)(buf_page_from_array(bpool, 0)->frame)) / UNIV_PAGE_SIZE))
++#define BUF_POOL_ZIP_FOLD(bpool, b) BUF_POOL_ZIP_FOLD_PTR(bpool, (b)->frame)
++#define BUF_POOL_ZIP_FOLD_BPAGE(bpool, b) BUF_POOL_ZIP_FOLD(bpool, (buf_block_t*) (b))
+ /* @} */
+
+ /** @brief The buffer pool statistics structure. */
+diff -ruN a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
+--- a/storage/innobase/include/hash0hash.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/hash0hash.h 2010-12-07 17:56:28.324726446 +0900
+@@ -49,6 +49,28 @@
+ hash_create(
+ /*========*/
+ ulint n); /*!< in: number of array cells */
++
++/*************************************************************//**
++Computes the bytes needed for a hash table header plus its cell array. @return size in bytes */
++UNIV_INTERN
++ulint
++hash_create_needed(
++/*===============*/
++ ulint n); /*!< in: requested number of cells */
++/* Initializes a hash table in place, with the cell array directly after the header. */
++UNIV_INTERN
++void
++hash_create_init(
++/*=============*/
++ hash_table_t* table, /*!< in/out: memory to initialize */
++ ulint n); /*!< in: requested number of cells */
++/* Re-points table->array at the cell array that follows the header. */
++UNIV_INTERN
++void
++hash_create_reuse(
++/*==============*/
++ hash_table_t* table); /*!< in/out: table previously set up by hash_create_init() */
++
+ #ifndef UNIV_HOTBACKUP
+ /*************************************************************//**
+ Creates a mutex array to protect a hash table. */
+@@ -306,7 +328,7 @@
+ /****************************************************************//**
+ Move all hash table entries from OLD_TABLE to NEW_TABLE. */
+
+-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
++#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, BPOOL, FOLD_FUNC) \
+ do {\
+ ulint i2222;\
+ ulint cell_count2222;\
+@@ -318,7 +340,7 @@
+ \
+ while (node2222) {\
+ NODE_TYPE* next2222 = node2222->PTR_NAME;\
+- ulint fold2222 = FOLD_FUNC(node2222);\
++ ulint fold2222 = FOLD_FUNC(BPOOL, node2222);\
+ \
+ HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
+ fold2222, node2222);\
+@@ -327,6 +349,33 @@
+ }\
+ }\
+ } while (0)
++
++/********************************************************************//**
++Relocates every cell head and chain pointer of TABLE: pointers above FADDR are moved by FOFFSET bytes, all others by BOFFSET bytes. */
++#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
++do {\
++ ulint i2222;\
++ ulint cell_count2222;\
++\
++ cell_count2222 = hash_get_n_cells(TABLE);\
++\
++ for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
++ NODE_TYPE* node2222;\
++\
++ if ((TABLE)->array[i2222].node) \
++ (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
++ + (((TABLE)->array[i2222].node > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++ node2222 = HASH_GET_FIRST((TABLE), i2222);\
++\
++ while (node2222) {\
++ if (node2222->PTR_NAME) \
++ node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
++ + ((((void*)node2222->PTR_NAME) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++\
++ node2222 = node2222->PTR_NAME;\
++ }\
++ }\
++} while (0)
+
+ /************************************************************//**
+ Gets the mutex index for a fold value in a hash table.
+diff -ruN a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h
+--- a/storage/innobase/include/os0proc.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0proc.h 2010-12-07 16:10:14.955718750 +0900
+@@ -32,6 +32,11 @@
+ #ifdef UNIV_LINUX
+ #include <sys/ipc.h>
+ #include <sys/shm.h>
++#else
++# if defined HAVE_SYS_IPC_H && defined HAVE_SYS_SHM_H /* test definedness of both macros, not the value of the second */
++#include <sys/ipc.h>
++#include <sys/shm.h>
++# endif
+ #endif
+
+ typedef void* os_process_t;
+@@ -70,6 +75,29 @@
+ ulint size); /*!< in: size returned by
+ os_mem_alloc_large() */
+
++
++/****************************************************************//**
++Allocates or attaches and reuses shared memory segment.
++The content is not cleared automatically.
++@return allocated memory, or NULL on failure */
++UNIV_INTERN
++void*
++os_shm_alloc(
++/*=========*/
++ ulint* n, /*!< in/out: number of bytes */
++ uint key, /*!< in: key identifying the shared memory segment */
++ ibool* is_new); /*!< out: TRUE if the segment was newly created, FALSE if an existing one was attached */
++
++/****************************************************************//**
++Detach shared memory segment. */
++UNIV_INTERN
++void
++os_shm_free(
++/*========*/
++ void *ptr, /*!< in: pointer returned by
++ os_shm_alloc() */
++ ulint size); /*!< in: size returned by
++ os_shm_alloc() */
+ #ifndef UNIV_NONINL
+ #include "os0proc.ic"
+ #endif
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-04 20:20:28.016566697 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-07 16:10:14.956717659 +0900
+@@ -171,6 +171,10 @@
+ extern ulint srv_mem_pool_size;
+ extern ulint srv_lock_table_size;
+
++extern uint srv_buffer_pool_shm_key; /* key of the shared memory segment for the buffer pool; 0 disables the feature */
++extern ibool srv_buffer_pool_shm_is_reused; /* TRUE when an existing segment was attached and reused */
++extern ibool srv_buffer_pool_shm_checksum; /* whether a checksum of the segment is computed and verified */
++
+ extern ibool srv_thread_concurrency_timer_based;
+
+ extern ulint srv_n_file_io_threads;
+diff -ruN a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
+--- a/storage/innobase/include/ut0lst.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0lst.h 2010-12-07 16:10:14.957785525 +0900
+@@ -257,5 +257,48 @@
+ ut_a(ut_list_node_313 == NULL); \
+ } while (0)
+
++/********************************************************************//**
++Relocates the pointers of a list whose nodes have moved in memory.
++@param NAME the name of the list
++@param TYPE node type
++@param BASE base node (not a pointer to it)
++@param FADDR,FOFFSET,BOFFSET pointers above FADDR are moved by FOFFSET bytes, all others by BOFFSET bytes */
++#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \
++do { \
++ ulint ut_list_i_313; \
++ TYPE* ut_list_node_313; \
++ \
++ if ((BASE).start) \
++ (BASE).start = (void*)((byte*)((BASE).start) \
++ + (((void*)((BASE).start) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++ if ((BASE).end) \
++ (BASE).end = (void*)((byte*)((BASE).end) \
++ + (((void*)((BASE).end) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++ \
++ ut_list_node_313 = (BASE).start; \
++ \
++ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
++ ut_a(ut_list_node_313); \
++ if ((ut_list_node_313->NAME).prev) \
++ (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
++ + (((void*)((ut_list_node_313->NAME).prev) > (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++ if ((ut_list_node_313->NAME).next) \
++ (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
++ + (((void*)((ut_list_node_313->NAME).next)> (void*)(FADDR))?(FOFFSET):(BOFFSET)));\
++ ut_list_node_313 = (ut_list_node_313->NAME).next; \
++ } \
++ \
++ ut_a(ut_list_node_313 == NULL); \
++ \
++ ut_list_node_313 = (BASE).end; \
++ \
++ for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
++ ut_a(ut_list_node_313); \
++ ut_list_node_313 = (ut_list_node_313->NAME).prev; \
++ } \
++ \
++ ut_a(ut_list_node_313 == NULL); \
++} while (0)
++
+ #endif
+
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c 2010-12-04 19:46:40.212513377 +0900
++++ b/storage/innobase/log/log0recv.c 2010-12-07 16:10:14.959785817 +0900
+@@ -2912,6 +2912,7 @@
+ /*==========================*/
+ {
+ ut_a(!recv_needed_recovery);
++ ut_a(!srv_buffer_pool_shm_is_reused);
+
+ recv_needed_recovery = TRUE;
+
+diff -ruN a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c
+--- a/storage/innobase/os/os0proc.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/os/os0proc.c 2010-12-07 16:10:14.960800123 +0900
+@@ -229,3 +229,173 @@
+ }
+ #endif
+ }
++
++/****************************************************************//**
++Allocates or attaches and reuses shared memory segment.
++The content is not cleared automatically.
++@return allocated memory, or NULL on failure */
++UNIV_INTERN
++void*
++os_shm_alloc(
++/*=========*/
++ ulint* n, /*!< in/out: number of bytes */
++ uint key, /*!< in: System V IPC key handed to shmget() */
++ ibool* is_new) /*!< out: TRUE if the segment was newly created, FALSE if an existing one was attached; only meaningful when the return value is not NULL */
++{
++ void* ptr;
++#if defined HAVE_SYS_IPC_H && defined HAVE_SYS_SHM_H
++ ulint size;
++ int shmid;
++
++ *is_new = FALSE;
++ fprintf(stderr,
++ "InnoDB: The shared memory segment containing the buffer pool is: key %#x (%d).\n",
++ key, key);
++# if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
++ if (!os_use_large_pages || !os_large_page_size) {
++ goto skip;
++ }
++
++ /* Align block size to os_large_page_size */
++ ut_ad(ut_is_2pow(os_large_page_size));
++ size = ut_2pow_round(*n + (os_large_page_size - 1),
++ os_large_page_size);
++
++ shmid = shmget((key_t)key, (size_t)size,
++ IPC_CREAT | IPC_EXCL | SHM_HUGETLB | SHM_R | SHM_W);
++ if (shmid < 0) {
++ if (errno == EEXIST) { /* the key is already in use: try to attach to the existing segment */
++ fprintf(stderr,
++ "InnoDB: HugeTLB: The shared memory segment exists.\n");
++ shmid = shmget((key_t)key, (size_t)size,
++ SHM_HUGETLB | SHM_R | SHM_W);
++ if (shmid < 0) {
++ fprintf(stderr,
++ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
++ size, errno);
++ goto skip;
++ } else {
++ fprintf(stderr,
++ "InnoDB: HugeTLB: The existent shared memory segment is used.\n");
++ }
++ } else {
++ fprintf(stderr,
++ "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
++ size, errno);
++ goto skip;
++ }
++ } else {
++ *is_new = TRUE;
++ fprintf(stderr,
++ "InnoDB: HugeTLB: A new shared memory segment has been created .\n");
++ }
++
++ ptr = shmat(shmid, NULL, 0);
++ if (ptr == (void *)-1) {
++ fprintf(stderr,
++ "InnoDB: HugeTLB: Warning: Failed to attach shared memory segment, errno %d\n",
++ errno);
++ ptr = NULL;
++ }
++
++ if (ptr) {
++ *n = size;
++ os_fast_mutex_lock(&ut_list_mutex);
++ ut_total_allocated_memory += size;
++ os_fast_mutex_unlock(&ut_list_mutex);
++ UNIV_MEM_ALLOC(ptr, size);
++ return(ptr);
++ }
++skip: /* HugeTLB path unavailable or failed: fall through to the ordinary path below */
++ *is_new = FALSE;
++# endif /* HAVE_LARGE_PAGES && defined UNIV_LINUX */
++# ifdef HAVE_GETPAGESIZE
++ size = getpagesize();
++# else
++ size = UNIV_PAGE_SIZE;
++# endif
++ /* Align block size to system page size */
++ ut_ad(ut_is_2pow(size));
++ size = *n = ut_2pow_round(*n + (size - 1), size);
++
++ shmid = shmget((key_t)key, (size_t)size,
++ IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
++ if (shmid < 0) {
++ if (errno == EEXIST) { /* the key is already in use: try to attach to the existing segment */
++ fprintf(stderr,
++ "InnoDB: A shared memory segment containing the buffer pool seems to already exist.\n");
++ shmid = shmget((key_t)key, (size_t)size,
++ SHM_R | SHM_W);
++ if (shmid < 0) {
++ fprintf(stderr,
++ "InnoDB: Warning: Failed to allocate %lu bytes. (reuse) errno %d\n",
++ size, errno);
++ ptr = NULL;
++ goto end;
++ } else {
++ fprintf(stderr,
++ "InnoDB: The existent shared memory segment is used.\n");
++ }
++ } else {
++ fprintf(stderr,
++ "InnoDB: Warning: Failed to allocate %lu bytes. (new) errno %d\n",
++ size, errno);
++ ptr = NULL;
++ goto end;
++ }
++ } else {
++ *is_new = TRUE;
++ fprintf(stderr,
++ "InnoDB: A new shared memory segment has been created.\n");
++ }
++
++ ptr = shmat(shmid, NULL, 0);
++ if (ptr == (void *)-1) {
++ fprintf(stderr,
++ "InnoDB: Warning: Failed to attach shared memory segment, errno %d\n",
++ errno);
++ ptr = NULL;
++ }
++
++ if (ptr) {
++ *n = size;
++ os_fast_mutex_lock(&ut_list_mutex);
++ ut_total_allocated_memory += size;
++ os_fast_mutex_unlock(&ut_list_mutex);
++ UNIV_MEM_ALLOC(ptr, size);
++ }
++end:
++#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
++ ptr = NULL;
++#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++ return(ptr);
++}
++
++/****************************************************************//**
++Detaches a shared memory segment with shmdt(); the segment itself is not removed. */
++UNIV_INTERN
++void
++os_shm_free(
++/*========*/
++ void *ptr, /*!< in: pointer returned by
++ os_shm_alloc() */
++ ulint size) /*!< in: size returned by
++ os_shm_alloc() */
++{
++ os_fast_mutex_lock(&ut_list_mutex);
++ ut_a(ut_total_allocated_memory >= size);
++ os_fast_mutex_unlock(&ut_list_mutex);
++
++#if defined HAVE_SYS_IPC_H && defined HAVE_SYS_SHM_H
++ if (!shmdt(ptr)) { /* the accounting below is updated only when shmdt() returns 0 */
++ os_fast_mutex_lock(&ut_list_mutex);
++ ut_a(ut_total_allocated_memory >= size);
++ ut_total_allocated_memory -= size;
++ os_fast_mutex_unlock(&ut_list_mutex);
++ UNIV_MEM_FREE(ptr, size);
++ }
++#else /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++ fprintf(stderr, "InnoDB: shared memory segment is not supported.\n");
++#endif /* HAVE_SYS_IPC_H && HAVE_SYS_SHM_H */
++}
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 20:20:44.687550693 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-07 16:10:14.962785720 +0900
+@@ -233,6 +233,11 @@
+ UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
+ UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
+
++/* Settings for keeping the buffer pool in a shared memory segment */
++UNIV_INTERN uint srv_buffer_pool_shm_key = 0; /* key value for shm */
++UNIV_INTERN ibool srv_buffer_pool_shm_is_reused = FALSE; /* when TRUE, srv0start.c skips buf_pool_invalidate() */
++UNIV_INTERN ibool srv_buffer_pool_shm_checksum = TRUE; /* NOTE(review): presumably enables a checksum of the shm buffer pool - confirm */
++
+ /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
+ instead. */
+ UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 20:19:29.806482628 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-07 16:10:14.964785346 +0900
+@@ -1759,6 +1759,8 @@
+ Note that this is not as heavy weight as it seems. At
+ this point there will be only ONE page in the buf_LRU
+ and there must be no page in the buf_flush list. */
++ /* buffer_pool_shm should not be reused when recovery was needed. */
++ if (!srv_buffer_pool_shm_is_reused)
+ buf_pool_invalidate();
+
+ /* We always try to do a recovery, even if the database had
--- /dev/null
+# name : innodb_deadlock_count.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 16:09:53.145500265 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 16:10:24.605515894 +0900
+@@ -667,6 +667,8 @@
+ (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
+ {"dblwr_writes",
+ (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
++ {"deadlocks",
++ (char*) &export_vars.innodb_deadlocks, SHOW_LONG},
+ {"dict_tables",
+ (char*) &export_vars.innodb_dict_tables, SHOW_LONG},
+ {"have_atomic_builtins",
+diff -ruN a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
+--- a/storage/innobase/include/lock0lock.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/lock0lock.h 2010-12-04 16:10:24.605515894 +0900
+@@ -43,6 +43,7 @@
+ #endif /* UNIV_DEBUG */
+ /* Buffer for storing information about the most recent deadlock error */
+ extern FILE* lock_latest_err_file;
++extern ulint srv_n_lock_deadlock_count;
+
+ /*********************************************************************//**
+ Gets the size of a lock struct.
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-04 15:55:21.378480843 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-04 16:10:24.606550983 +0900
+@@ -750,6 +750,7 @@
+ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
+ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
+ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
++ ulint innodb_deadlocks; /*!< srv_n_lock_deadlock_count */
+ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
+ ulint innodb_log_waits; /*!< srv_log_waits */
+ ulint innodb_log_write_requests; /*!< srv_log_write_requests */
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c 2010-12-03 17:49:11.609953956 +0900
++++ b/storage/innobase/lock/lock0lock.c 2010-12-04 16:10:24.608513889 +0900
+@@ -3328,6 +3328,7 @@
+ break;
+
+ case LOCK_VICTIM_IS_START:
++ srv_n_lock_deadlock_count++;
+ fputs("*** WE ROLL BACK TRANSACTION (2)\n",
+ lock_latest_err_file);
+ break;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:57:13.069513371 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 16:10:24.610593039 +0900
+@@ -465,6 +465,7 @@
+ static ulint srv_n_rows_deleted_old = 0;
+ static ulint srv_n_rows_read_old = 0;
+
++UNIV_INTERN ulint srv_n_lock_deadlock_count = 0;
+ UNIV_INTERN ulint srv_n_lock_wait_count = 0;
+ UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
+ UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
+@@ -2251,6 +2252,7 @@
+ export_vars.innodb_buffer_pool_pages_data = LRU_len;
+ export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+ export_vars.innodb_buffer_pool_pages_free = free_len;
++ export_vars.innodb_deadlocks = srv_n_lock_deadlock_count;
+ #ifdef UNIV_DEBUG
+ export_vars.innodb_buffer_pool_pages_latched
+ = buf_get_latched_pages_number();
--- /dev/null
+# name : innodb_dict_size_limit.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:45:47.503988924 +0900
+@@ -1185,6 +1185,132 @@
+ mem_free(folds);
+ }
+
++/************************************************************************
++Drops the page hash index of every block in the buffer pool LRU lists that is hashed for the given index. Holds btr_search_latch in x-mode and all buffer pool mutexes during the scan. */
++UNIV_INTERN
++void
++btr_search_drop_page_hash_index_on_index(
++/*=====================================*/
++ dict_index_t* index) /* in: index whose hashed pages are dropped */
++{
++ buf_page_t* bpage;
++ hash_table_t* table;
++ buf_block_t* block;
++ ulint n_fields;
++ ulint n_bytes;
++ const page_t* page;
++ const rec_t* rec;
++ ulint fold;
++ ulint prev_fold;
++ index_id_t index_id;
++ ulint n_cached;
++ ulint n_recs;
++ ulint* folds;
++ ulint i, j;
++ mem_heap_t* heap = NULL;
++ ulint* offsets;
++
++ rw_lock_x_lock(&btr_search_latch);
++ buf_pool_mutex_enter_all();
++
++ table = btr_search_sys->hash_index;
++
++ for (j = 0; j < srv_buf_pool_instances; j++) {
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(j);
++
++ bpage = UT_LIST_GET_LAST(buf_pool->LRU); /* scan this instance's LRU list from its tail */
++
++ while (bpage != NULL) {
++ block = (buf_block_t*) bpage; /* NOTE(review): assumes every LRU entry is a buf_block_t - confirm for compressed-only pages */
++ if (block->index == index && block->is_hashed) {
++ page = block->frame;
++
++ /* from btr_search_drop_page_hash_index() */
++ n_fields = block->curr_n_fields;
++ n_bytes = block->curr_n_bytes;
++
++ ut_a(n_fields + n_bytes > 0);
++
++ n_recs = page_get_n_recs(page);
++
++ /* Calculate and cache fold values into an array for fast deletion
++ from the hash index */
++
++ folds = mem_alloc(n_recs * sizeof(ulint));
++
++ n_cached = 0;
++
++ rec = page_get_infimum_rec(page);
++ rec = page_rec_get_next_low(rec, page_is_comp(page));
++
++ index_id = btr_page_get_index_id(page);
++
++ ut_a(index_id == index->id);
++
++ prev_fold = 0;
++
++ offsets = NULL;
++
++ while (!page_rec_is_supremum(rec)) {
++ offsets = rec_get_offsets(rec, index, offsets,
++ n_fields + (n_bytes > 0), &heap);
++ ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
++ fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
++
++ if (fold == prev_fold && prev_fold != 0) { /* same fold as the previous record: already cached */
++
++ goto next_rec;
++ }
++
++ /* Cache the fold; the hash nodes pointing to this page are
++ removed from the hash chains after the scan of the page */
++
++ folds[n_cached] = fold;
++ n_cached++;
++next_rec:
++ rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
++ prev_fold = fold;
++ }
++
++ for (i = 0; i < n_cached; i++) {
++
++ ha_remove_all_nodes_to_page(table, folds[i], page);
++ }
++
++ ut_a(index->search_info->ref_count > 0);
++ index->search_info->ref_count--; /* one hashed page less refers to this index */
++
++ block->is_hashed = FALSE;
++ block->index = NULL;
++
++#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
++ if (UNIV_UNLIKELY(block->n_pointers)) {
++ /* Corruption */
++ ut_print_timestamp(stderr);
++ fprintf(stderr,
++" InnoDB: Corruption of adaptive hash index. After dropping\n"
++"InnoDB: the hash index to a page of %s, still %lu hash nodes remain.\n",
++ index->name, (ulong) block->n_pointers);
++ }
++#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
++
++ mem_free(folds);
++ }
++
++ bpage = UT_LIST_GET_PREV(LRU, bpage);
++ }
++ }
++
++ buf_pool_mutex_exit_all();
++ rw_lock_x_unlock(&btr_search_latch);
++
++ if (UNIV_LIKELY_NULL(heap)) { /* heap is only created if rec_get_offsets() needed it */
++ mem_heap_free(heap);
++ }
++}
++
+ /********************************************************************//**
+ Drops a page hash index when a page is freed from a fseg to the file system.
+ Drops possible hash index if the page happens to be in the buffer pool. */
+diff -ruN a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
+--- a/storage/innobase/dict/dict0boot.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0boot.c 2010-12-03 15:45:47.503988924 +0900
+@@ -284,6 +284,7 @@
+ system tables */
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
+
+ dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+@@ -336,6 +337,7 @@
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
+
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+@@ -368,6 +370,7 @@
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
+
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
+@@ -413,6 +416,7 @@
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
+
+ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+diff -ruN a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
+--- a/storage/innobase/dict/dict0crea.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0crea.c 2010-12-03 15:45:47.521955810 +0900
+@@ -1210,6 +1210,9 @@
+ /* Foreign constraint system tables have already been
+ created, and they are ok */
+
++ table1->n_mysql_handles_opened = 1; /* for pin */
++ table2->n_mysql_handles_opened = 1; /* for pin */
++
+ mutex_exit(&(dict_sys->mutex));
+
+ return(DB_SUCCESS);
+@@ -1291,6 +1294,11 @@
+
+ trx_commit_for_mysql(trx);
+
++ table1 = dict_table_get_low("SYS_FOREIGN");
++ table2 = dict_table_get_low("SYS_FOREIGN_COLS");
++ table1->n_mysql_handles_opened = 1; /* for pin */
++ table2->n_mysql_handles_opened = 1; /* for pin */
++
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_free_for_mysql(trx);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0dict.c 2010-12-03 15:45:47.525953769 +0900
+@@ -625,6 +625,8 @@
+
+ table = dict_table_get_on_id_low(table_id);
+
++ dict_table_LRU_trim(table);
++
+ mutex_exit(&(dict_sys->mutex));
+
+ return(table);
+@@ -743,6 +745,8 @@
+ table->n_mysql_handles_opened++;
+ }
+
++ dict_table_LRU_trim(table);
++
+ mutex_exit(&(dict_sys->mutex));
+
+ if (table != NULL) {
+@@ -1256,6 +1260,64 @@
+ dict_mem_table_free(table);
+ }
+
++/**************************************************************************
++Frees tables from the end of table_LRU if the dictionary cache occupies
++too much space, i.e. more than srv_dict_size_limit (0 means no limit). */
++UNIV_INTERN
++void
++dict_table_LRU_trim(
++/*================*/
++ dict_table_t* self) /* in: table that is never evicted here; callers may pass NULL */
++{
++ dict_table_t* table;
++ dict_table_t* prev_table;
++ dict_foreign_t* foreign;
++ ulint n_removed;
++ ulint n_have_parent;
++ ulint cached_foreign_tables;
++
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(mutex_own(&(dict_sys->mutex)));
++#endif /* UNIV_SYNC_DEBUG */
++
++retry:
++ n_removed = n_have_parent = 0;
++ table = UT_LIST_GET_LAST(dict_sys->table_LRU);
++
++ while ( srv_dict_size_limit && table
++ && ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
++ + dict_sys->size) > srv_dict_size_limit ) {
++ prev_table = UT_LIST_GET_PREV(table_LRU, table); /* saved first: table may be removed below */
++
++ if (table == self || table->n_mysql_handles_opened) /* skip self and tables with a nonzero handle count (in use or pinned) */
++ goto next_loop;
++
++ cached_foreign_tables = 0;
++ foreign = UT_LIST_GET_FIRST(table->foreign_list);
++ while (foreign != NULL) {
++ if (foreign->referenced_table) /* the referenced table is set in the foreign key object */
++ cached_foreign_tables++;
++ foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
++ }
++
++ if (cached_foreign_tables == 0) { /* evict only tables with no referenced table set */
++ dict_table_remove_from_cache(table);
++ n_removed++;
++ } else {
++ n_have_parent++;
++ }
++next_loop:
++ table = prev_table;
++ }
++
++ if ( srv_dict_size_limit && n_have_parent && n_removed
++ && ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells) * sizeof(hash_cell_t)
++ + dict_sys->size) > srv_dict_size_limit )
++ goto retry; /* presumably an eviction can make a skipped table evictable - TODO confirm */
++}
++
+ /****************************************************************//**
+ If the given column name is reserved for InnoDB system columns, return
+ TRUE.
+@@ -1719,6 +1781,11 @@
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
++ /* Remove all entries of the index from the adaptive hash index now,
++ because removing them from the adaptive hash index needs the dict_index */
++ if (btr_search_enabled && srv_dict_size_limit)
++ btr_search_drop_page_hash_index_on_index(index);
++
+ /* We always create search info whether or not adaptive
+ hash index is enabled or not. */
+ info = index->search_info;
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:43:57.294986852 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:45:47.534959966 +0900
+@@ -655,6 +655,8 @@
+ (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
+ {"dblwr_writes",
+ (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
++ {"dict_tables",
++ (char*) &export_vars.innodb_dict_tables, SHOW_LONG},
+ {"have_atomic_builtins",
+ (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
+ {"log_waits",
+@@ -11543,6 +11545,11 @@
+ "Number of extra user rollback segments which are used in a round-robin fashion.",
+ NULL, NULL, 127, 0, 127, 0);
+
++static MYSQL_SYSVAR_ULONG(dict_size_limit, srv_dict_size_limit,
++ PLUGIN_VAR_RQCMDARG,
++ "Limit the allocated memory for dictionary cache. (0: unlimited)",
++ NULL, NULL, 0, 0, LONG_MAX, 0); /* no check/update callbacks; default 0, min 0, max LONG_MAX, block size 0 */
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -11611,6 +11618,7 @@
+ MYSQL_SYSVAR(adaptive_flushing_method),
+ MYSQL_SYSVAR(enable_unsafe_group_commit),
+ MYSQL_SYSVAR(extra_rsegments),
++ MYSQL_SYSVAR(dict_size_limit),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(use_native_aio),
+ MYSQL_SYSVAR(change_buffering),
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:18:48.889024455 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:45:47.553025057 +0900
+@@ -578,6 +578,7 @@
+
+ /* Use old-style record format for the insert buffer. */
+ table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
+
+ dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
+
+diff -ruN a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
+--- a/storage/innobase/include/btr0sea.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0sea.h 2010-12-03 15:45:47.555024229 +0900
+@@ -140,6 +140,13 @@
+ s- or x-latched, or an index page
+ for which we know that
+ block->buf_fix_count == 0 */
++/************************************************************************
++Drops the page hash index of every block in the buffer pool LRU lists that is hashed for the given index */
++UNIV_INTERN
++void
++btr_search_drop_page_hash_index_on_index(
++/*=====================================*/
++ dict_index_t* index); /* in: index whose hashed pages are dropped */
+ /********************************************************************//**
+ Drops a page hash index when a page is freed from a fseg to the file system.
+ Drops possible hash index if the page happens to be in the buffer pool. */
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0dict.h 2010-12-03 15:45:47.558024515 +0900
+@@ -1158,6 +1158,12 @@
+ /*====================================*/
+ dict_table_t* table, /*!< in: table */
+ const char* name); /*!< in: name of the index to find */
++
++UNIV_INTERN
++void
++dict_table_LRU_trim(
++/*================*/
++ dict_table_t* self); /* in: table that is never evicted by the trim; callers may pass NULL */
+ /* Buffers for storing detailed information about the latest foreign key
+ and unique key errors */
+ extern FILE* dict_foreign_err_file;
+diff -ruN a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
+--- a/storage/innobase/include/dict0dict.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0dict.ic 2010-12-03 15:45:47.560024398 +0900
+@@ -824,6 +824,13 @@
+ HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
+ dict_table_t*, table, ut_ad(table->cached),
+ !strcmp(table->name, table_name));
++
++ /* make young in table_LRU */
++ if (table) {
++ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
++ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
++ }
++
+ return(table);
+ }
+
+@@ -877,6 +884,12 @@
+ table = dict_load_table_on_id(table_id);
+ }
+
++ /* make young in table_LRU */
++ if (table) {
++ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
++ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
++ }
++
+ ut_ad(!table || table->cached);
+
+ /* TODO: should get the type information from MySQL */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:43:57.297067100 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:45:47.562024404 +0900
+@@ -228,7 +228,7 @@
+ extern ulint srv_adaptive_flushing_method;
+
+ extern ulint srv_extra_rsegments;
+-
++extern ulint srv_dict_size_limit;
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+@@ -700,6 +700,7 @@
+ ulint innodb_data_writes; /*!< I/O write requests */
+ ulint innodb_data_written; /*!< Data bytes written */
+ ulint innodb_data_reads; /*!< I/O read requests */
++ ulint innodb_dict_tables; /*!< length of dict_sys->table_LRU */
+ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
+ ulint innodb_buffer_pool_pages_data; /*!< Data pages */
+ ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:43:57.301024390 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:45:47.565023830 +0900
+@@ -414,6 +414,7 @@
+ UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
+
+ UNIV_INTERN ulint srv_extra_rsegments = 127; /* extra rseg for users */
++UNIV_INTERN ulint srv_dict_size_limit = 0;
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+ UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+@@ -2192,6 +2193,7 @@
+ export_vars.innodb_data_reads = os_n_file_reads;
+ export_vars.innodb_data_writes = os_n_file_writes;
+ export_vars.innodb_data_written = srv_data_written;
++ export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0);
+ export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
+ export_vars.innodb_buffer_pool_write_requests
+ = srv_buf_pool_write_requests;
--- /dev/null
+# name : innodb_expand_import.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:09:51.274957577 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-03 15:52:23.553986552 +0900
+@@ -40,6 +40,12 @@
+ #include "dict0dict.h"
+ #include "page0page.h"
+ #include "page0zip.h"
++#include "trx0trx.h"
++#include "trx0sys.h"
++#include "pars0pars.h"
++#include "row0mysql.h"
++#include "row0row.h"
++#include "que0que.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "buf0lru.h"
+ # include "ibuf0ibuf.h"
+@@ -3050,7 +3056,7 @@
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, filepath, OS_FILE_OPEN,
+- OS_FILE_READ_ONLY, &success);
++ OS_FILE_READ_WRITE, &success);
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+@@ -3097,6 +3103,466 @@
+ space_id = fsp_header_get_space_id(page);
+ space_flags = fsp_header_get_flags(page);
+
++ if (srv_expand_import
++ && (space_id != id || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
++ ibool file_is_corrupt = FALSE;
++ byte* buf3;
++ byte* descr_page;
++ ibool descr_is_corrupt = FALSE;
++ index_id_t old_id[31];
++ index_id_t new_id[31];
++ ulint root_page[31];
++ ulint n_index;
++ os_file_t info_file = -1;
++ char* info_file_path;
++ ulint i;
++ int len;
++ ib_uint64_t current_lsn;
++ ulint size_low, size_high, size, free_limit;
++ ib_int64_t size_bytes, free_limit_bytes;
++ dict_table_t* table;
++ dict_index_t* index;
++ fil_system_t* system;
++ fil_node_t* node = NULL;
++ fil_space_t* space;
++
++ buf3 = ut_malloc(2 * UNIV_PAGE_SIZE);
++ descr_page = ut_align(buf3, UNIV_PAGE_SIZE);
++
++ current_lsn = log_get_lsn();
++
++ /* check the header page's consistency */
++ if (buf_page_is_corrupted(page,
++ dict_table_flags_to_zip_size(space_flags))) {
++ fprintf(stderr, "InnoDB: page 0 of %s seems corrupt.\n", filepath);
++ file_is_corrupt = TRUE;
++ descr_is_corrupt = TRUE;
++ }
++
++ /* store as first descr page */
++ memcpy(descr_page, page, UNIV_PAGE_SIZE);
++
++ /* get free limit (page number) of the table space */
++/* these must be the same as the definitions in fsp0fsp.c */
++#define FSP_HEADER_OFFSET FIL_PAGE_DATA
++#define FSP_FREE_LIMIT 12
++ free_limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT + page);
++ free_limit_bytes = (ib_int64_t)free_limit * (ib_int64_t)UNIV_PAGE_SIZE;
++
++ /* overwrite fsp header */
++ fsp_header_init_fields(page, id, flags);
++ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
++ space_id = id;
++ space_flags = flags;
++ if (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN) > current_lsn)
++ mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
++ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_new_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_old_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++ success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
++
++ /* get file size */
++ os_file_get_size(file, &size_low, &size_high);
++ size_bytes = (((ib_int64_t)size_high) << 32)
++ + (ib_int64_t)size_low;
++
++ if (size_bytes < free_limit_bytes) {
++ free_limit_bytes = size_bytes;
++ if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
++ fprintf(stderr, "InnoDB: free limit of %s is larger than its real size.\n", filepath);
++ file_is_corrupt = TRUE;
++ }
++ }
++
++ /* get clustered index information */
++ table = dict_table_get_low(name);
++ index = dict_table_get_first_index(table);
++ ut_a(index->page==3);
++
++ /* read metadata from .exp file */
++ n_index = 0;
++ memset(old_id, 0, sizeof(old_id));
++ memset(new_id, 0, sizeof(new_id));
++ memset(root_page, 0, sizeof(root_page));
++
++ info_file_path = fil_make_ibd_name(name, FALSE);
++ len = strlen(info_file_path);
++ info_file_path[len - 3] = 'e';
++ info_file_path[len - 2] = 'x';
++ info_file_path[len - 1] = 'p';
++
++ info_file = os_file_create_simple_no_error_handling(innodb_file_data_key,
++ info_file_path, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
++ if (!success) {
++ fprintf(stderr, "InnoDB: cannot open %s\n", info_file_path);
++ file_is_corrupt = TRUE;
++ goto skip_info;
++ }
++ success = os_file_read(info_file, page, 0, 0, UNIV_PAGE_SIZE);
++ if (!success) {
++ fprintf(stderr, "InnoDB: cannot read %s\n", info_file_path);
++ file_is_corrupt = TRUE;
++ goto skip_info;
++ }
++ if (mach_read_from_4(page) != 0x78706f72UL
++ || mach_read_from_4(page + 4) != 0x74696e66UL) {
++ fprintf(stderr, "InnoDB: %s seems not to be a correct .exp file\n", info_file_path);
++ file_is_corrupt = TRUE;
++ goto skip_info;
++ }
++
++ fprintf(stderr, "InnoDB: import: extended import of %s is started.\n", name);
++
++ n_index = mach_read_from_4(page + 8);
++ fprintf(stderr, "InnoDB: import: %lu indexes are detected.\n", (ulong)n_index);
++ for (i = 0; i < n_index; i++) {
++ new_id[i] =
++ dict_table_get_index_on_name(table,
++ (char*)(page + (i + 1) * 512 + 12))->id;
++ old_id[i] = mach_read_from_8(page + (i + 1) * 512);
++ root_page[i] = mach_read_from_4(page + (i + 1) * 512 + 8);
++ }
++
++skip_info:
++ if (info_file != -1)
++ os_file_close(info_file);
++
++ /*
++ if (size_bytes >= 1024 * 1024) {
++ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
++ }
++ */
++ if (!(flags & DICT_TF_ZSSIZE_MASK)) {
++ mem_heap_t* heap = NULL;
++ ulint offsets_[REC_OFFS_NORMAL_SIZE];
++ ulint* offsets = offsets_;
++ ib_int64_t offset;
++
++ size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
++ /* overwrite the space id of all pages */
++ rec_offs_init(offsets_);
++
++ fprintf(stderr, "InnoDB: Progress in %%:");
++
++ for (offset = 0; offset < free_limit_bytes; offset += UNIV_PAGE_SIZE) {
++ ulint checksum_field;
++ ulint old_checksum_field;
++ ibool page_is_corrupt;
++
++ success = os_file_read(file, page,
++ (ulint)(offset & 0xFFFFFFFFUL),
++ (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++
++ page_is_corrupt = FALSE;
++
++ /* check consistency */
++ if (memcmp(page + FIL_PAGE_LSN + 4,
++ page + UNIV_PAGE_SIZE
++ - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
++
++ page_is_corrupt = TRUE;
++ }
++
++ if (mach_read_from_4(page + FIL_PAGE_OFFSET)
++ != offset / UNIV_PAGE_SIZE) {
++
++ page_is_corrupt = TRUE;
++ }
++
++ checksum_field = mach_read_from_4(page
++ + FIL_PAGE_SPACE_OR_CHKSUM);
++
++ old_checksum_field = mach_read_from_4(
++ page + UNIV_PAGE_SIZE
++ - FIL_PAGE_END_LSN_OLD_CHKSUM);
++
++ if (old_checksum_field != mach_read_from_4(page
++ + FIL_PAGE_LSN)
++ && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && old_checksum_field
++ != buf_calc_page_old_checksum(page)) {
++
++ page_is_corrupt = TRUE;
++ }
++
++ if (checksum_field != 0
++ && checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && checksum_field
++ != buf_calc_page_new_checksum(page)) {
++
++ page_is_corrupt = TRUE;
++ }
++
++ /* if it is free page, inconsistency is acceptable */
++ if (!offset) {
++ /* header page*/
++ /* it should be overwritten already */
++ ut_a(!page_is_corrupt);
++
++ } else if (!((offset / UNIV_PAGE_SIZE) % UNIV_PAGE_SIZE)) {
++ /* descr page (not header) */
++ if (page_is_corrupt) {
++ file_is_corrupt = TRUE;
++ descr_is_corrupt = TRUE;
++ } else {
++ ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_XDES);
++ descr_is_corrupt = FALSE;
++ }
++
++ /* store as descr page */
++ memcpy(descr_page, page, UNIV_PAGE_SIZE);
++
++ } else if (descr_is_corrupt) {
++ /* unknown state of the page */
++ if (page_is_corrupt) {
++ file_is_corrupt = TRUE;
++ }
++
++ } else {
++ /* check free page or not */
++ /* These definitions should be same to fsp0fsp.c */
++#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
++
++#define XDES_BITMAP (FLST_NODE_SIZE + 12)
++#define XDES_BITS_PER_PAGE 2
++#define XDES_FREE_BIT 0
++#define XDES_SIZE \
++ (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
++#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
++
++ /*descr = descr_page + XDES_ARR_OFFSET + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset)*/
++ /*xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)*/
++ byte* descr;
++ ulint index;
++ ulint byte_index;
++ ulint bit_index;
++
++ descr = descr_page + XDES_ARR_OFFSET
++ + XDES_SIZE * (ut_2pow_remainder((offset / UNIV_PAGE_SIZE), UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE);
++
++ index = XDES_FREE_BIT + XDES_BITS_PER_PAGE * ((offset / UNIV_PAGE_SIZE) % FSP_EXTENT_SIZE);
++ byte_index = index / 8;
++ bit_index = index % 8;
++
++ if (ut_bit_get_nth(mach_read_from_1(descr + XDES_BITMAP + byte_index), bit_index)) {
++ /* free page */
++ if (page_is_corrupt) {
++ goto skip_write;
++ }
++ } else {
++ /* not free */
++ if (page_is_corrupt) {
++ file_is_corrupt = TRUE;
++ }
++ }
++ }
++
++ if (page_is_corrupt) {
++ fprintf(stderr, " [errp:%lld]", offset / UNIV_PAGE_SIZE);
++
++ /* cannot treat corrupt page */
++ goto skip_write;
++ }
++
++ if (mach_read_from_4(page + FIL_PAGE_OFFSET) || !offset) {
++ mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id);
++
++ for (i = 0; i < n_index; i++) {
++ if (offset / UNIV_PAGE_SIZE == root_page[i]) {
++ /* this is index root page */
++ mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
++ + FSEG_HDR_SPACE, id);
++ mach_write_to_4(page + FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
++ + FSEG_HDR_SPACE, id);
++ break;
++ }
++ }
++
++ if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
++ index_id_t tmp = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
++
++ if (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0
++ && old_id[0] == tmp) {
++ /* leaf page of cluster index, reset trx_id of records */
++ rec_t* rec;
++ rec_t* supremum;
++ ulint n_recs;
++
++ supremum = page_get_supremum_rec(page);
++ rec = page_rec_get_next(page_get_infimum_rec(page));
++ n_recs = page_get_n_recs(page);
++
++ while (rec && rec != supremum && n_recs > 0) {
++ ulint n_fields;
++ ulint i;
++ ulint offset = index->trx_id_offset;
++ offsets = rec_get_offsets(rec, index, offsets,
++ ULINT_UNDEFINED, &heap);
++ n_fields = rec_offs_n_fields(offsets);
++ if (!offset) {
++ offset = row_get_trx_id_offset(rec, index, offsets);
++ }
++ trx_write_trx_id(rec + offset, 1);
++
++ for (i = 0; i < n_fields; i++) {
++ if (rec_offs_nth_extern(offsets, i)) {
++ ulint local_len;
++ byte* data;
++
++ data = rec_get_nth_field(rec, offsets, i, &local_len);
++
++ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
++
++ mach_write_to_4(data + local_len + BTR_EXTERN_SPACE_ID, id);
++ }
++ }
++
++ rec = page_rec_get_next(rec);
++ n_recs--;
++ }
++ }
++
++ for (i = 0; i < n_index; i++) {
++ if (old_id[i] == tmp) {
++ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), new_id[i]);
++ break;
++ }
++ }
++ }
++
++ if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
++ mach_write_to_8(page + FIL_PAGE_LSN, current_lsn);
++ mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++ current_lsn);
++ }
++
++ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_new_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
++ srv_use_checksums
++ ? buf_calc_page_old_checksum(page)
++ : BUF_NO_CHECKSUM_MAGIC);
++
++ success = os_file_write(filepath, file, page,
++ (ulint)(offset & 0xFFFFFFFFUL),
++ (ulint)(offset >> 32), UNIV_PAGE_SIZE);
++ }
++
++skip_write:
++ if (free_limit_bytes
++ && ((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes)
++ != ((offset * 100) / free_limit_bytes)) {
++ fprintf(stderr, " %lu",
++ (ulong)((ib_int64_t)((offset + UNIV_PAGE_SIZE) * 100) / free_limit_bytes));
++ }
++ }
++
++ fprintf(stderr, " done.\n");
++
++ /* update SYS_INDEXES set root page */
++ index = dict_table_get_first_index(table);
++ while (index) {
++ for (i = 0; i < n_index; i++) {
++ if (new_id[i] == index->id) {
++ break;
++ }
++ }
++
++ if (i != n_index
++ && root_page[i] != index->page) {
++ /* must update */
++ ulint error;
++ trx_t* trx;
++ pars_info_t* info = NULL;
++
++ trx = trx_allocate_for_mysql();
++ trx->op_info = "extended import";
++
++ info = pars_info_create();
++
++ pars_info_add_ull_literal(info, "indexid", new_id[i]);
++ pars_info_add_int4_literal(info, "new_page", (lint) root_page[i]);
++
++ error = que_eval_sql(info,
++ "PROCEDURE UPDATE_INDEX_PAGE () IS\n"
++ "BEGIN\n"
++ "UPDATE SYS_INDEXES"
++ " SET PAGE_NO = :new_page"
++ " WHERE ID = :indexid;\n"
++ "COMMIT WORK;\n"
++ "END;\n",
++ FALSE, trx);
++
++ if (error != DB_SUCCESS) {
++ fprintf(stderr, "InnoDB: failed to update SYS_INDEXES\n");
++ }
++
++ trx_commit_for_mysql(trx);
++
++ trx_free_for_mysql(trx);
++
++ index->page = root_page[i];
++ }
++
++ index = dict_table_get_next_index(index);
++ }
++ if (UNIV_LIKELY_NULL(heap)) {
++ mem_heap_free(heap);
++ }
++ } else {
++ /* zip page? */
++ size = (ulint)
++ (size_bytes
++ / dict_table_flags_to_zip_size(flags));
++ fprintf(stderr, "InnoDB: import: table %s seems to be in newer format."
++ " It may not be able to treated for now.\n", name);
++ }
++ /* .exp file should be removed */
++ success = os_file_delete(info_file_path);
++ if (!success) {
++ success = os_file_delete_if_exists(info_file_path);
++ }
++ mem_free(info_file_path);
++
++ system = fil_system;
++ mutex_enter(&(system->mutex));
++ space = fil_space_get_by_id(id);
++ if (space)
++ node = UT_LIST_GET_FIRST(space->chain);
++ if (node && node->size < size) {
++ space->size += (size - node->size);
++ node->size = size;
++ }
++ mutex_exit(&(system->mutex));
++
++ ut_free(buf3);
++
++ if (file_is_corrupt) {
++ ut_print_timestamp(stderr);
++ fputs(" InnoDB: Error: file ",
++ stderr);
++ ut_print_filename(stderr, filepath);
++ fprintf(stderr, " seems to be corrupt.\n"
++ "InnoDB: anyway, all not corrupt pages were tried to be converted to salvage.\n"
++ "InnoDB: ##### CAUTION #####\n"
++ "InnoDB: ## The .ibd must cause to crash InnoDB, though re-import would seem to be succeeded.\n"
++ "InnoDB: ## If you don't have knowledge about salvaging data from .ibd, you should not use the file.\n"
++ "InnoDB: ###################\n");
++ success = FALSE;
++
++ ut_free(buf2);
++
++ goto func_exit;
++ }
++ }
++
+ ut_free(buf2);
+
+ if (UNIV_UNLIKELY(space_id != id
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:49:59.195023983 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:52:23.555957062 +0900
+@@ -7330,6 +7330,14 @@
+ err = row_discard_tablespace_for_mysql(dict_table->name, trx);
+ } else {
+ err = row_import_tablespace_for_mysql(dict_table->name, trx);
++
++ /* in expanded import mode re-initialize auto_increment again */
++ if ((err == DB_SUCCESS) && srv_expand_import &&
++ (table->found_next_number_field != NULL)) {
++ dict_table_autoinc_lock(dict_table);
++ innobase_initialize_autoinc();
++ dict_table_autoinc_unlock(dict_table);
++ }
+ }
+
+ err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
+@@ -11545,6 +11553,11 @@
+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+ NULL, NULL, 0, 0, 1, 0);
+
++static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable converting automatically *.ibd files when import tablespace.",
++ NULL, NULL, 0, 0, 1, 0);
++
+ static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of extra user rollback segments which are used in a round-robin fashion.",
+@@ -11622,6 +11635,7 @@
+ MYSQL_SYSVAR(read_ahead),
+ MYSQL_SYSVAR(adaptive_flushing_method),
+ MYSQL_SYSVAR(enable_unsafe_group_commit),
++ MYSQL_SYSVAR(expand_import),
+ MYSQL_SYSVAR(extra_rsegments),
+ MYSQL_SYSVAR(dict_size_limit),
+ MYSQL_SYSVAR(use_sys_malloc),
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:48:03.077954270 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:52:23.561986996 +0900
+@@ -227,6 +227,8 @@
+ extern ulint srv_read_ahead;
+ extern ulint srv_adaptive_flushing_method;
+
++extern ulint srv_expand_import;
++
+ extern ulint srv_extra_rsegments;
+ extern ulint srv_dict_size_limit;
+ /*-------------------------------------------*/
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:49:59.230956118 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:52:23.562954411 +0900
+@@ -413,6 +413,8 @@
+ UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
+ UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
+
++UNIV_INTERN ulint srv_expand_import = 0; /* 0:disable 1:enable */
++
+ UNIV_INTERN ulint srv_extra_rsegments = 127; /* extra rseg for users */
+ UNIV_INTERN ulint srv_dict_size_limit = 0;
+ /*-------------------------------------------*/
#!!! notice !!!
# Any small change to this file in the main branch
# should be done or reviewed by the maintainer!
-diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
---- a/storage/innodb_plugin/buf/buf0buf.c 2010-08-27 16:13:11.061058561 +0900
-+++ b/storage/innodb_plugin/buf/buf0buf.c 2010-08-27 16:30:47.341987400 +0900
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:49:59.175955882 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 17:42:42.074307123 +0900
@@ -51,6 +51,40 @@
#include "dict0dict.h"
#include "log0recv.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
-@@ -1726,10 +1760,18 @@
+@@ -2397,11 +2431,19 @@
mutex_t* block_mutex;
ibool must_read;
unsigned access_time;
+ ulint ms;
+ ib_uint64_t start_time;
+ ib_uint64_t finish_time;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside());
buf_pool->stat.n_page_gets++;
for (;;) {
-@@ -1746,7 +1788,7 @@
- //buf_pool_mutex_exit();
- rw_lock_s_unlock(&page_hash_latch);
+@@ -2419,7 +2461,7 @@
+ //buf_pool_mutex_exit(buf_pool);
+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
- buf_read_page(space, zip_size, offset);
+ buf_read_page(space, zip_size, offset, trx);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
-@@ -1822,6 +1864,13 @@
+@@ -2497,6 +2539,13 @@
/* Let us wait until the read operation
completes */
for (;;) {
enum buf_io_fix io_fix;
-@@ -1836,6 +1885,12 @@
+@@ -2511,6 +2560,12 @@
break;
}
}
}
#ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2092,6 +2147,11 @@
+@@ -2823,6 +2878,11 @@
ibool must_read;
ulint retries = 0;
- mutex_t* block_mutex;
-+ trx_t* trx = NULL;
-+ ulint sec;
-+ ulint ms;
-+ ib_uint64_t start_time;
-+ ib_uint64_t finish_time;
+ mutex_t* block_mutex = NULL;
++ trx_t* trx = NULL;
++ ulint sec;
++ ulint ms;
++ ib_uint64_t start_time;
++ ib_uint64_t finish_time;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2106,6 +2166,9 @@
+@@ -2840,6 +2900,9 @@
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
+ trx = innobase_get_trx();
+ }
buf_pool->stat.n_page_gets++;
+ fold = buf_page_address_fold(space, offset);
loop:
- block = guess;
-@@ -2159,7 +2222,7 @@
+@@ -2913,7 +2976,7 @@
return(NULL);
}
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
-@@ -2444,6 +2507,13 @@
+@@ -3216,6 +3279,13 @@
/* Let us wait until the read operation
completes */
for (;;) {
enum buf_io_fix io_fix;
-@@ -2458,6 +2528,12 @@
+@@ -3230,6 +3300,12 @@
break;
}
}
}
fix_type = MTR_MEMO_BUF_FIX;
-@@ -2483,13 +2559,17 @@
+@@ -3255,13 +3331,17 @@
/* In the case of a first access, try to apply linear
read-ahead */
return(block);
}
-@@ -2512,6 +2592,7 @@
+@@ -3285,6 +3365,7 @@
unsigned access_time;
ibool success;
ulint fix_type;
ut_ad(block);
ut_ad(mtr);
-@@ -2589,13 +2670,17 @@
+@@ -3362,13 +3443,17 @@
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif
}
#ifdef UNIV_IBUF_COUNT_DEBUG
-@@ -2604,6 +2689,9 @@
- #endif
+@@ -3378,6 +3463,9 @@
+ buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
+ if (innobase_get_slow_log()) {
return(TRUE);
}
-@@ -2625,6 +2713,7 @@
- {
+@@ -3400,6 +3488,7 @@
+ buf_pool_t* buf_pool;
ibool success;
ulint fix_type;
+ trx_t* trx = NULL;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2709,6 +2798,11 @@
+@@ -3486,6 +3575,11 @@
#endif
buf_pool->stat.n_page_gets++;
return(TRUE);
}
-diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
---- a/storage/innodb_plugin/buf/buf0rea.c 2010-08-27 16:23:31.014020792 +0900
-+++ b/storage/innodb_plugin/buf/buf0rea.c 2010-08-27 16:30:47.342987531 +0900
-@@ -75,7 +75,8 @@
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:32:15.617037263 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 17:42:42.075297193 +0900
+@@ -77,7 +77,8 @@
treat the tablespace as dropped; this is a timestamp we
use to stop dangling page reads from a tablespace
which we have DISCARDed + IMPORTed back */
{
buf_page_t* bpage;
ulint wake_later;
-@@ -176,15 +177,15 @@
- ut_ad(buf_page_in_file(bpage));
+@@ -179,15 +180,15 @@
+ thd_wait_begin(NULL, THD_WAIT_DISKIO);
if (zip_size) {
- *err = fil_io(OS_FILE_READ | wake_later,
+ *err = _fil_io(OS_FILE_READ | wake_later,
- ((buf_block_t*) bpage)->frame, bpage);
+ ((buf_block_t*) bpage)->frame, bpage, trx);
}
+ thd_wait_end(NULL);
ut_a(*err == DB_SUCCESS);
-
-@@ -209,7 +210,8 @@
+@@ -213,7 +214,8 @@
/*==========*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint offset, /*!< in: page number */
+ trx_t* trx)
{
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
ib_int64_t tablespace_version;
- ulint count;
-@@ -222,7 +224,7 @@
+@@ -227,7 +229,7 @@
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
srv_buf_pool_reads += count;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
-@@ -273,8 +275,9 @@
+@@ -278,8 +280,9 @@
/*==================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
must want access to this page (see NOTE 3 above) */
+ trx_t* trx)
{
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
ib_int64_t tablespace_version;
- buf_page_t* bpage;
-@@ -497,7 +500,7 @@
+@@ -500,7 +503,7 @@
count += buf_read_page_low(
&err, FALSE,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
-@@ -587,7 +590,7 @@
+@@ -594,7 +597,7 @@
buf_read_page_low(&err, sync && (i + 1 == n_stored),
BUF_READ_ANY_PAGE, space_ids[i],
zip_size, TRUE, space_versions[i],
if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
tablespace_deleted:
-@@ -728,12 +731,12 @@
+@@ -736,12 +739,12 @@
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, TRUE, tablespace_version,
}
}
-diff -ruN a/storage/innodb_plugin/fil/fil0fil.c b/storage/innodb_plugin/fil/fil0fil.c
---- a/storage/innodb_plugin/fil/fil0fil.c 2010-08-27 16:15:55.187400372 +0900
-+++ b/storage/innodb_plugin/fil/fil0fil.c 2010-08-27 16:30:47.346992376 +0900
-@@ -4325,7 +4325,7 @@
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-03 15:53:54.610037199 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-03 17:42:42.079064198 +0900
+@@ -4395,7 +4395,7 @@
node->name, node->handle, buf,
offset_low, offset_high,
page_size * n_pages,
#endif
if (success) {
node->size += n_pages;
-@@ -4652,7 +4652,7 @@
+@@ -4722,7 +4722,7 @@
i/o on a tablespace which does not exist */
UNIV_INTERN
ulint
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
-@@ -4677,8 +4677,9 @@
+@@ -4747,8 +4747,9 @@
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
{
ulint mode;
fil_space_t* space;
-@@ -4848,7 +4849,7 @@
+@@ -4918,7 +4919,7 @@
#else
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
#endif
ut_a(ret);
-diff -ruN a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
---- a/storage/innodb_plugin/handler/ha_innodb.cc 2010-08-27 16:30:34.589021493 +0900
-+++ b/storage/innodb_plugin/handler/ha_innodb.cc 2010-08-27 16:30:47.356987871 +0900
-@@ -1372,6 +1372,16 @@
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:36:44.293955189 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:42:42.090024586 +0900
+@@ -1526,6 +1526,16 @@
trx->check_unique_secondary = !thd_test_options(
thd, OPTION_RELAXED_UNIQUE_CHECKS);
DBUG_VOID_RETURN;
}
-@@ -1427,6 +1437,32 @@
+@@ -1580,6 +1590,32 @@
+ return(trx);
}
-
+/*************************************************************************
+Gets current trx. */
+extern "C"
+}
+
/*********************************************************************//**
- Construct ha_innobase handler. */
- UNIV_INTERN
-@@ -8940,6 +8976,25 @@
+ Note that a transaction has been registered with MySQL.
+ @return true if transaction is registered with MySQL 2PC coordinator */
+@@ -9200,6 +9236,25 @@
statement has ended */
if (trx->n_mysql_tables_in_use == 0) {
+#ifdef EXTENDED_SLOWLOG
+ increment_thd_innodb_stats(thd,
-+ (unsigned long long) ut_conv_dulint_to_longlong(trx->id),
++ (unsigned long long) trx->id,
+ trx->io_reads,
+ trx->io_read,
+ trx->io_reads_wait_timer,
trx->mysql_n_tables_locked = 0;
prebuilt->used_in_HANDLER = FALSE;
-diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
---- a/storage/innodb_plugin/handler/innodb_patch_info.h 2010-08-27 16:30:34.590004526 +0900
-+++ b/storage/innodb_plugin/handler/innodb_patch_info.h 2010-08-27 16:30:47.361987777 +0900
-@@ -40,5 +40,6 @@
- {"innodb_purge_thread","Enable to use purge devoted thread","","http://www.percona.com/docs/wiki/percona-xtradb"},
- {"innodb_admin_command_base","XtraDB specific command interface through i_s","","http://www.percona.com/docs/wiki/percona-xtradb"},
- {"innodb_show_lock_name","Show mutex/lock name instead of crated file/line","","http://www.percona.com/docs/wiki/percona-xtradb"},
-+{"innodb_extend_slow","Extended statistics in slow.log","It is InnoDB-part only. It needs to patch also to mysqld.","http://www.percona.com/docs/wiki/percona-xtradb"},
- {NULL, NULL, NULL, NULL}
- };
-diff -ruN a/storage/innodb_plugin/include/buf0rea.h b/storage/innodb_plugin/include/buf0rea.h
---- a/storage/innodb_plugin/include/buf0rea.h 2010-08-27 15:54:18.078987755 +0900
-+++ b/storage/innodb_plugin/include/buf0rea.h 2010-08-27 16:30:47.363031394 +0900
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h 2010-12-03 15:18:48.891024406 +0900
++++ b/storage/innobase/include/buf0rea.h 2010-12-03 17:42:42.096026873 +0900
@@ -27,6 +27,7 @@
#define buf0rea_h
/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
-diff -ruN a/storage/innodb_plugin/include/fil0fil.h b/storage/innodb_plugin/include/fil0fil.h
---- a/storage/innodb_plugin/include/fil0fil.h 2010-08-27 15:52:14.325059269 +0900
-+++ b/storage/innodb_plugin/include/fil0fil.h 2010-08-27 16:30:47.365059512 +0900
-@@ -610,9 +610,12 @@
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-12-03 15:09:51.290958543 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-03 17:42:42.097027548 +0900
+@@ -611,9 +611,12 @@
Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
-@@ -637,8 +640,9 @@
+@@ -638,8 +641,9 @@
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
-diff -ruN a/storage/innodb_plugin/include/os0file.h b/storage/innodb_plugin/include/os0file.h
---- a/storage/innodb_plugin/include/os0file.h 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/os0file.h 2010-08-27 16:30:47.366987560 +0900
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.h 2010-12-03 17:42:42.100023783 +0900
@@ -36,6 +36,7 @@
#define os0file_h
#ifndef __WIN__
#include <dirent.h>
-@@ -482,9 +483,12 @@
- /*******************************************************************//**
- Requests a synchronous read operation.
- @return TRUE if request was successful, FALSE if fail */
-+#define os_file_read(file, buf, offset, offset_high, n) \
-+ _os_file_read(file, buf, offset, offset_high, n, NULL)
+@@ -277,13 +278,17 @@
+ pfs_os_file_close_func(file, __FILE__, __LINE__)
+
+ # define os_aio(type, mode, name, file, buf, offset, offset_high, \
+- n, message1, message2) \
++ n, message1, message2, trx) \
+ pfs_os_aio_func(type, mode, name, file, buf, offset, \
+- offset_high, n, message1, message2, \
++ offset_high, n, message1, message2, trx, \
+ __FILE__, __LINE__)
+
+ # define os_file_read(file, buf, offset, offset_high, n) \
+- pfs_os_file_read_func(file, buf, offset, offset_high, n, \
++ pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \
++ __FILE__, __LINE__)
+
- UNIV_INTERN
- ibool
--os_file_read(
-+_os_file_read(
- /*=========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
-@@ -492,7 +496,8 @@
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
++ pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \
+ __FILE__, __LINE__)
+
+ # define os_file_read_no_error_handling(file, buf, offset, \
+@@ -319,12 +324,15 @@
+ # define os_file_close(file) os_file_close_func(file)
+
+ # define os_aio(type, mode, name, file, buf, offset, offset_high, \
+- n, message1, message2) \
++ n, message1, message2, trx) \
+ os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+- message1, message2)
++ message1, message2, trx)
+
+ # define os_file_read(file, buf, offset, offset_high, n) \
+- os_file_read_func(file, buf, offset, offset_high, n)
++ os_file_read_func(file, buf, offset, offset_high, n, NULL)
++
++# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
++ os_file_read_func(file, buf, offset, offset_high, n, trx)
+
+ # define os_file_read_no_error_handling(file, buf, offset, \
+ offset_high, n) \
+@@ -690,6 +698,7 @@
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+@@ -744,6 +753,7 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+ /*******************************************************************//**
+@@ -885,7 +895,8 @@
offset where to read */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
/*******************************************************************//**
Rewind file to its start, read at most size - 1 bytes from it to str, and
NUL-terminate str. All errors are silently ignored. This function is
-@@ -646,10 +651,11 @@
+@@ -1044,10 +1055,11 @@
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
-diff -ruN a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h
---- a/storage/innodb_plugin/include/srv0srv.h 2010-08-27 16:27:30.225055856 +0900
-+++ b/storage/innodb_plugin/include/srv0srv.h 2010-08-27 16:30:47.367988259 +0900
-@@ -62,6 +62,9 @@
+diff -ruN a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
+--- a/storage/innobase/include/os0file.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/os0file.ic 2010-12-03 17:42:42.102024458 +0900
+@@ -229,6 +229,7 @@
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ {
+@@ -244,7 +245,7 @@
+ src_file, src_line);
+
+ result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+- n, message1, message2);
++ n, message1, message2, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+@@ -268,6 +269,7 @@
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
++ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+ {
+@@ -278,7 +280,7 @@
+ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+ src_file, src_line);
+
+- result = os_file_read_func(file, buf, offset, offset_high, n);
++ result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 17:32:15.634987408 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:42:42.104028644 +0900
+@@ -71,6 +71,9 @@
#define SRV_AUTO_EXTEND_INCREMENT \
(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
/* This is set to TRUE if the MySQL user has set it in MySQL */
extern ibool srv_lower_case_table_names;
-diff -ruN a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
---- a/storage/innodb_plugin/include/trx0trx.h 2010-08-27 16:08:45.301058614 +0900
-+++ b/storage/innodb_plugin/include/trx0trx.h 2010-08-27 16:30:47.369989369 +0900
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h 2010-12-03 15:41:52.049372966 +0900
++++ b/storage/innobase/include/trx0trx.h 2010-12-03 17:42:42.107024532 +0900
@@ -738,6 +738,17 @@
/*------------------------------*/
char detailed_error[256]; /*!< detailed error message for last
};
#define TRX_MAX_N_THREADS 32 /* maximum number of
-diff -ruN a/storage/innodb_plugin/lock/lock0lock.c b/storage/innodb_plugin/lock/lock0lock.c
---- a/storage/innodb_plugin/lock/lock0lock.c 2010-08-27 15:52:14.332058513 +0900
-+++ b/storage/innodb_plugin/lock/lock0lock.c 2010-08-27 16:30:47.374058285 +0900
-@@ -1757,6 +1757,8 @@
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c 2010-12-03 15:09:51.297986437 +0900
++++ b/storage/innobase/lock/lock0lock.c 2010-12-03 17:42:42.111024587 +0900
+@@ -1755,6 +1755,8 @@
{
lock_t* lock;
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
-@@ -1815,6 +1817,10 @@
+@@ -1813,6 +1815,10 @@
trx->que_state = TRX_QUE_LOCK_WAIT;
trx->was_chosen_as_deadlock_victim = FALSE;
trx->wait_started = time(NULL);
ut_a(que_thr_stop(thr));
-@@ -3695,6 +3701,8 @@
+@@ -3692,6 +3698,8 @@
{
lock_t* lock;
trx_t* trx;
ut_ad(mutex_own(&kernel_mutex));
-@@ -3750,6 +3758,10 @@
+@@ -3747,6 +3755,10 @@
return(DB_SUCCESS);
}
trx->que_state = TRX_QUE_LOCK_WAIT;
trx->was_chosen_as_deadlock_victim = FALSE;
trx->wait_started = time(NULL);
-diff -ruN a/storage/innodb_plugin/os/os0file.c b/storage/innodb_plugin/os/os0file.c
---- a/storage/innodb_plugin/os/os0file.c 2010-08-27 16:23:31.038058669 +0900
-+++ b/storage/innodb_plugin/os/os0file.c 2010-08-27 16:30:47.380058815 +0900
-@@ -38,6 +38,8 @@
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c 2010-12-03 17:32:15.644024974 +0900
++++ b/storage/innobase/os/os0file.c 2010-12-03 17:42:42.117023467 +0900
+@@ -43,6 +43,8 @@
#include "srv0start.h"
#include "fil0fil.h"
#include "buf0buf.h"
#include "log0recv.h"
#ifndef UNIV_HOTBACKUP
# include "os0sync.h"
-@@ -2087,22 +2089,30 @@
- /*******************************************************************//**
- Does a synchronous read operation in Posix.
- @return number of bytes read, -1 if error */
-+#define os_file_pread(file, buf, n, offset, offset_high) \
-+ _os_file_pread(file, buf, n, offset, offset_high, NULL);
-+
- static
- ssize_t
--os_file_pread(
-+_os_file_pread(
- /*==========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
+@@ -2177,13 +2179,18 @@
ulint n, /*!< in: number of bytes to read */
ulint offset, /*!< in: least significant 32 bits of file
offset from where to read */
ut_a((offset & 0xFFFFFFFFUL) == offset);
-@@ -2123,6 +2133,15 @@
+@@ -2204,6 +2211,15 @@
os_n_file_reads++;
#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
os_mutex_enter(os_file_count_mutex);
os_file_n_pending_preads++;
-@@ -2136,6 +2155,13 @@
+@@ -2217,6 +2233,13 @@
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
return(n_bytes);
#else
{
-@@ -2172,6 +2198,13 @@
+@@ -2253,6 +2276,13 @@
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
return(ret);
}
#endif
-@@ -2302,7 +2335,7 @@
- @return TRUE if request was successful, FALSE if fail */
- UNIV_INTERN
- ibool
--os_file_read(
-+_os_file_read(
- /*=========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
-@@ -2310,7 +2343,8 @@
+@@ -2393,7 +2423,8 @@
offset where to read */
ulint offset_high, /*!< in: most significant 32 bits of
offset */
{
#ifdef __WIN__
BOOL ret;
-@@ -2385,7 +2419,7 @@
+@@ -2468,7 +2499,7 @@
os_bytes_read_since_printout += n;
try_again:
- ret = os_file_pread(file, buf, n, offset, offset_high);
-+ ret = _os_file_pread(file, buf, n, offset, offset_high, trx);
++ ret = os_file_pread(file, buf, n, offset, offset_high, trx);
if ((ulint)ret == n) {
-@@ -3356,7 +3390,8 @@
+@@ -2597,7 +2628,7 @@
+ os_bytes_read_since_printout += n;
+
+ try_again:
+- ret = os_file_pread(file, buf, n, offset, offset_high);
++ ret = os_file_pread(file, buf, n, offset, offset_high, NULL);
+
+ if ((ulint)ret == n) {
+
+@@ -3619,7 +3650,8 @@
offset */
ulint offset_high, /*!< in: most significant 32 bits of
offset */
+ ulint len, /*!< in: length of the block to read or write */
+ trx_t* trx)
{
- os_aio_slot_t* slot;
- ulint i;
-@@ -3642,10 +3677,11 @@
+ os_aio_slot_t* slot = NULL;
+ #ifdef WIN_ASYNC_IO
+@@ -3991,10 +4023,11 @@
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
-@@ -3687,8 +3723,8 @@
+@@ -4035,8 +4068,8 @@
wait in the Windows case. */
if (type == OS_FILE_READ) {
- return(os_file_read(file, buf, offset,
- offset_high, n));
-+ return(_os_file_read(file, buf, offset,
++ return(os_file_read_trx(file, buf, offset,
+ offset_high, n, trx));
}
ut_a(type == OS_FILE_WRITE);
-@@ -3721,8 +3757,13 @@
+@@ -4074,8 +4107,13 @@
ut_error;
}
- name, buf, offset, offset_high, n);
+ name, buf, offset, offset_high, n, trx);
if (type == OS_FILE_READ) {
- if (os_aio_use_native_aio) {
- #ifdef WIN_ASYNC_IO
-diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
---- a/storage/innodb_plugin/srv/srv0srv.c 2010-08-27 16:27:30.233022109 +0900
-+++ b/storage/innodb_plugin/srv/srv0srv.c 2010-08-27 16:30:47.384058509 +0900
-@@ -86,6 +86,9 @@
- #include "trx0i_s.h"
- #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
+ if (srv_use_native_aio) {
+ os_n_file_reads++;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:32:15.648024399 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:45:05.067023254 +0900
+@@ -88,6 +88,9 @@
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
+/* prototypes for new functions added to ha_innodb.cc */
+ibool innobase_get_slow_log();
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
-@@ -1155,6 +1158,10 @@
+@@ -1219,6 +1222,10 @@
ibool has_slept = FALSE;
srv_conc_slot_t* slot = NULL;
ulint i;
if (trx->mysql_thd != NULL
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
-@@ -1231,6 +1238,7 @@
+@@ -1295,6 +1302,7 @@
switches. */
if (SRV_THREAD_SLEEP_DELAY > 0) {
os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
}
trx->op_info = "";
-@@ -1286,12 +1294,25 @@
+@@ -1350,6 +1358,13 @@
/* Go to wait for the event; when a thread leaves InnoDB it will
release this thread */
+
trx->op_info = "waiting in InnoDB queue";
- os_event_wait(slot->event);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
+@@ -1358,6 +1373,12 @@
trx->op_info = "";
os_fast_mutex_lock(&srv_conc_mutex);
srv_conc_n_waiting_threads--;
-diff -ruN a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
---- a/storage/innodb_plugin/trx/trx0trx.c 2010-08-27 16:08:45.306058896 +0900
-+++ b/storage/innodb_plugin/trx/trx0trx.c 2010-08-27 16:30:47.387058330 +0900
-@@ -179,6 +179,15 @@
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-12-03 15:41:52.053955669 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-03 17:42:42.127023410 +0900
+@@ -185,6 +185,15 @@
trx->global_read_view = NULL;
trx->read_view = NULL;
/* Set X/Open XA transaction identification to NULL */
memset(&trx->xid, 0, sizeof(trx->xid));
trx->xid.formatID = -1;
-@@ -216,6 +225,11 @@
+@@ -222,6 +231,11 @@
trx->mysql_process_no = os_proc_get_number();
return(trx);
}
-@@ -347,6 +361,12 @@
+@@ -353,6 +367,12 @@
/*===============*/
trx_t* trx) /*!< in, own: trx object */
{
mutex_enter(&kernel_mutex);
UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-@@ -368,6 +388,12 @@
+@@ -374,6 +394,12 @@
/*====================*/
trx_t* trx) /*!< in, own: trx object */
{
mutex_enter(&kernel_mutex);
trx_free(trx);
-@@ -1093,6 +1119,9 @@
+@@ -1091,6 +1117,9 @@
trx_t* trx) /*!< in: transaction */
{
que_thr_t* thr;
ut_ad(mutex_own(&kernel_mutex));
ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-@@ -1107,6 +1136,11 @@
+@@ -1105,6 +1134,11 @@
thr = UT_LIST_GET_FIRST(trx->wait_thrs);
}
trx->que_state = TRX_QUE_RUNNING;
}
-@@ -1120,6 +1154,9 @@
+@@ -1118,6 +1152,9 @@
trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
{
que_thr_t* thr;
ut_ad(mutex_own(&kernel_mutex));
ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
-@@ -1134,6 +1171,11 @@
+@@ -1132,6 +1169,11 @@
thr = UT_LIST_GET_FIRST(trx->wait_thrs);
}
--- /dev/null
+# name : innodb_extra_rseg.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:18:48.879955903 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:22:53.779955671 +0900
+@@ -11330,6 +11330,11 @@
+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+ NULL, NULL, 0, 0, 1, 0);
+
++static MYSQL_SYSVAR_ULONG(extra_rsegments, srv_extra_rsegments,
++ PLUGIN_VAR_RQCMDARG,
++ "Number of extra user rollback segments which are used in a round-robin fashion.",
++ NULL, NULL, 127, 0, 127, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -11395,6 +11400,7 @@
+ MYSQL_SYSVAR(read_ahead),
+ MYSQL_SYSVAR(adaptive_flushing_method),
+ MYSQL_SYSVAR(enable_unsafe_group_commit),
++ MYSQL_SYSVAR(extra_rsegments),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(use_native_aio),
+ MYSQL_SYSVAR(change_buffering),
+@@ -11423,6 +11429,7 @@
+ innobase_system_variables, /* system variables */
+ NULL /* reserved */
+ },
++i_s_innodb_rseg,
+ i_s_innodb_trx,
+ i_s_innodb_locks,
+ i_s_innodb_lock_waits,
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:06:58.742986460 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:33:08.790070078 +0900
+@@ -45,6 +45,8 @@
+ #include "srv0start.h" /* for srv_was_started */
+ #include "trx0i_s.h"
+ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
++#include "trx0rseg.h" /* for trx_rseg_struct */
++#include "trx0sys.h" /* for trx_sys */
+ }
+
+ static const char plugin_author[] = "Innobase Oy";
+@@ -1782,3 +1784,166 @@
+
+ DBUG_RETURN(0);
+ }
++
++/***********************************************************************
++Column definitions of the INFORMATION_SCHEMA table served by the INNODB_RSEG plugin. */
++static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] =
++{
++ {STRUCT_FLD(field_name, "rseg_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "space_id"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "zip_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "page_no"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "max_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "curr_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++static
++int
++i_s_innodb_rseg_fill(
++/*=================*/
++ THD* thd, /* in: thread; used for the access check and for storing rows */
++ TABLE_LIST* tables, /* in/out: tables to fill; rows are stored into tables->table */
++ COND* cond) /* in: condition (ignored) */
++{
++ TABLE* table = (TABLE *) tables->table;
++ int status = 0; /* return value: 0, or 1 if a row could not be stored */
++ trx_rseg_t* rseg;
++ /* Stores one row per entry of trx_sys->rseg_list into tables->table. */
++ DBUG_ENTER("i_s_innodb_rseg_fill");
++
++ /* deny access to users lacking the PROCESS privilege (PROCESS_ACL): return 0 with no rows */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
++ /* NOTE(review): no mutex acquisition is visible in this function while rseg_list is walked -- confirm this is safe */
++ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
++ /* field[0..5] follow the column order of i_s_innodb_rseg_fields_info */
++ while (rseg) {
++ table->field[0]->store(rseg->id);
++ table->field[1]->store(rseg->space);
++ table->field[2]->store(rseg->zip_size);
++ table->field[3]->store(rseg->page_no);
++ table->field[4]->store(rseg->max_size);
++ table->field[5]->store(rseg->curr_size);
++
++ if (schema_table_store_record(thd, table)) {
++ status = 1; /* stop at the first row that cannot be stored */
++ break;
++ }
++
++ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
++ }
++
++ DBUG_RETURN(status);
++}
++
++static
++int
++i_s_innodb_rseg_init(
++/*=================*/
++ /* out: always 0 (there is no failure path) */
++ void* p) /* in/out: table schema object, used as ST_SCHEMA_TABLE* */
++{
++ DBUG_ENTER("i_s_innodb_rseg_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++ /* Init hook of the i_s_innodb_rseg plugin: attach the column list and the fill callback. */
++ schema->fields_info = i_s_innodb_rseg_fields_info;
++ schema->fill_table = i_s_innodb_rseg_fill;
++
++ DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_rseg =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_RSEG"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB rollback segment information"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, i_s_innodb_rseg_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h 2010-12-03 15:06:58.744953398 +0900
++++ b/storage/innobase/handler/i_s.h 2010-12-03 15:22:53.783953418 +0900
+@@ -33,5 +33,6 @@
+ extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin i_s_innodb_rseg;
+
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:18:48.894029379 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:22:53.786986025 +0900
+@@ -225,6 +225,8 @@
+ extern ulint srv_read_ahead;
+ extern ulint srv_adaptive_flushing_method;
+
++extern ulint srv_extra_rsegments;
++
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:18:48.913956140 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:22:53.789987037 +0900
+@@ -411,6 +411,8 @@
+ UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+ UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
+ UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
++
++UNIV_INTERN ulint srv_extra_rsegments = 127; /* extra rseg for users */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+ UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-12-03 15:18:48.918982105 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:22:53.792983193 +0900
+@@ -619,7 +619,7 @@
+
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+
+- if (rseg == NULL) {
++ if (rseg == NULL || rseg->id > srv_extra_rsegments) {
+ rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ }
+
+@@ -627,7 +627,8 @@
+ it */
+
+ if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
+- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
++ && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)
++ && srv_extra_rsegments > 0) {
+ goto loop;
+ }
+
--- /dev/null
+# name : innodb_fast_checksum.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-04 15:52:23.391514910 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-04 15:53:45.013513772 +0900
+@@ -511,6 +511,27 @@
+ return(checksum);
+ }
+
++UNIV_INTERN
++ulint
++buf_calc_page_new_checksum_32(
++/*==========================*/
++ const byte* page) /*!< in: buffer page */
++{
++ ulint checksum;
++ /* Add three folds: [FIL_PAGE_OFFSET, FIL_PAGE_FILE_FLUSH_LSN), [FIL_PAGE_DATA, FIL_PAGE_DATA_ALIGN_32), and, via ut_fold_binary_32(), the rest of the page up to its last FIL_PAGE_END_LSN_OLD_CHKSUM bytes. */
++ checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
++ FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
++ + ut_fold_binary(page + FIL_PAGE_DATA,
++ FIL_PAGE_DATA_ALIGN_32 - FIL_PAGE_DATA)
++ + ut_fold_binary_32(page + FIL_PAGE_DATA_ALIGN_32,
++ UNIV_PAGE_SIZE - FIL_PAGE_DATA_ALIGN_32
++ - FIL_PAGE_END_LSN_OLD_CHKSUM);
++ /* keep only the low 32 bits of the sum */
++ checksum = checksum & 0xFFFFFFFFUL;
++
++ return(checksum);
++}
++
+ /********************************************************************//**
+ In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+ looked at the first few bytes of the page. This calculates that old
+@@ -627,9 +648,21 @@
+ /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
+ (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
+
+- if (checksum_field != 0
++ if (!srv_fast_checksum
++ && checksum_field != 0
++ && checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && checksum_field
++ != buf_calc_page_new_checksum(read_buf)) {
++
++ return(TRUE);
++ }
++
++ if (srv_fast_checksum
++ && checksum_field != 0
+ && checksum_field != BUF_NO_CHECKSUM_MAGIC
+ && checksum_field
++ != buf_calc_page_new_checksum_32(read_buf)
++ && checksum_field
+ != buf_calc_page_new_checksum(read_buf)) {
+
+ return(TRUE);
+@@ -653,6 +686,7 @@
+ dict_index_t* index;
+ #endif /* !UNIV_HOTBACKUP */
+ ulint checksum;
++ ulint checksum_32;
+ ulint old_checksum;
+ ulint size = zip_size;
+
+@@ -739,12 +773,14 @@
+
+ checksum = srv_use_checksums
+ ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
++ checksum_32 = srv_use_checksums
++ ? buf_calc_page_new_checksum_32(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+ old_checksum = srv_use_checksums
+ ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+- " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
++ " InnoDB: Page checksum %lu (32bit_calc: %lu), prior-to-4.0.14-form"
+ " checksum %lu\n"
+ "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
+ " stored checksum %lu\n"
+@@ -753,7 +789,7 @@
+ "InnoDB: Page number (if stored to page already) %lu,\n"
+ "InnoDB: space id (if created with >= MySQL-4.1.1"
+ " and stored already) %lu\n",
+- (ulong) checksum, (ulong) old_checksum,
++ (ulong) checksum, (ulong) checksum_32, (ulong) old_checksum,
+ (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+ (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+ - FIL_PAGE_END_LSN_OLD_CHKSUM),
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-04 15:37:50.555568346 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-04 15:53:45.015513917 +0900
+@@ -1027,7 +1027,9 @@
+
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ srv_use_checksums
+- ? buf_calc_page_new_checksum(page)
++ ? (!srv_fast_checksum
++ ? buf_calc_page_new_checksum(page)
++ : buf_calc_page_new_checksum_32(page))
+ : BUF_NO_CHECKSUM_MAGIC);
+
+ /* We overwrite the first 4 bytes of the end lsn field to store
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:52:23.406513743 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:53:45.020513800 +0900
+@@ -3171,7 +3171,9 @@
+ mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ srv_use_checksums
+- ? buf_calc_page_new_checksum(page)
++ ? (!srv_fast_checksum
++ ? buf_calc_page_new_checksum(page)
++ : buf_calc_page_new_checksum_32(page))
+ : BUF_NO_CHECKSUM_MAGIC);
+ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ srv_use_checksums
+@@ -3303,7 +3305,8 @@
+ page_is_corrupt = TRUE;
+ }
+
+- if (checksum_field != 0
++ if (!srv_fast_checksum
++ && checksum_field != 0
+ && checksum_field != BUF_NO_CHECKSUM_MAGIC
+ && checksum_field
+ != buf_calc_page_new_checksum(page)) {
+@@ -3311,6 +3314,17 @@
+ page_is_corrupt = TRUE;
+ }
+
++ if (srv_fast_checksum
++ && checksum_field != 0
++ && checksum_field != BUF_NO_CHECKSUM_MAGIC
++ && checksum_field
++ != buf_calc_page_new_checksum_32(page)
++ && checksum_field
++ != buf_calc_page_new_checksum(page)) {
++
++ page_is_corrupt = TRUE;
++ }
++
+ /* if it is free page, inconsistency is acceptable */
+ if (!offset) {
+ /* header page*/
+@@ -3456,7 +3470,9 @@
+
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
+ srv_use_checksums
+- ? buf_calc_page_new_checksum(page)
++ ? (!srv_fast_checksum
++ ? buf_calc_page_new_checksum(page)
++ : buf_calc_page_new_checksum_32(page))
+ : BUF_NO_CHECKSUM_MAGIC);
+ mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ srv_use_checksums
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:52:23.420480329 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:53:45.029551892 +0900
+@@ -183,6 +183,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool innobase_use_doublewrite = TRUE;
+ static my_bool innobase_use_checksums = TRUE;
++static my_bool innobase_fast_checksum = FALSE;
+ static my_bool innobase_recovery_stats = TRUE;
+ static my_bool innobase_locks_unsafe_for_binlog = FALSE;
+ static my_bool innobase_overwrite_relay_log_info = FALSE;
+@@ -2573,6 +2574,7 @@
+
+ srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+ srv_use_checksums = (ibool) innobase_use_checksums;
++ srv_fast_checksum = (ibool) innobase_fast_checksum;
+
+ #ifdef HAVE_LARGE_PAGES
+ if ((os_use_large_pages = (ibool) my_use_large_pages))
+@@ -11321,6 +11323,15 @@
+ "Disable with --skip-innodb-checksums.",
+ NULL, NULL, TRUE);
+
++static MYSQL_SYSVAR_BOOL(fast_checksum, innobase_fast_checksum,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Change the algorithm of checksum for the whole of datapage to 4-bytes word based. "
++ "The original checksum is checked after the new one. It may be slow for reading page"
++ " which has orginal checksum. Overwrite the page or recreate the InnoDB database, "
++ "if you want the entire benefit for performance at once. "
++ "#### Attention: The checksum is not compatible for normal or disabled version! ####",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
+ PLUGIN_VAR_READONLY,
+ "The common part for InnoDB table spaces.",
+@@ -11833,6 +11844,7 @@
+ MYSQL_SYSVAR(buffer_pool_size),
+ MYSQL_SYSVAR(buffer_pool_instances),
+ MYSQL_SYSVAR(checksums),
++ MYSQL_SYSVAR(fast_checksum),
+ MYSQL_SYSVAR(commit_concurrency),
+ MYSQL_SYSVAR(concurrency_tickets),
+ MYSQL_SYSVAR(data_file_path),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-04 15:52:23.458514045 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-04 15:53:45.044514150 +0900
+@@ -531,6 +531,11 @@
+ buf_calc_page_new_checksum(
+ /*=======================*/
+ const byte* page); /*!< in: buffer page */
++UNIV_INTERN
++ulint
++buf_calc_page_new_checksum_32(
++/*==========================*/
++ const byte* page); /*!< in: buffer page */
+ /********************************************************************//**
+ In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
+ looked at the first few bytes of the page. This calculates that old
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-12-04 15:52:23.466513796 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-04 15:53:45.046513558 +0900
+@@ -118,6 +118,7 @@
+ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
+ contains the space id of the page */
+ #define FIL_PAGE_DATA 38 /*!< start of the data on the page */
++#define FIL_PAGE_DATA_ALIGN_32 40 /*!< FIL_PAGE_DATA (38) rounded up to a multiple of 4 */
+ /* @} */
+ /** File page trailer @{ */
+ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-04 15:52:23.474482590 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:53:45.048512100 +0900
+@@ -226,6 +226,7 @@
+
+ extern ibool srv_use_doublewrite_buf;
+ extern ibool srv_use_checksums;
++extern ibool srv_fast_checksum;
+
+ extern ulong srv_max_buf_pool_modified_pct;
+ extern ulong srv_max_purge_lag;
+diff -ruN a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
+--- a/storage/innobase/include/ut0rnd.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0rnd.h 2010-12-04 15:53:45.049510146 +0900
+@@ -124,6 +124,13 @@
+ const byte* str, /*!< in: string of bytes */
+ ulint len) /*!< in: length */
+ __attribute__((pure));
++UNIV_INLINE
++ulint
++ut_fold_binary_32(
++/*==============*/
++ const byte* str, /*!< in: string of bytes; the definition asserts a 4-byte-aligned address */
++ ulint len) /*!< in: length in bytes; the definition asserts a multiple of 4 */
++ __attribute__((pure));
+ /***********************************************************//**
+ Looks for a prime number slightly greater than the given argument.
+ The prime is chosen so that it is not near any power of 2.
+diff -ruN a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
+--- a/storage/innobase/include/ut0rnd.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ut0rnd.ic 2010-12-04 15:53:45.050565975 +0900
+@@ -226,3 +226,28 @@
+
+ return(fold);
+ }
++
++UNIV_INLINE
++ulint
++ut_fold_binary_32(
++/*==============*/
++ const byte* str, /*!< in: string of bytes, 4-byte aligned */
++ ulint len) /*!< in: length in bytes, a multiple of 4 */
++{
++ const ib_uint32_t* str_end = (const ib_uint32_t*) (str + len);
++ const ib_uint32_t* str_32 = (const ib_uint32_t*) str;
++ ulint fold = 0;
++ /* Folds the buffer one ib_uint32_t at a time with ut_fold_ulint_pair(), starting from 0. */
++ ut_ad(str);
++ /* This function is only for word-aligned data: length and address must both be multiples of 4 */
++ ut_ad(len % 4 == 0);
++ ut_ad((ulint)str % 4 == 0);
++ /* NOTE(review): each word is loaded in host byte order, so the result presumably differs between little- and big-endian hosts -- confirm */
++ while (str_32 < str_end) {
++ fold = ut_fold_ulint_pair(fold, (ulint)(*str_32));
++
++ str_32++;
++ }
++
++ return(fold);
++}
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:52:23.498513634 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:53:45.053550283 +0900
+@@ -412,6 +412,7 @@
+
+ UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+ UNIV_INTERN ibool srv_use_checksums = TRUE;
++UNIV_INTERN ibool srv_fast_checksum = FALSE;
+
+ UNIV_INTERN ulong srv_replication_delay = 0;
+
--- /dev/null
+# name : innodb_files_extend.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:55:21.358513751 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:55:58.243481131 +0900
+@@ -4800,9 +4800,9 @@
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(buf);
+ ut_ad(len > 0);
+-#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
+-# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
+-#endif
++//#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
++//# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
++//#endif
+ ut_ad(fil_validate());
+ #ifndef UNIV_HOTBACKUP
+ # ifndef UNIV_LOG_DEBUG
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:52:23.411513754 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:55:58.244514273 +0900
+@@ -657,15 +657,16 @@
+ ulint offset) /*!< in: page offset */
+ {
+ #ifndef DOXYGEN /* Doxygen gets confused of these */
+-# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \
+- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+-# error
+-# endif
++//# if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET
++// + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
++//# error
++//# endif
+ # if PAGE_ZIP_MIN_SIZE <= XDES_ARR_OFFSET \
+ + (PAGE_ZIP_MIN_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE
+ # error
+ # endif
+ #endif /* !DOXYGEN */
++ ut_a(UNIV_PAGE_SIZE > XDES_ARR_OFFSET + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) * XDES_SIZE);
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (!zip_size) {
+@@ -1464,12 +1465,12 @@
+ mtr);
+ xdes_init(descr, mtr);
+
+-#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
+-# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
+-#endif
+-#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
+-# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
+-#endif
++//#if UNIV_PAGE_SIZE % FSP_EXTENT_SIZE
++//# error "UNIV_PAGE_SIZE % FSP_EXTENT_SIZE != 0"
++//#endif
++//#if PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE
++//# error "PAGE_ZIP_MIN_SIZE % FSP_EXTENT_SIZE != 0"
++//#endif
+
+ if (UNIV_UNLIKELY(init_xdes)) {
+
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:55:21.367482924 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:55:58.248549631 +0900
+@@ -148,6 +148,9 @@
+ static ulong innobase_write_io_threads;
+ static long innobase_buffer_pool_instances = 1;
+
++static ulong innobase_page_size;
++static ulong innobase_log_block_size;
++
+ static my_bool innobase_thread_concurrency_timer_based;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+
+@@ -2269,6 +2272,62 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ srv_page_size = 0;
++ srv_page_size_shift = 0;
++
++ if (innobase_page_size != (1 << 14)) {
++ uint n_shift;
++
++ fprintf(stderr,
++ "InnoDB: Warning: innodb_page_size has been changed from default value 16384. (###EXPERIMENTAL### operation)\n");
++ for (n_shift = 12; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
++ if (innobase_page_size == ((ulong)1 << n_shift)) {
++ srv_page_size_shift = n_shift;
++ srv_page_size = (1 << srv_page_size_shift);
++ fprintf(stderr,
++ "InnoDB: The universal page size of the database is set to %lu.\n",
++ srv_page_size);
++ break;
++ }
++ }
++ } else {
++ srv_page_size_shift = 14;
++ srv_page_size = (1 << srv_page_size_shift);
++ }
++
++ if (!srv_page_size_shift) {
++ fprintf(stderr,
++ "InnoDB: Error: %lu is not valid value for innodb_page_size.\n",
++ innobase_page_size);
++ goto error;
++ }
++
++ srv_log_block_size = 0;
++ if (innobase_log_block_size != (1 << 9)) { /*!=512*/
++ uint n_shift;
++
++ fprintf(stderr,
++ "InnoDB: Warning: innodb_log_block_size has been changed from default value 512. (###EXPERIMENTAL### operation)\n");
++ for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; n_shift++) {
++ if (innobase_log_block_size == ((ulong)1 << n_shift)) {
++ srv_log_block_size = (1 << n_shift);
++ fprintf(stderr,
++ "InnoDB: The log block size is set to %lu.\n",
++ srv_log_block_size);
++ break;
++ }
++ }
++ } else {
++ srv_log_block_size = 512;
++ }
++
++ if (!srv_log_block_size) {
++ fprintf(stderr,
++ "InnoDB: Error: %lu is not valid value for innodb_log_block_size.\n",
++ innobase_log_block_size);
++ goto error;
++ }
++
+ #ifndef MYSQL_SERVER
+ innodb_overwrite_relay_log_info = FALSE;
+ #endif
+@@ -7212,9 +7271,9 @@
+ | DICT_TF_COMPACT
+ | DICT_TF_FORMAT_ZIP
+ << DICT_TF_FORMAT_SHIFT;
+-#if DICT_TF_ZSSIZE_MAX < 1
+-# error "DICT_TF_ZSSIZE_MAX < 1"
+-#endif
++//#if DICT_TF_ZSSIZE_MAX < 1
++//# error "DICT_TF_ZSSIZE_MAX < 1"
++//#endif
+ }
+ }
+
+@@ -11332,6 +11391,16 @@
+ "#### Attention: The checksum is not compatible for normal or disabled version! ####",
+ NULL, NULL, FALSE);
+
++static MYSQL_SYSVAR_ULONG(page_size, innobase_page_size,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "###EXPERIMENTAL###: The universal page size of the database. Changing for created database is not supported. Use on your own risk!",
++ NULL, NULL, (1 << 14), (1 << 12), (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
++
++static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!",
++ NULL, NULL, (1 << 9)/*512*/, (1 << 9)/*512*/, (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
++
+ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
+ PLUGIN_VAR_READONLY,
+ "The common part for InnoDB table spaces.",
+@@ -11839,6 +11908,8 @@
+ NULL, NULL, 0, 0, 1, 0);
+
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
++ MYSQL_SYSVAR(page_size),
++ MYSQL_SYSVAR(log_block_size),
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+ MYSQL_SYSVAR(buffer_pool_size),
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-04 15:55:21.375482937 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-04 15:55:58.258469088 +0900
+@@ -1602,7 +1602,7 @@
+ time_t last_printout_time;
+ /*!< when buf_print_io was last time
+ called */
+- buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES + 1];
++ buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
+ /*!< Statistics of buddy system,
+ indexed by block size */
+ buf_pool_stat_t stat; /*!< current statistics */
+@@ -1698,7 +1698,7 @@
+ /* @{ */
+ UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
+ /*!< unmodified compressed pages */
+- UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES];
++ UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
+ /*!< buddy free lists */
+
+ buf_page_t watch[BUF_POOL_WATCH_SIZE];
+@@ -1706,9 +1706,9 @@
+ pool watches. Protected by
+ buf_pool->mutex. */
+
+-#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
+-# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
+-#endif
++//#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
++//# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
++//#endif
+ #if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE
+ # error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE"
+ #endif
+diff -ruN a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
+--- a/storage/innobase/include/buf0types.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0types.h 2010-12-04 15:55:58.259482590 +0900
+@@ -72,6 +72,7 @@
+ buddy system; must be at least
+ sizeof(buf_page_t) */
+ #define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
++#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX - BUF_BUDDY_LOW_SHIFT)
+ /*!< number of buddy sizes */
+
+ /** twice the maximum block size of the buddy system;
+diff -ruN a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
+--- a/storage/innobase/include/log0log.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/log0log.h 2010-12-09 18:16:47.737728305 +0900
+@@ -672,6 +672,9 @@
+ when mysqld is first time started
+ on the restored database, it can
+ print helpful info for the user */
++#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64
++ /* XtraDB extension: 4-byte field that records
++ the log block size; 0 means the default, 512 */
+ #define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
+ /* this 4-byte field is TRUE when
+ the writing of an archived log file
+diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
+--- a/storage/innobase/include/mtr0log.ic 2010-12-04 15:37:50.590551517 +0900
++++ b/storage/innobase/include/mtr0log.ic 2010-12-04 15:55:58.260482404 +0900
+@@ -203,7 +203,7 @@
+ system tablespace */
+ if ((space == TRX_SYS_SPACE
+ || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+- && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
++ && offset >= (ulint)FSP_EXTENT_SIZE && offset < 3 * (ulint)FSP_EXTENT_SIZE) {
+ if (trx_doublewrite_buf_is_being_created) {
+ /* Do nothing: we only come to this branch in an
+ InnoDB database creation. We do not redo log
+diff -ruN a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
+--- a/storage/innobase/include/os0file.h 2010-12-09 18:16:02.323727427 +0900
++++ b/storage/innobase/include/os0file.h 2010-12-09 18:16:47.738694194 +0900
+@@ -101,7 +101,7 @@
+ if this fails for a log block, then it is equivalent to a media failure in the
+ log. */
+
+-#define OS_FILE_LOG_BLOCK_SIZE 512
++#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size
+
+ /** Options for file_create @{ */
+ #define OS_FILE_OPEN 51
+@@ -190,6 +190,8 @@
+ extern ulint os_n_file_writes;
+ extern ulint os_n_fsyncs;
+
++extern ulint srv_log_block_size;
++
+ #ifdef UNIV_PFS_IO
+ /* Keys to register InnoDB I/O with performance schema */
+ extern mysql_pfs_key_t innodb_file_data_key;
+diff -ruN a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
+--- a/storage/innobase/include/page0types.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0types.h 2010-12-04 15:55:58.261483930 +0900
+@@ -56,8 +56,9 @@
+
+ /** Number of supported compressed page sizes */
+ #define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
+-#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)
+-# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)"
++#define PAGE_ZIP_NUM_SSIZE_MAX (UNIV_PAGE_SIZE_SHIFT_MAX - PAGE_ZIP_MIN_SIZE_SHIFT + 2)
++#if PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)
++# error "PAGE_ZIP_NUM_SSIZE_MAX > (1 << PAGE_ZIP_SSIZE_BITS)"
+ #endif
+
+ /** Compressed page descriptor */
+@@ -98,7 +99,7 @@
+ typedef struct page_zip_stat_struct page_zip_stat_t;
+
+ /** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
+-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
++extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE_MAX - 1];
+
+ /**********************************************************************//**
+ Write the "deleted" flag of a record on a compressed page. The flag must
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h 2010-12-04 15:37:50.593480594 +0900
++++ b/storage/innobase/include/trx0sys.h 2010-12-04 15:55:58.262549554 +0900
+@@ -521,9 +521,9 @@
+ /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
+
+-#if UNIV_PAGE_SIZE < 4096
+-# error "UNIV_PAGE_SIZE < 4096"
+-#endif
++//#if UNIV_PAGE_SIZE < 4096
++//# error "UNIV_PAGE_SIZE < 4096"
++//#endif
+ /** The offset of the MySQL replication info in the trx system header;
+ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+ #define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
+diff -ruN a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
+--- a/storage/innobase/include/univ.i 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/univ.i 2010-12-04 15:55:58.263549721 +0900
+@@ -292,9 +292,13 @@
+ */
+
+ /* The 2-logarithm of UNIV_PAGE_SIZE: */
+-#define UNIV_PAGE_SIZE_SHIFT 14
++/* #define UNIV_PAGE_SIZE_SHIFT 14 */
++#define UNIV_PAGE_SIZE_SHIFT_MAX 14
++#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
+ /* The universal page size of the database */
+-#define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT)
++/* #define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT) */
++#define UNIV_PAGE_SIZE srv_page_size
++#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX)
+
+ /* Maximum number of parallel threads in a parallelized operation */
+ #define UNIV_MAX_PARALLELISM 32
+@@ -401,7 +405,7 @@
+ stored part of the field in the tablespace. The length field then
+ contains the sum of the following flag and the locally stored len. */
+
+-#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
++#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_MAX)
+
+ /* Some macros to improve branch prediction and reduce cache misses */
+ #if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+@@ -504,4 +508,6 @@
+ UNIV_MEM_ALLOC(addr, size); \
+ } while (0)
+
++extern ulint srv_page_size_shift;
++extern ulint srv_page_size;
+ #endif
+diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
+--- a/storage/innobase/log/log0log.c 2010-12-03 15:18:48.899986203 +0900
++++ b/storage/innobase/log/log0log.c 2010-12-04 15:55:58.266551567 +0900
+@@ -603,7 +603,9 @@
+
+ offset = (gr_lsn_size_offset + difference) % group_size;
+
++ if (sizeof(ulint) == 4) {
+ ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
++ }
+
+ /* fprintf(stderr,
+ "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
+@@ -1200,6 +1202,9 @@
+ /* Wipe over possible label of ibbackup --restore */
+ memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4);
+
++ mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
++ srv_log_block_size);
++
+ dest_offset = nth_file * group->file_size;
+
+ #ifdef UNIV_DEBUG
+@@ -1793,9 +1798,7 @@
+ ulint i;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+-#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
+-# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
+-#endif
++ ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
+
+ buf = group->checkpoint_buf;
+
+@@ -1809,6 +1812,7 @@
+ mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+
+ #ifdef UNIV_LOG_ARCHIVE
++#error "UNIV_LOG_ARCHIVE could not be enabled"
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ archived_lsn = IB_ULONGLONG_MAX;
+ } else {
+@@ -1822,7 +1826,9 @@
+
+ mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+ #else /* UNIV_LOG_ARCHIVE */
+- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
++ mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN,
++ (ib_uint64_t)log_group_calc_lsn_offset(
++ log_sys->next_checkpoint_lsn, group));
+ #endif /* UNIV_LOG_ARCHIVE */
+
+ for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c 2010-12-03 17:32:15.638986405 +0900
++++ b/storage/innobase/log/log0recv.c 2010-12-04 15:55:58.269550689 +0900
+@@ -712,8 +712,22 @@
+
+ group->lsn = mach_read_from_8(
+ buf + LOG_CHECKPOINT_LSN);
++
++#ifdef UNIV_LOG_ARCHIVE
++#error "UNIV_LOG_ARCHIVE could not be enabled"
++#endif
++ {
++ ib_uint64_t tmp_lsn_offset = mach_read_from_8(
++ buf + LOG_CHECKPOINT_ARCHIVED_LSN);
++ if (sizeof(ulint) != 4
++ && tmp_lsn_offset != IB_ULONGLONG_MAX) {
++ group->lsn_offset = (ulint) tmp_lsn_offset;
++ } else {
+ group->lsn_offset = mach_read_from_4(
+ buf + LOG_CHECKPOINT_OFFSET);
++ }
++ }
++
+ checkpoint_no = mach_read_from_8(
+ buf + LOG_CHECKPOINT_NO);
+
+@@ -2955,6 +2969,7 @@
+ log_group_t* max_cp_group;
+ log_group_t* up_to_date_group;
+ ulint max_cp_field;
++ ulint log_hdr_log_block_size;
+ ib_uint64_t checkpoint_lsn;
+ ib_uint64_t checkpoint_no;
+ ib_uint64_t old_scanned_lsn;
+@@ -3056,6 +3071,20 @@
+ log_hdr_buf, max_cp_group);
+ }
+
++ log_hdr_log_block_size
++ = mach_read_from_4(log_hdr_buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE);
++ if (log_hdr_log_block_size == 0) {
++ /* 0 means default value */
++ log_hdr_log_block_size = 512;
++ }
++ if (log_hdr_log_block_size != srv_log_block_size) {
++ fprintf(stderr,
++ "InnoDB: Error: The block size of ib_logfile (%lu) "
++ "is not equal to innodb_log_block_size.\n",
++ log_hdr_log_block_size);
++ return(DB_ERROR);
++ }
++
+ #ifdef UNIV_LOG_ARCHIVE
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-12-04 15:52:23.484482786 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 15:55:58.274551431 +0900
+@@ -49,7 +49,7 @@
+
+ #ifndef UNIV_HOTBACKUP
+ /** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
++UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE_MAX - 1];
+ #endif /* !UNIV_HOTBACKUP */
+
+ /* Please refer to ../include/page0zip.ic for a description of the
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-12-04 15:52:23.490513562 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-04 15:55:58.277550562 +0900
+@@ -97,7 +97,7 @@
+ row_merge_block_t. Thus, it must be able to hold one merge record,
+ whose maximum size is the same as the minimum size of
+ row_merge_block_t. */
+-typedef byte mrec_buf_t[UNIV_PAGE_SIZE];
++typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX];
+
+ /** @brief Merge record in row_merge_block_t.
+
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:55:21.384486344 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:55:58.282550845 +0900
+@@ -239,6 +239,13 @@
+ UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
+ UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
+
++/* The universal page size of the database */
++UNIV_INTERN ulint srv_page_size_shift = 0;
++UNIV_INTERN ulint srv_page_size = 0;
++
++/* The log block size */
++UNIV_INTERN ulint srv_log_block_size = 0;
++
+ /* User settable value of the number of pages that must be present
+ in the buffer cache and accessed sequentially for InnoDB to trigger a
+ readahead request. */
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:52:23.502513556 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:55:58.285550583 +0900
+@@ -1521,10 +1521,12 @@
+ }
+ #endif /* UNIV_LOG_ARCHIVE */
+
+- if (srv_n_log_files * srv_log_file_size >= 262144) {
++ if (sizeof(ulint) == 4
++ && srv_n_log_files * srv_log_file_size
++ >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
+ fprintf(stderr,
+ "InnoDB: Error: combined size of log files"
+- " must be < 4 GB\n");
++ " must be < 4 GB on 32-bit systems\n");
+
+ return(DB_ERROR);
+ }
+@@ -1533,7 +1535,7 @@
+
+ for (i = 0; i < srv_n_data_files; i++) {
+ #ifndef __WIN__
+- if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) {
++ if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
+ fprintf(stderr,
+ "InnoDB: Error: file size must be < 4 GB"
+ " with this MySQL binary\n"
--- /dev/null
+# name : innodb_fix_misc.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+#
+# comment: http://lists.mysql.com/commits/112400 is applied also for innodb_plugin
+# to pass innodb_bug53756.test by innodb_plugin
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c 2010-12-04 15:37:50.559480289 +0900
++++ b/storage/innobase/dict/dict0load.c 2010-12-04 15:57:53.078513745 +0900
+@@ -1851,6 +1851,8 @@
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
++ table = NULL;
++
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+@@ -1877,15 +1879,17 @@
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+- if (!btr_pcur_is_on_user_rec(&pcur)
+- || rec_get_deleted_flag(rec, 0)) {
++ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ /* Not found */
++ goto func_exit;
++ }
+
+- btr_pcur_close(&pcur);
+- mtr_commit(&mtr);
+- mem_heap_free(heap);
+-
+- return(NULL);
++ /* Find the first record that is not delete marked */
++ while (rec_get_deleted_flag(rec, 0)) {
++ if (!btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
++ goto func_exit;
++ }
++ rec = btr_pcur_get_rec(&pcur);
+ }
+
+ /*---------------------------------------------------*/
+@@ -1898,12 +1902,7 @@
+
+ /* Check if the table id in record is the one searched for */
+ if (table_id != mach_read_from_8(field)) {
+-
+- btr_pcur_close(&pcur);
+- mtr_commit(&mtr);
+- mem_heap_free(heap);
+-
+- return(NULL);
++ goto func_exit;
+ }
+
+ /* Now we get the table name from the record */
+@@ -1911,7 +1910,7 @@
+ /* Load the table definition to memory */
+ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len),
+ TRUE);
+-
++func_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:57:13.035513990 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:57:53.084513775 +0900
+@@ -12007,7 +12007,7 @@
+ &innobase_storage_engine,
+ innobase_hton_name,
+ "Innobase Oy",
+- "Supports transactions, row-level locking, and foreign keys",
++ "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys",
+ PLUGIN_LICENSE_GPL,
+ innobase_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+diff -ruN a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
+--- a/storage/innobase/include/univ.i 2010-12-04 15:57:13.050485224 +0900
++++ b/storage/innobase/include/univ.i 2010-12-04 15:57:53.091592933 +0900
+@@ -48,6 +48,11 @@
+ #define INNODB_VERSION_MINOR 1
+ #define INNODB_VERSION_BUGFIX 4
+
++#ifndef PERCONA_INNODB_VERSION
++#define PERCONA_INNODB_VERSION 12.1
++#endif
++
++
+ /* The following is the InnoDB version as shown in
+ SELECT plugin_version FROM information_schema.plugins;
+ calculated in make_version_string() in sql/sql_show.cc like this:
+@@ -58,13 +63,15 @@
+ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+
+ /* auxiliary macros to help creating the version as string */
+-#define __INNODB_VERSION(a, b, c) (#a "." #b "." #c)
+-#define _INNODB_VERSION(a, b, c) __INNODB_VERSION(a, b, c)
++#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d)
++#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d)
++
+
+ #define INNODB_VERSION_STR \
+ _INNODB_VERSION(INNODB_VERSION_MAJOR, \
+ INNODB_VERSION_MINOR, \
+- INNODB_VERSION_BUGFIX)
++ INNODB_VERSION_BUGFIX, \
++ PERCONA_INNODB_VERSION)
+
+ #define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
+
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-12-04 15:37:50.598481116 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:57:53.092563335 +0900
+@@ -1194,6 +1194,13 @@
+
+ thr = que_fork_get_first_thr(prebuilt->ins_graph);
+
++ if (!prebuilt->mysql_has_locked && !(prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT))) {
++ fprintf(stderr, "InnoDB: Error: row_insert_for_mysql is called without ha_innobase::external_lock()\n");
++ if (trx->mysql_thd != NULL) {
++ innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++ }
++ }
++
+ if (prebuilt->sql_stat_start) {
+ node->state = INS_NODE_SET_IX_LOCK;
+ prebuilt->sql_stat_start = FALSE;
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c 2010-12-04 15:52:23.494514495 +0900
++++ b/storage/innobase/row/row0sel.c 2010-12-04 16:01:38.320883699 +0900
+@@ -3366,6 +3366,7 @@
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ ibool table_lock_waited = FALSE;
++ ibool problematic_use = FALSE;
+
+ rec_offs_init(offsets_);
+
+@@ -3732,6 +3733,17 @@
+
+ /* Do some start-of-statement preparations */
+
++ if (!prebuilt->mysql_has_locked) {
++ if (!(prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT))) {
++ fprintf(stderr, "InnoDB: Error: row_search_for_mysql() is called without ha_innobase::external_lock()\n");
++ if (trx->mysql_thd != NULL) {
++ innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++ }
++ }
++ problematic_use = TRUE;
++ }
++retry_check:
++
+ if (!prebuilt->sql_stat_start) {
+ /* No need to set an intention lock or assign a read view */
+
+@@ -3742,6 +3754,18 @@
+ " perform a consistent read\n"
+ "InnoDB: but the read view is not assigned!\n",
+ stderr);
++ if (problematic_use) {
++ fprintf(stderr, "InnoDB: It may be caused by calling "
++ "without ha_innobase::external_lock()\n"
++ "InnoDB: For the first-aid, avoiding the crash. "
++ "But it should be fixed ASAP.\n");
++ if (prebuilt->table->flags & (DICT_TF2_TEMPORARY << DICT_TF2_SHIFT)
++ && trx->mysql_thd != NULL) {
++ innobase_mysql_print_thd(stderr, trx->mysql_thd, 600);
++ }
++ prebuilt->sql_stat_start = TRUE;
++ goto retry_check;
++ }
+ trx_print(stderr, trx, 600);
+ fputc('\n', stderr);
+ ut_error;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:57:13.073495392 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 16:02:50.704884053 +0900
+@@ -2032,7 +2032,7 @@
+ if (srv_print_verbose_log) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+- " InnoDB: %s started; "
++ " Percona XtraDB (http://www.percona.com) %s started; "
+ "log sequence number %llu\n",
+ INNODB_VERSION_STR, srv_start_lsn);
+ }
+diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
+--- a/storage/innobase/trx/trx0purge.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0purge.c 2010-12-04 15:57:53.106551154 +0900
+@@ -1131,8 +1131,7 @@
+ /* If we cannot advance the 'purge view' because of an old
+ 'consistent read view', then the DML statements cannot be delayed.
+ Also, srv_max_purge_lag <= 0 means 'infinity'. */
+- if (srv_max_purge_lag > 0
+- && !UT_LIST_GET_LAST(trx_sys->view_list)) {
++ if (srv_max_purge_lag > 0) {
+ float ratio = (float) trx_sys->rseg_history_len
+ / srv_max_purge_lag;
+ if (ratio > ULINT_MAX / 10000) {
--- /dev/null
+# name : innodb_io_patches.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:09:51.273986410 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:10:08.934990091 +0900
+@@ -320,6 +320,7 @@
+
+ /* When we traverse all the flush lists we don't want another
+ thread to add a dirty page to any flush list. */
++ if (srv_buf_pool_instances > 1)
+ log_flush_order_mutex_enter();
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+@@ -343,6 +344,7 @@
+ }
+ }
+
++ if (srv_buf_pool_instances > 1)
+ log_flush_order_mutex_exit();
+
+ /* The returned answer may be out of date: the flush_list can
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:10:08.934990091 +0900
+@@ -1348,7 +1348,7 @@
+
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+
+- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
++ if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN || !srv_flush_neighbor_pages) {
+ /* If there is little space, it is better not to flush
+ any block except from the end of the LRU list */
+
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:10:08.937050537 +0900
+@@ -260,6 +260,10 @@
+ = BUF_READ_AHEAD_LINEAR_AREA(buf_pool);
+ ulint threshold;
+
++ if (!(srv_read_ahead & 2)) {
++ return(0);
++ }
++
+ if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
+ /* No read-ahead to avoid thread deadlocks */
+ return(0);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:09:51.283956391 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:10:08.963980444 +0900
+@@ -425,6 +425,12 @@
+ "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
+ NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
+
++static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
++ "Set to 0 (write and flush once per second),"
++ " 1 (write and flush at each commit)"
++ " or 2 (write at commit, flush once per second).",
++ NULL, NULL, 1, 0, 2, 0);
++
+
+ static handler *innobase_create_handler(handlerton *hton,
+ TABLE_SHARE *table,
+@@ -819,6 +825,17 @@
+ }
+ }
+
++/******************************************************************//**
++Returns the flush_log_at_trx_commit session variable (THDVAR) of thd. */
++extern "C" UNIV_INTERN
++ulong
++thd_flush_log_at_trx_commit(
++/*================================*/
++ void* thd) /*!< in: thread handle, cast to THD* */
++{
++ return(THDVAR((THD*) thd, flush_log_at_trx_commit));
++}
++
+ /********************************************************************//**
+ Obtain the InnoDB transaction of a MySQL thread.
+ @return reference to transaction pointer */
+@@ -2390,6 +2407,9 @@
+ srv_n_read_io_threads = (ulint) innobase_read_io_threads;
+ srv_n_write_io_threads = (ulint) innobase_write_io_threads;
+
++ srv_read_ahead &= 3;
++ srv_adaptive_flushing_method %= 3;
++
+ srv_force_recovery = (ulint) innobase_force_recovery;
+
+ srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+@@ -10107,6 +10127,10 @@
+ && (all
+ || !thd_test_options(
+ thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
++ if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
++ /* choose group commit rather than binlog order */
++ return(error);
++ }
+
+ /* For ibbackup to work the order of transactions in binlog
+ and InnoDB must be the same. Consider the situation
+@@ -10917,9 +10941,9 @@
+
+ static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+- "Purge threads can be either 0 or 1. Default is 0.",
++ "Purge threads can be either 0 or 1. Default is 1.",
+ NULL, NULL,
+- 0, /* Default setting */
++ 1, /* Default setting */
+ 0, /* Minimum value */
+ 1, 0); /* Maximum value */
+
+@@ -10961,12 +10985,18 @@
+ innodb_file_format_max_validate,
+ innodb_file_format_max_update, "Antelope");
+
+-static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
+- PLUGIN_VAR_OPCMDARG,
+- "Set to 0 (write and flush once per second),"
+- " 1 (write and flush at each commit)"
+- " or 2 (write at commit, flush once per second).",
+- NULL, NULL, 1, 0, 2, 0);
++/* Replaced by MYSQL_THDVAR_ULONG(flush_log_at_trx_commit); the old global definition is kept for reference: */
++//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
++// PLUGIN_VAR_OPCMDARG,
++// "Set to 0 (write and flush once per second),"
++// " 1 (write and flush at each commit)"
++// " or 2 (write at commit, flush once per second).",
++// NULL, NULL, 1, 0, 2, 0);
++
++static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
++ PLUGIN_VAR_NOCMDARG,
++ "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
++ NULL, NULL, TRUE);
+
+ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11061,7 +11091,7 @@
+ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
+- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
++ NULL, NULL, 128*1024*1024L, 32*1024*1024L, LONGLONG_MAX, 1024*1024L);
+
+ static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11206,6 +11236,100 @@
+ "trigger a readahead.",
+ NULL, NULL, 56, 0, 64, 0);
+
++static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
++ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++ "The maximum size of the insert buffer. (in bytes)",
++ NULL, NULL, LONGLONG_MAX, 0, LONGLONG_MAX, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_active_contract, srv_ibuf_active_contract,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable active_contract of insert buffer. 0:disable 1:enable",
++ NULL, NULL, 1, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(ibuf_accel_rate, srv_ibuf_accel_rate,
++ PLUGIN_VAR_RQCMDARG,
++ "Tunes amount of insert buffer processing of background, in addition to innodb_io_capacity. (in percentage)",
++ NULL, NULL, 100, 100, 999999999, 0);
++
++static MYSQL_SYSVAR_ULONG(checkpoint_age_target, srv_checkpoint_age_target,
++ PLUGIN_VAR_RQCMDARG,
++ "Control soft limit of checkpoint age. (0 : not control)",
++ NULL, NULL, 0, 0, ~0UL, 0);
++
++static MYSQL_SYSVAR_ULONG(flush_neighbor_pages, srv_flush_neighbor_pages,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable flushing also neighbor pages. 0:disable 1:enable",
++ NULL, NULL, 1, 0, 1, 0);
++
++static
++void
++innodb_read_ahead_update(
++ THD* thd, /*!< in: thread handle (unused) */
++ struct st_mysql_sys_var* var, /*!< in: system variable descriptor (unused) */
++ void* var_ptr, /*!< out: where the new value is stored */
++ const void* save) /*!< in: value to assign, read as a long */
++{
++ *(long *)var_ptr= (*(long *)save) & 3; /* keep the low 2 bits: aliases "0".."3" (indexes 4..7) map onto 0..3 */
++}
++const char *read_ahead_names[]=
++{
++ "none", /* 0 */
++ "random",
++ "linear",
++ "both", /* 3 */
++ /* For compatibility of the older patch */
++ "0", /* 4 ("none" + 4) */
++ "1",
++ "2",
++ "3", /* 7 ("both" + 4) */
++ NullS
++};
++TYPELIB read_ahead_typelib=
++{
++ array_elements(read_ahead_names) - 1, "read_ahead_typelib",
++ read_ahead_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(read_ahead, srv_read_ahead,
++ PLUGIN_VAR_RQCMDARG,
++ "Control read ahead activity (none, random, [linear], both). [from 1.0.5: random read ahead is ignored]",
++ NULL, innodb_read_ahead_update, 2, &read_ahead_typelib);
++
++static
++void
++innodb_adaptive_flushing_method_update(
++ THD* thd, /*!< in: thread handle (unused) */
++ struct st_mysql_sys_var* var, /*!< in: system variable descriptor (unused) */
++ void* var_ptr, /*!< out: where the new value is stored */
++ const void* save) /*!< in: value to assign, read as a long */
++{
++ *(long *)var_ptr= (*(long *)save) % 3; /* 3 methods: fold the legacy aliases "0".."2" (indexes 3..5) onto 0..2 */
++}
++const char *adaptive_flushing_method_names[]= /* enum labels; position in the array is the enum index */
++{
++ "native", /* 0 */
++ "estimate", /* 1 */
++ "keep_average", /* 2 */
++ /* Numeric aliases kept for compatibility with the older patch */
++ "0", /* 3 ("native" + 3) */
++ "1", /* 4 ("estimate" + 3) */
++ "2", /* 5 ("keep_average" + 3) */
++ NullS
++};
++TYPELIB adaptive_flushing_method_typelib=
++{
++ array_elements(adaptive_flushing_method_names) - 1, "adaptive_flushing_method_typelib",
++ adaptive_flushing_method_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(adaptive_flushing_method, srv_adaptive_flushing_method,
++ PLUGIN_VAR_RQCMDARG,
++ "Choose method of innodb_adaptive_flushing. (native, [estimate], keep_average)",
++ NULL, innodb_adaptive_flushing_method_update, 1, &adaptive_flushing_method_typelib);
++
++static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
++ NULL, NULL, 0, 0, 1, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -11226,6 +11350,7 @@
+ MYSQL_SYSVAR(file_format_check),
+ MYSQL_SYSVAR(file_format_max),
+ MYSQL_SYSVAR(flush_log_at_trx_commit),
++ MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
+ MYSQL_SYSVAR(flush_method),
+ MYSQL_SYSVAR(force_recovery),
+ MYSQL_SYSVAR(locks_unsafe_for_binlog),
+@@ -11262,6 +11387,14 @@
+ MYSQL_SYSVAR(show_verbose_locks),
+ MYSQL_SYSVAR(show_locks_held),
+ MYSQL_SYSVAR(version),
++ MYSQL_SYSVAR(ibuf_max_size),
++ MYSQL_SYSVAR(ibuf_active_contract),
++ MYSQL_SYSVAR(ibuf_accel_rate),
++ MYSQL_SYSVAR(checkpoint_age_target),
++ MYSQL_SYSVAR(flush_neighbor_pages),
++ MYSQL_SYSVAR(read_ahead),
++ MYSQL_SYSVAR(adaptive_flushing_method),
++ MYSQL_SYSVAR(enable_unsafe_group_commit),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(use_native_aio),
+ MYSQL_SYSVAR(change_buffering),
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:10:09.073984282 +0900
+@@ -524,8 +524,10 @@
+ grow in size, as the references on the upper levels of the tree can
+ change */
+
+- ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
+- / IBUF_POOL_SIZE_PER_MAX_SIZE;
++ ibuf->max_size = ut_min( buf_pool_get_curr_size() / UNIV_PAGE_SIZE
++ / IBUF_POOL_SIZE_PER_MAX_SIZE, (ulint) srv_ibuf_max_size / UNIV_PAGE_SIZE);
++
++ srv_ibuf_max_size = (long long) ibuf->max_size * UNIV_PAGE_SIZE;
+
+ mutex_create(ibuf_pessimistic_insert_mutex_key,
+ &ibuf_pessimistic_insert_mutex,
+@@ -2651,9 +2653,11 @@
+ size = ibuf->size;
+ max_size = ibuf->max_size;
+
++ if (!srv_ibuf_active_contract) {
+ if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+ return;
+ }
++ }
+
+ sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
+
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0rea.h 2010-12-03 15:10:09.076066335 +0900
+@@ -124,8 +124,7 @@
+
+ /** The size in pages of the area which the read-ahead algorithms read if
+ invoked */
+-#define BUF_READ_AHEAD_AREA(b) \
+- ut_min(64, ut_2_power_up((b)->curr_size / 32))
++#define BUF_READ_AHEAD_AREA(b) 64
+
+ /** @name Modes used in read-ahead @{ */
+ /** read only pages belonging to the insert buffer tree */
+diff -ruN a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
+--- a/storage/innobase/include/ha_prototypes.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/ha_prototypes.h 2010-12-03 15:10:09.078026360 +0900
+@@ -275,5 +275,12 @@
+ /*===================*/
+ void* thd, /*!< in: thread handle (THD*) */
+ ulint value); /*!< in: time waited for the lock */
++/******************************************************************//**
++Returns the flush_log_at_trx_commit session variable of the given thread. */
++
++ulong
++thd_flush_log_at_trx_commit(
++/*================================*/
++ void* thd); /*!< in: thread handle (THD*) */
+
+ #endif
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:09:51.291955835 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:10:09.079029047 +0900
+@@ -141,7 +141,8 @@
+ extern ulint srv_n_log_files;
+ extern ulint srv_log_file_size;
+ extern ulint srv_log_buffer_size;
+-extern ulong srv_flush_log_at_trx_commit;
++//extern ulong srv_flush_log_at_trx_commit;
++extern char srv_use_global_flush_log_at_trx_commit;
+ extern char srv_adaptive_flushing;
+
+
+@@ -214,6 +215,16 @@
+ extern ulong srv_max_purge_lag;
+
+ extern ulong srv_replication_delay;
++
++extern long long srv_ibuf_max_size;
++extern ulint srv_ibuf_active_contract;
++extern ulint srv_ibuf_accel_rate;
++extern ulint srv_checkpoint_age_target;
++extern ulint srv_flush_neighbor_pages;
++extern ulint srv_enable_unsafe_group_commit;
++extern ulint srv_read_ahead;
++extern ulint srv_adaptive_flushing_method;
++
+ /*-------------------------------------------*/
+
+ extern ulint srv_n_rows_inserted;
+@@ -389,8 +400,9 @@
+ when writing data files, but do flush
+ after writing to log files */
+ SRV_UNIX_NOSYNC, /*!< do not flush after writing */
+- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
++ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
+ data files */
++ SRV_UNIX_ALL_O_DIRECT /*!< experimental method: log files are also opened with O_DIRECT */
+ };
+
+ /** Alternatives for file i/o in Windows */
+diff -ruN a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
+--- a/storage/innobase/log/log0log.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/log/log0log.c 2010-12-03 15:10:09.084023562 +0900
+@@ -359,6 +359,33 @@
+ }
+
+ /************************************************************//**
++Gets the modified-age limit used to trigger an asynchronous preflush.
++@return log_sys->max_modified_age_async, or 7/8 of a non-zero
++srv_checkpoint_age_target when that is smaller */
++UNIV_INLINE
++ulint
++log_max_modified_age_async(void)
++{
++	/* Read the global once so the test and the arithmetic agree. */
++	ulint	target = srv_checkpoint_age_target;
++	if (target == 0) {
++		return(log_sys->max_modified_age_async);
++	}
++	return(ut_min(log_sys->max_modified_age_async, target - target / 8));
++}
++
++UNIV_INLINE
++ulint
++log_max_checkpoint_age_async(void)
++{
++	/* @return log_sys->max_checkpoint_age_async, lowered to a non-zero
++	srv_checkpoint_age_target if that is smaller (target read once) */
++	ulint	target = srv_checkpoint_age_target;
++
++	return(target ? ut_min(log_sys->max_checkpoint_age_async, target)
++	       : log_sys->max_checkpoint_age_async);
++}
++
++/************************************************************//**
+ Closes the log.
+ @return lsn */
+ UNIV_INTERN
+@@ -427,7 +454,7 @@
+ }
+ }
+
+- if (checkpoint_age <= log->max_modified_age_async) {
++ if (checkpoint_age <= log_max_modified_age_async()) {
+
+ goto function_exit;
+ }
+@@ -435,8 +462,8 @@
+ oldest_lsn = buf_pool_get_oldest_modification();
+
+ if (!oldest_lsn
+- || lsn - oldest_lsn > log->max_modified_age_async
+- || checkpoint_age > log->max_checkpoint_age_async) {
++ || lsn - oldest_lsn > log_max_modified_age_async()
++ || checkpoint_age > log_max_checkpoint_age_async()) {
+
+ log->check_flush_or_checkpoint = TRUE;
+ }
+@@ -1100,6 +1127,7 @@
+ group = (log_group_t*)((ulint)group - 1);
+
+ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
++ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
+
+ fil_flush(group->space_id);
+@@ -1121,8 +1149,9 @@
+ logs and cannot end up here! */
+
+ if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
++ && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
+ && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
+- && srv_flush_log_at_trx_commit != 2) {
++ && thd_flush_log_at_trx_commit(NULL) != 2) {
+
+ fil_flush(group->space_id);
+ }
+@@ -1501,7 +1530,8 @@
+
+ mutex_exit(&(log_sys->mutex));
+
+- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
++ if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
++ || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
+ /* O_DSYNC means the OS did not buffer the log file at all:
+ so we have also flushed to disk what we have written */
+
+@@ -2120,10 +2150,10 @@
+
+ sync = TRUE;
+ advance = 2 * (age - log->max_modified_age_sync);
+- } else if (age > log->max_modified_age_async) {
++ } else if (age > log_max_modified_age_async()) {
+
+ /* A flush is not urgent: we do an asynchronous preflush */
+- advance = age - log->max_modified_age_async;
++ advance = age - log_max_modified_age_async();
+ } else {
+ advance = 0;
+ }
+@@ -2137,7 +2167,7 @@
+
+ do_checkpoint = TRUE;
+
+- } else if (checkpoint_age > log->max_checkpoint_age_async) {
++ } else if (checkpoint_age > log_max_checkpoint_age_async()) {
+ /* A checkpoint is not urgent: do it asynchronously */
+
+ do_checkpoint = TRUE;
+@@ -3349,6 +3379,17 @@
+ log_sys->flushed_to_disk_lsn,
+ log_sys->last_checkpoint_lsn);
+
++ fprintf(file,
++ "Max checkpoint age %lu\n"
++ "Checkpoint age target %lu\n"
++ "Modified age %lu\n"
++ "Checkpoint age %lu\n",
++ (ulong) log_sys->max_checkpoint_age,
++ (ulong) log_max_checkpoint_age_async(),
++ (ulong) (log_sys->lsn -
++ log_buf_pool_get_oldest_modification()),
++ (ulong) (log_sys->lsn - log_sys->last_checkpoint_lsn));
++
+ current_time = time(NULL);
+
+ time_elapsed = 0.001 + difftime(current_time,
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/log/log0recv.c 2010-12-03 15:10:09.089024191 +0900
+@@ -2906,9 +2906,12 @@
+ ib_uint64_t archived_lsn;
+ #endif /* UNIV_LOG_ARCHIVE */
+ byte* buf;
+- byte log_hdr_buf[LOG_FILE_HDR_SIZE];
++ byte* log_hdr_buf;
++ byte log_hdr_buf_base[LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE];
+ ulint err;
+
++ log_hdr_buf = ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE);
++
+ #ifdef UNIV_LOG_ARCHIVE
+ ut_ad(type != LOG_CHECKPOINT || limit_lsn == IB_ULONGLONG_MAX);
+ /** TRUE when recovering from a checkpoint */
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/os/os0file.c 2010-12-03 15:10:09.093023540 +0900
+@@ -1399,7 +1399,7 @@
+ #endif
+ #ifdef UNIV_NON_BUFFERED_IO
+ # ifndef UNIV_HOTBACKUP
+- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
++ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
+ /* Do not use unbuffered i/o to log files because
+ value 2 denotes that we do not flush the log at every
+ commit, but only once per second */
+@@ -1415,7 +1415,7 @@
+ attributes = 0;
+ #ifdef UNIV_NON_BUFFERED_IO
+ # ifndef UNIV_HOTBACKUP
+- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
++ if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
+ /* Do not use unbuffered i/o to log files because
+ value 2 denotes that we do not flush the log at every
+ commit, but only once per second */
+@@ -1560,6 +1560,11 @@
+ os_file_set_nocache(file, name, mode_str);
+ }
+
++ /* ALL_O_DIRECT: O_DIRECT also for transaction log file */
++ if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
++ os_file_set_nocache(file, name, mode_str);
++ }
++
+ #ifdef USE_FILE_LOCK
+ if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
+
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:09:51.301987792 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:13:29.369986988 +0900
+@@ -188,7 +188,8 @@
+ UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
+ /* size in database pages */
+ UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
+-UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
++//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
++UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
+
+ /* Try to flush dirty pages so as to avoid IO bursts at
+ the checkpoints. */
+@@ -399,6 +400,17 @@
+
+ UNIV_INTERN ulong srv_replication_delay = 0;
+
++UNIV_INTERN long long srv_ibuf_max_size = 0;
++UNIV_INTERN ulint srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint srv_ibuf_accel_rate = 100;
++#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
++
++UNIV_INTERN ulint srv_checkpoint_age_target = 0;
++UNIV_INTERN ulint srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
++
++UNIV_INTERN ulint srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
++UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
+ /*-------------------------------------------*/
+ UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+ UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+@@ -2703,6 +2715,7 @@
+ ulint n_pages_purged = 0;
+ ulint n_bytes_merged;
+ ulint n_pages_flushed;
++ ulint n_pages_flushed_prev = 0;
+ ulint n_bytes_archived;
+ ulint n_tables_to_drop;
+ ulint n_ios;
+@@ -2710,7 +2723,20 @@
+ ulint n_ios_very_old;
+ ulint n_pend_ios;
+ ulint next_itr_time;
++ ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
++ ulint inner_loop = 0;
++ ibool skip_sleep = FALSE;
+ ulint i;
++ struct t_prev_flush_info_struct {
++ ulint count;
++ unsigned space:32;
++ unsigned offset:32;
++ ib_uint64_t oldest_modification;
++ } prev_flush_info[MAX_BUFFER_POOLS];
++
++ ib_uint64_t lsn_old;
++
++ ib_uint64_t oldest_lsn;
+
+ #ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Master thread starts, id %lu\n",
+@@ -2732,6 +2758,9 @@
+
+ mutex_exit(&kernel_mutex);
+
++ mutex_enter(&(log_sys->mutex));
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
+ loop:
+ /*****************************************************************/
+ /* ---- When there is database activity by users, we cycle in this
+@@ -2762,9 +2791,13 @@
+ /* Sleep for 1 second on entrying the for loop below the first time. */
+ next_itr_time = ut_time_ms() + 1000;
+
++ skip_sleep = FALSE;
++
+ for (i = 0; i < 10; i++) {
+ ulint cur_time = ut_time_ms();
+
++ n_pages_flushed = 0; /* initialize */
++
+ /* ALTER TABLE in MySQL requires on Unix that the table handler
+ can drop tables lazily after there no longer are SELECT
+ queries to them. */
+@@ -2788,6 +2821,7 @@
+ srv_main_thread_op_info = "sleeping";
+ srv_main_1_second_loops++;
+
++ if (!skip_sleep) {
+ if (next_itr_time > cur_time
+ && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+
+@@ -2798,10 +2832,26 @@
+ (next_itr_time - cur_time)
+ * 1000));
+ srv_main_sleeps++;
++
++ /*
++ mutex_enter(&(log_sys->mutex));
++ oldest_lsn = buf_pool_get_oldest_modification();
++ ib_uint64_t lsn = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++
++ if(oldest_lsn)
++ fprintf(stderr,
++ "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
++ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
++ lsn - lsn_old);
++ */
+ }
+
+ /* Each iteration should happen at 1 second interval. */
+ next_itr_time = ut_time_ms() + 1000;
++ } /* if (!skip_sleep) */
++
++ skip_sleep = FALSE;
+
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
+@@ -2821,7 +2871,7 @@
+ if (n_pend_ios < SRV_PEND_IO_THRESHOLD
+ && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
++ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
+
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
+@@ -2838,7 +2888,11 @@
+ n_pages_flushed = buf_flush_list(
+ PCT_IO(100), IB_ULONGLONG_MAX);
+
+- } else if (srv_adaptive_flushing) {
++ mutex_enter(&(log_sys->mutex));
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++ prev_adaptive_flushing_method = ULINT_UNDEFINED;
++ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
+
+ /* Try to keep the rate of flushing of dirty
+ pages such that redo log generation does not
+@@ -2854,6 +2908,223 @@
+ n_flush,
+ IB_ULONGLONG_MAX);
+ }
++
++ mutex_enter(&(log_sys->mutex));
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++ prev_adaptive_flushing_method = ULINT_UNDEFINED;
++ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
++
++ /* Try to keep modified age not to exceed
++ max_checkpoint_age * 7/8 line */
++
++ mutex_enter(&(log_sys->mutex));
++
++ oldest_lsn = buf_pool_get_oldest_modification();
++ if (oldest_lsn == 0) {
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++
++ } else {
++ if ((log_sys->lsn - oldest_lsn)
++ > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
++ /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
++ /* We should not flush from here. */
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++ } else if ((log_sys->lsn - oldest_lsn)
++ > (log_sys->max_checkpoint_age)/4 ) {
++
++ /* defence line: modified age above max_checkpoint_age * 1/4 */
++ ib_uint64_t lsn = log_sys->lsn;
++
++ ib_uint64_t level, bpl;
++ buf_page_t* bpage;
++ ulint j;
++
++ mutex_exit(&(log_sys->mutex));
++
++ bpl = 0;
++
++ for (j = 0; j < srv_buf_pool_instances; j++) {
++ buf_pool_t* buf_pool;
++ ulint n_blocks;
++
++ buf_pool = buf_pool_from_array(j);
++
++ /* The scanning flush_list is optimistic here */
++
++ level = 0;
++ n_blocks = 0;
++ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++ while (bpage != NULL) {
++ ib_uint64_t oldest_modification = bpage->oldest_modification;
++ if (oldest_modification != 0) {
++ level += log_sys->max_checkpoint_age
++ - (lsn - oldest_modification);
++ }
++ bpage = UT_LIST_GET_NEXT(list, bpage);
++ n_blocks++;
++ }
++
++ if (level) {
++ bpl += ((ib_uint64_t) n_blocks * n_blocks
++ * (lsn - lsn_old)) / level;
++ }
++
++ }
++
++ if (!srv_use_doublewrite_buf) {
++ /* flush is faster than when doublewrite */
++ bpl = (bpl * 7) / 8;
++ }
++
++ if (bpl) {
++retry_flush_batch:
++ n_pages_flushed = buf_flush_list(bpl,
++ oldest_lsn + (lsn - lsn_old));
++ if (n_pages_flushed == ULINT_UNDEFINED) {
++ os_thread_sleep(5000);
++ goto retry_flush_batch;
++ }
++ }
++
++ lsn_old = lsn;
++ /*
++ fprintf(stderr,
++ "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
++ (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
++ lsn - lsn_old, bpl);
++ */
++ } else {
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++ }
++ }
++ prev_adaptive_flushing_method = 1;
++ } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
++ buf_pool_t* buf_pool;
++ buf_page_t* bpage;
++ ib_uint64_t lsn;
++ ulint j;
++
++ mutex_enter(&(log_sys->mutex));
++ oldest_lsn = buf_pool_get_oldest_modification();
++ lsn = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++
++ /* raise the loop frequency tenfold: 10 passes per second */
++ next_itr_time -= 900; /* 1000 - 900 == 100 */
++ inner_loop++;
++ if (inner_loop < 10) {
++ i--;
++ } else {
++ inner_loop = 0;
++ }
++
++ if (prev_adaptive_flushing_method == 2) {
++ lint n_flush;
++ lint blocks_sum, new_blocks_sum, flushed_blocks_sum;
++
++ blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
++
++ /* prev_flush_info[j] should be the previous loop's */
++ for (j = 0; j < srv_buf_pool_instances; j++) {
++ lint blocks_num, new_blocks_num, flushed_blocks_num;
++ ibool found;
++
++ buf_pool = buf_pool_from_array(j);
++
++ blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
++ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++ new_blocks_num = 0;
++
++ found = FALSE;
++ while (bpage != NULL) {
++ if (prev_flush_info[j].space == bpage->space
++ && prev_flush_info[j].offset == bpage->offset
++ && prev_flush_info[j].oldest_modification
++ == bpage->oldest_modification) {
++ found = TRUE;
++ break;
++ }
++ bpage = UT_LIST_GET_NEXT(list, bpage);
++ new_blocks_num++;
++ }
++ if (!found) {
++ new_blocks_num = blocks_num;
++ }
++
++ flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
++ - blocks_num;
++ if (flushed_blocks_num < 0) {
++ flushed_blocks_num = 0;
++ }
++
++ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
++ if (bpage) {
++ prev_flush_info[j].space = bpage->space;
++ prev_flush_info[j].offset = bpage->offset;
++ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
++ } else {
++ prev_flush_info[j].space = 0;
++ prev_flush_info[j].offset = 0;
++ prev_flush_info[j].oldest_modification = 0;
++ }
++
++ new_blocks_sum += new_blocks_num;
++ flushed_blocks_sum += flushed_blocks_num;
++ blocks_sum += blocks_num;
++ }
++
++ n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
++ if (flushed_blocks_sum > n_pages_flushed_prev) {
++ n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
++ }
++
++ if (n_flush > 0) {
++ n_flush++;
++ n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
++ } else {
++ n_pages_flushed = 0;
++ }
++ } else {
++ /* store previous first pages of the flush_list */
++ for (j = 0; j < srv_buf_pool_instances; j++) {
++ buf_pool = buf_pool_from_array(j);
++
++ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
++
++ prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
++ if (bpage) {
++ prev_flush_info[j].space = bpage->space;
++ prev_flush_info[j].offset = bpage->offset;
++ prev_flush_info[j].oldest_modification = bpage->oldest_modification;
++ } else {
++ prev_flush_info[j].space = 0;
++ prev_flush_info[j].offset = 0;
++ prev_flush_info[j].oldest_modification = 0;
++ }
++ }
++ n_pages_flushed = 0;
++ }
++
++ lsn_old = lsn;
++ prev_adaptive_flushing_method = 2;
++ } else {
++ mutex_enter(&(log_sys->mutex));
++ lsn_old = log_sys->lsn;
++ mutex_exit(&(log_sys->mutex));
++ prev_adaptive_flushing_method = ULINT_UNDEFINED;
++ }
++
++ if (n_pages_flushed == ULINT_UNDEFINED) {
++ n_pages_flushed_prev = 0;
++ } else {
++ n_pages_flushed_prev = n_pages_flushed;
+ }
+
+ if (srv_activity_count == old_activity_count) {
+@@ -2902,7 +3173,7 @@
+ even if the server were active */
+
+ srv_main_thread_op_info = "doing insert buffer merge";
+- ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
++ ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
+
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
+@@ -3010,7 +3281,7 @@
+ buf_flush_list below. Otherwise, the system favors
+ clean pages over cleanup throughput. */
+ n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
+- PCT_IO(100));
++ PCT_IBUF_IO(100));
+ }
+
+ srv_main_thread_op_info = "reserving kernel mutex";
+@@ -3156,6 +3427,7 @@
+ srv_slot_t* slot;
+ ulint slot_no = ULINT_UNDEFINED;
+ ulint n_total_purged = ULINT_UNDEFINED;
++ ulint next_itr_time;
+
+ ut_a(srv_n_purge_threads == 1);
+
+@@ -3178,9 +3450,12 @@
+
+ mutex_exit(&kernel_mutex);
+
++ next_itr_time = ut_time_ms();
++
+ while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
+
+ ulint n_pages_purged;
++ ulint cur_time;
+
+ /* If there are very few records to purge or the last
+ purge didn't purge any records then wait for activity.
+@@ -3221,6 +3496,16 @@
+ } while (n_pages_purged > 0 && !srv_fast_shutdown);
+
+ srv_sync_log_buffer_in_background();
++
++ cur_time = ut_time_ms();
++ if (next_itr_time > cur_time) {
++ os_thread_sleep(ut_min(1000000,
++ (next_itr_time - cur_time)
++ * 1000));
++ next_itr_time = ut_time_ms() + 1000;
++ } else {
++ next_itr_time = cur_time + 1000;
++ }
+ }
+
+ mutex_enter(&kernel_mutex);
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-03 15:10:09.103023543 +0900
+@@ -1184,6 +1184,9 @@
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
+
++ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
++ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
++
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
+ srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
+
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:10:09.106023937 +0900
+@@ -865,6 +865,7 @@
+ trx->read_view = NULL;
+
+ if (lsn) {
++ ulint flush_log_at_trx_commit;
+
+ mutex_exit(&kernel_mutex);
+
+@@ -873,6 +874,12 @@
+ trx_undo_insert_cleanup(trx);
+ }
+
++ if (srv_use_global_flush_log_at_trx_commit) {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++ } else {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++ }
++
+ /* NOTE that we could possibly make a group commit more
+ efficient here: call os_thread_yield here to allow also other
+ trxs to come to commit! */
+@@ -904,9 +911,9 @@
+ if (trx->flush_log_later) {
+ /* Do nothing yet */
+ trx->must_flush_log_later = TRUE;
+- } else if (srv_flush_log_at_trx_commit == 0) {
++ } else if (flush_log_at_trx_commit == 0) {
+ /* Do nothing */
+- } else if (srv_flush_log_at_trx_commit == 1) {
++ } else if (flush_log_at_trx_commit == 1) {
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+@@ -918,7 +925,7 @@
+
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+ }
+- } else if (srv_flush_log_at_trx_commit == 2) {
++ } else if (flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
+@@ -1582,16 +1589,23 @@
+ trx_t* trx) /*!< in: trx handle */
+ {
+ ib_uint64_t lsn = trx->commit_lsn;
++ ulint flush_log_at_trx_commit;
+
+ ut_a(trx);
+
+ trx->op_info = "flushing log";
+
++ if (srv_use_global_flush_log_at_trx_commit) {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++ } else {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++ }
++
+ if (!trx->must_flush_log_later) {
+ /* Do nothing */
+- } else if (srv_flush_log_at_trx_commit == 0) {
++ } else if (flush_log_at_trx_commit == 0) {
+ /* Do nothing */
+- } else if (srv_flush_log_at_trx_commit == 1) {
++ } else if (flush_log_at_trx_commit == 1) {
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+@@ -1602,7 +1616,7 @@
+
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+ }
+- } else if (srv_flush_log_at_trx_commit == 2) {
++ } else if (flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
+@@ -1855,6 +1869,8 @@
+ /*--------------------------------------*/
+
+ if (lsn) {
++ ulint flush_log_at_trx_commit;
++
+ /* Depending on the my.cnf options, we may now write the log
+ buffer to the log files, making the prepared state of the
+ transaction durable if the OS does not crash. We may also
+@@ -1874,9 +1890,15 @@
+
+ mutex_exit(&kernel_mutex);
+
+- if (srv_flush_log_at_trx_commit == 0) {
++ if (srv_use_global_flush_log_at_trx_commit) {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
++ } else {
++ flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
++ }
++
++ if (flush_log_at_trx_commit == 0) {
+ /* Do nothing */
+- } else if (srv_flush_log_at_trx_commit == 1) {
++ } else if (flush_log_at_trx_commit == 1) {
+ if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
+ /* Write the log but do not flush it to disk */
+
+@@ -1888,7 +1910,7 @@
+
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
+ }
+- } else if (srv_flush_log_at_trx_commit == 2) {
++ } else if (flush_log_at_trx_commit == 2) {
+
+ /* Write the log but do not flush it to disk */
+
--- /dev/null
+# name : innodb_lru_dump_restore.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:49:59.185023424 +0900
++++ b/storage/innobase/buf/buf0lru.c 2010-12-04 15:33:37.626482350 +0900
+@@ -2250,6 +2250,285 @@
+ memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
+ }
+
++/********************************************************************//**
++Dumps the (space, offset) pair of every page on each buffer pool LRU list to LRU_DUMP_FILE. */
++#define LRU_DUMP_FILE "ib_lru_dump"
++
++UNIV_INTERN
++ibool
++buf_LRU_file_dump(void)
++/*===================*/
++{
++ os_file_t dump_file = -1;
++ ibool success;
++ byte* buffer_base = NULL; /* raw allocation, freed at the end label */
++ byte* buffer = NULL; /* buffer_base aligned to UNIV_PAGE_SIZE */
++ buf_page_t* bpage;
++ ulint buffers; /* number of full pages written to the file so far */
++ ulint offset; /* index of the next free 4-byte slot in buffer */
++ ibool ret = FALSE; /* return value; set to TRUE only after the final write succeeds */
++ ulint i;
++
++ for (i = 0; i < srv_n_data_files; i++) { /* refuse to dump if a data file name contains LRU_DUMP_FILE */
++ if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
++ fprintf(stderr,
++ " InnoDB: The name '%s' seems to be used for"
++ " innodb_data_file_path. Dumping LRU list is not"
++ " done for safeness.\n", LRU_DUMP_FILE);
++ goto end;
++ }
++ }
++
++ buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE); /* twice the page size leaves room to align buffer */
++ buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
++ if (!buffer) {
++ fprintf(stderr,
++ " InnoDB: cannot allocate buffer.\n");
++ goto end;
++ }
++
++ dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
++ OS_FILE_NORMAL, OS_DATA_FILE, &success);
++ if (!success) {
++ os_file_get_last_error(TRUE);
++ fprintf(stderr,
++ " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
++ goto end;
++ }
++
++ buffers = offset = 0;
++
++ for (i = 0; i < srv_buf_pool_instances; i++) {
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++
++ mutex_enter(&buf_pool->LRU_list_mutex); /* held for the whole walk of this instance, including the file writes below */
++ bpage = UT_LIST_GET_LAST(buf_pool->LRU); /* walk from the last list element towards the first */
++
++ while (bpage != NULL) {
++ if (offset == 0) { /* starting a fresh page: clear it first */
++ memset(buffer, 0, UNIV_PAGE_SIZE);
++ }
++
++ mach_write_to_4(buffer + offset * 4, bpage->space); /* one record = two 4-byte values: space, then offset */
++ offset++;
++ mach_write_to_4(buffer + offset * 4, bpage->offset);
++ offset++;
++
++ if (offset == UNIV_PAGE_SIZE/4) { /* buffer is full: write it out as page number 'buffers' */
++ success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
++ (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++ (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++ UNIV_PAGE_SIZE);
++ if (!success) {
++ mutex_exit(&buf_pool->LRU_list_mutex); /* release before leaving through the end label */
++ fprintf(stderr,
++ " InnoDB: cannot write page %lu of %s\n",
++ buffers, LRU_DUMP_FILE);
++ goto end;
++ }
++ buffers++;
++ offset = 0;
++ }
++
++ bpage = UT_LIST_GET_PREV(LRU, bpage);
++ }
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ }
++
++ if (offset == 0) {
++ memset(buffer, 0, UNIV_PAGE_SIZE);
++ }
++
++ mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL); /* end marker: a record whose two values are 0xFFFFFFFF */
++ offset++;
++ mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
++ offset++;
++
++ success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
++ (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++ (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++ UNIV_PAGE_SIZE);
++ if (!success) { /* NOTE(review): unlike the in-loop write, this failure prints no message */
++ goto end;
++ }
++
++ ret = TRUE;
++end:
++ if (dump_file != -1)
++ os_file_close(dump_file);
++ if (buffer_base)
++ ut_free(buffer_base);
++
++ return(ret);
++}
++
++typedef struct {
++ ib_uint32_t space_id; /*!< first 4-byte value of a dump file record */
++ ib_uint32_t page_no; /*!< second 4-byte value of a dump file record */
++} dump_record_t; /* one LRU dump entry as loaded by buf_LRU_file_restore() */
++
++static int dump_record_cmp(const void *a, const void *b)
++{
++	const dump_record_t *rec1 = (const dump_record_t *) a;
++	const dump_record_t *rec2 = (const dump_record_t *) b;
++
++	/* qsort() comparator: order by space_id, then by page_no */
++	if (rec1->space_id != rec2->space_id)
++		return rec1->space_id < rec2->space_id ? -1 : 1;
++	if (rec1->page_no != rec2->page_no)
++		return rec1->page_no < rec2->page_no ? -1 : 1;
++
++	return 0;
++}
++
++/********************************************************************//**
++Reads the records of LRU_DUMP_FILE, sorts them and requests a read of each listed page that passes fil_area_is_exist(). */
++UNIV_INTERN
++ibool
++buf_LRU_file_restore(void)
++/*======================*/
++{
++ os_file_t dump_file = -1;
++ ibool success;
++ byte* buffer_base = NULL; /* raw allocation, freed at the end label */
++ byte* buffer = NULL; /* buffer_base aligned to UNIV_PAGE_SIZE */
++ ulint buffers; /* index of the next file page to read */
++ ulint offset;
++ ulint reads = 0; /* sum of the buf_read_page_low() return values */
++ ulint req = 0; /* number of buf_read_page_low() calls */
++ ibool terminated = FALSE; /* set once the end marker is seen or records[] is full */
++ ibool ret = FALSE; /* return value; TRUE only if the end label is reached normally */
++ dump_record_t* records = NULL;
++ ulint size; /* file size as reported by os_file_get_size() */
++ ulint size_high;
++ ulint length; /* number of entries stored in records[] */
++
++ dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
++ LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
++ if (!success || !os_file_get_size(dump_file, &size, &size_high)) {
++ os_file_get_last_error(TRUE);
++ fprintf(stderr,
++ " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
++ goto end;
++ }
++ if (size == 0 || size_high > 0 || size % 8) { /* records are 8 bytes each; a non-zero size_high is rejected */
++ fprintf(stderr, " InnoDB: broken LRU dump file\n");
++ goto end;
++ }
++ buffer_base = ut_malloc(2 * UNIV_PAGE_SIZE);
++ buffer = ut_align(buffer_base, UNIV_PAGE_SIZE);
++ records = ut_malloc(size); /* as many bytes as the file holds; filling stops at size / 8 entries below */
++ if (!buffer || !records) {
++ fprintf(stderr,
++ " InnoDB: cannot allocate buffer.\n");
++ goto end;
++ }
++
++ buffers = 0;
++ length = 0;
++ while (!terminated) { /* read the file one UNIV_PAGE_SIZE chunk at a time */
++ success = os_file_read(dump_file, buffer,
++ (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
++ (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
++ UNIV_PAGE_SIZE);
++ if (!success) {
++ fprintf(stderr,
++ " InnoDB: cannot read page %lu of %s,"
++ " or meet unexpected terminal.\n",
++ buffers, LRU_DUMP_FILE);
++ goto end;
++ }
++
++ for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) { /* offset counts 4-byte words; one record is two words */
++ ulint space_id;
++ ulint page_no;
++
++ space_id = mach_read_from_4(buffer + offset * 4);
++ page_no = mach_read_from_4(buffer + (offset + 1) * 4);
++ if (space_id == 0xFFFFFFFFUL
++ || page_no == 0xFFFFFFFFUL) { /* end marker written by buf_LRU_file_dump() */
++ terminated = TRUE;
++ break;
++ }
++
++ records[length].space_id = space_id;
++ records[length].page_no = page_no;
++ length++;
++ if (length * 8 >= size) { /* size bytes of records consumed without an end marker: stop here */
++ fprintf(stderr,
++ " InnoDB: could not find the "
++ "end-of-file marker after reading "
++ "the expected %lu bytes from the "
++ "LRU dump file.\n"
++ " InnoDB: this could be caused by a "
++ "broken or incomplete file.\n"
++ " InnoDB: trying to process what has "
++ "been read so far.\n",
++ size);
++ terminated= TRUE;
++ break;
++ }
++ }
++ buffers++;
++ }
++
++ qsort(records, length, sizeof(dump_record_t), dump_record_cmp); /* order by (space_id, page_no) */
++
++ for (offset = 0; offset < length; offset++) {
++ ulint space_id;
++ ulint page_no;
++ ulint zip_size;
++ ulint err;
++ ib_int64_t tablespace_version;
++
++ space_id = records[offset].space_id;
++ page_no = records[offset].page_no;
++
++ if (offset % 16 == 15) { /* once every 16 records: wake the i/o handlers and call buf_flush_free_margins() */
++ os_aio_simulated_wake_handler_threads();
++ buf_flush_free_margins(FALSE);
++ }
++
++ zip_size = fil_space_get_zip_size(space_id);
++ if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { /* NOTE(review): presumably the tablespace no longer exists — confirm */
++ continue;
++ }
++
++ if (fil_area_is_exist(space_id, zip_size, page_no, 0,
++ zip_size ? zip_size : UNIV_PAGE_SIZE)) {
++
++ tablespace_version = fil_space_get_version(space_id);
++
++ req++;
++ reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
++ | OS_AIO_SIMULATED_WAKE_LATER,
++ space_id, zip_size, TRUE,
++ tablespace_version, page_no, NULL);
++ buf_LRU_stat_inc_io();
++ }
++ }
++
++ os_aio_simulated_wake_handler_threads(); /* final wake-up for requests queued since the last batch of 16 */
++ buf_flush_free_margins(FALSE);
++
++ ut_print_timestamp(stderr);
++ fprintf(stderr,
++ " InnoDB: reading pages based on the dumped LRU list was done."
++ " (requested: %lu, read: %lu)\n", req, reads);
++ ret = TRUE;
++end:
++ if (dump_file != -1)
++ os_file_close(dump_file);
++ if (buffer_base)
++ ut_free(buffer_base);
++ if (records)
++ ut_free(records);
++
++ return(ret);
++}
++
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ /**********************************************************************//**
+ Validates the LRU list for one buffer pool instance. */
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 17:49:11.576124814 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:33:37.628480605 +0900
+@@ -58,7 +58,7 @@
+ which case it is never read into the pool, or if the tablespace does
+ not exist or is being dropped
+ @return 1 if read request is issued. 0 if it is not */
+-static
++UNIV_INTERN
+ ulint
+ buf_read_page_low(
+ /*==============*/
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-03 17:49:11.581025127 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:33:37.632482885 +0900
+@@ -4939,6 +4939,78 @@
+ return(DB_SUCCESS);
+ }
+
++/********************************************************************//**
++Checks whether block_offset falls within the files of tablespace space_id; returns FALSE if the space or a covering file is not found. */
++UNIV_INTERN
++ibool
++fil_area_is_exist(
++/*==============*/
++ ulint space_id, /*!< in: space id */
++ ulint zip_size, /*!< in: compressed page size in bytes;
++ 0 for uncompressed pages (not referenced in this function) */
++ ulint block_offset, /*!< in: offset in number of blocks */
++ ulint byte_offset, /*!< in: remainder of offset in bytes; in
++ aio this must be divisible by the OS block
++ size (not referenced in this function) */
++ ulint len) /*!< in: how many bytes to read or write; this
++ must not cross a file boundary; in aio this
++ must be a block size multiple (not referenced in this function) */
++{
++ fil_space_t* space;
++ fil_node_t* node;
++
++ /* Reserve the fil_system mutex and make sure that we can open at
++ least one file while holding it, if the file is not already open */
++
++ fil_mutex_enter_and_prepare_for_io(space_id);
++
++ space = fil_space_get_by_id(space_id);
++
++ if (!space) {
++ mutex_exit(&fil_system->mutex);
++ return(FALSE);
++ }
++
++ node = UT_LIST_GET_FIRST(space->chain);
++
++ for (;;) { /* walk the file chain, making block_offset relative to the current node */
++ if (UNIV_UNLIKELY(node == NULL)) { /* chain exhausted: the offset is beyond the last file */
++ mutex_exit(&fil_system->mutex);
++ return(FALSE);
++ }
++
++ if (space->id != 0 && node->size == 0) {
++ /* We do not know the size of a single-table tablespace
++ before we open the file */
++
++ break;
++ }
++
++ if (node->size > block_offset) {
++ /* Found! */
++ break;
++ } else {
++ block_offset -= node->size;
++ node = UT_LIST_GET_NEXT(chain, node);
++ }
++ }
++
++ /* Open file if closed */
++ fil_node_prepare_for_io(node, fil_system, space);
++ fil_node_complete_io(node, fil_system, OS_FILE_READ); /* NOTE(review): no read is issued here; presumably this only balances the prepare call — confirm */
++
++ /* Check that at least the start offset is within the bounds of a
++ single-table tablespace */
++ if (UNIV_UNLIKELY(node->size <= block_offset)
++ && space->id != 0 && space->purpose == FIL_TABLESPACE) {
++ mutex_exit(&fil_system->mutex);
++ return(FALSE);
++ }
++
++ mutex_exit(&fil_system->mutex);
++ return(TRUE);
++}
++
+ #ifndef UNIV_HOTBACKUP
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:49:11.589956135 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:33:37.645555490 +0900
+@@ -11708,6 +11708,12 @@
+ "Limit the allocated memory for dictionary cache. (0: unlimited)",
+ NULL, NULL, 0, 0, LONG_MAX, 0);
+
++static MYSQL_SYSVAR_UINT(auto_lru_dump, srv_auto_lru_dump,
++ PLUGIN_VAR_RQCMDARG,
++ "Time in seconds between automatic buffer pool dumps. "
++ "0 (the default) disables automatic dumps.",
++ NULL, NULL, 0, 0, UINT_MAX32, 0);
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -11791,6 +11797,7 @@
+ #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+ MYSQL_SYSVAR(read_ahead_threshold),
+ MYSQL_SYSVAR(io_capacity),
++ MYSQL_SYSVAR(auto_lru_dump),
+ MYSQL_SYSVAR(purge_threads),
+ MYSQL_SYSVAR(purge_batch_size),
+ NULL
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 17:34:35.286211349 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-04 15:33:37.677480733 +0900
+@@ -50,6 +50,7 @@
+ #include "trx0rseg.h" /* for trx_rseg_struct */
+ #include "trx0sys.h" /* for trx_sys */
+ #include "dict0dict.h" /* for dict_sys */
++#include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */
+ }
+
+ static const char plugin_author[] = "Innobase Oy";
+@@ -4255,6 +4256,36 @@
+ "Hello!");
+ goto end_func;
+ }
++ else if (!strncasecmp("XTRA_LRU_DUMP", ptr, 13)) {
++ ut_print_timestamp(stderr);
++ fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_DUMP'"
++ " was detected.\n");
++
++ if (buf_LRU_file_dump()) {
++ field_store_string(i_s_table->field[0],
++ "XTRA_LRU_DUMP was succeeded.");
++ } else {
++ field_store_string(i_s_table->field[0],
++ "XTRA_LRU_DUMP was failed.");
++ }
++
++ goto end_func;
++ }
++ else if (!strncasecmp("XTRA_LRU_RESTORE", ptr, 16)) {
++ ut_print_timestamp(stderr);
++ fprintf(stderr, " InnoDB: administration command 'XTRA_LRU_RESTORE'"
++ " was detected.\n");
++
++ if (buf_LRU_file_restore()) {
++ field_store_string(i_s_table->field[0],
++ "XTRA_LRU_RESTORE was succeeded.");
++ } else {
++ field_store_string(i_s_table->field[0],
++ "XTRA_LRU_RESTORE was failed.");
++ }
++
++ goto end_func;
++ }
+
+ field_store_string(i_s_table->field[0],
+ "Undefined XTRA_* command.");
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h 2010-12-03 15:49:59.223956070 +0900
++++ b/storage/innobase/include/buf0lru.h 2010-12-04 15:33:37.681481467 +0900
+@@ -219,6 +219,18 @@
+ void
+ buf_LRU_stat_update(void);
+ /*=====================*/
++/********************************************************************//**
++Dump the LRU page list to the specific file. */
++UNIV_INTERN
++ibool
++buf_LRU_file_dump(void);
++/*===================*/
++/********************************************************************//**
++Read the pages based on the specific file.*/
++UNIV_INTERN
++ibool
++buf_LRU_file_restore(void);
++/*======================*/
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
+--- a/storage/innobase/include/buf0rea.h 2010-12-03 17:49:11.596953870 +0900
++++ b/storage/innobase/include/buf0rea.h 2010-12-04 15:33:37.682563900 +0900
+@@ -31,6 +31,37 @@
+ #include "buf0types.h"
+
+ /********************************************************************//**
++Low-level function which reads a page asynchronously from a file to the
++buffer buf_pool if it is not already there, in which case does nothing.
++Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
++flag is cleared and the x-lock released by an i/o-handler thread.
++@return 1 if a read request was queued, 0 if the page already resided
++in buf_pool, or if the page is in the doublewrite buffer blocks in
++which case it is never read into the pool, or if the tablespace does
++not exist or is being dropped
++@return 1 if read request is issued. 0 if it is not */
++UNIV_INTERN
++ulint
++buf_read_page_low(
++/*==============*/
++ ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
++ trying to read from a non-existent tablespace, or a
++ tablespace which is just now being dropped */
++ ibool sync, /*!< in: TRUE if synchronous aio is desired */
++ ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
++ ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
++ at read-ahead functions) */
++ ulint space, /*!< in: space id */
++ ulint zip_size,/*!< in: compressed page size, or 0 */
++ ibool unzip, /*!< in: TRUE=request uncompressed page */
++ ib_int64_t tablespace_version, /*!< in: if the space memory object has
++ this timestamp different from what we are giving here,
++ treat the tablespace as dropped; this is a timestamp we
++ use to stop dangling page reads from a tablespace
++ which we have DISCARDed + IMPORTed back */
++ ulint offset, /*!< in: page number */
++ trx_t* trx);
++/********************************************************************//**
+ High-level function which reads a page asynchronously from a file to the
+ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+ an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-12-03 17:49:11.597953501 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-04 15:33:37.684551372 +0900
+@@ -644,6 +644,22 @@
+ void* message, /*!< in: message for aio handler if non-sync
+ aio used, else ignored */
+ trx_t* trx);
++/********************************************************************//**
++Tests whether block_offset lies inside a file of the tablespace. @return TRUE if so */
++UNIV_INTERN
++ibool
++fil_area_is_exist(
++/*==============*/
++ ulint space_id, /*!< in: space id */
++ ulint zip_size, /*!< in: compressed page size in bytes;
++ 0 for uncompressed pages */
++ ulint block_offset, /*!< in: offset in number of blocks */
++ ulint byte_offset, /*!< in: remainder of offset in bytes; in
++ aio this must be divisible by the OS block
++ size */
++ ulint len); /*!< in: how many bytes to read or write; this
++ must not cross a file boundary; in aio this
++ must be a block size multiple */
+ /**********************************************************************//**
+ Waits for an aio operation to complete. This function is used to write the
+ handler for completed requests. The aio array of pending requests is divided
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 17:49:11.603969747 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:33:37.685550816 +0900
+@@ -356,6 +356,9 @@
+ reading of a disk page */
+ extern ulint srv_buf_pool_reads;
+
++/** Time in seconds between automatic buffer pool dumps */
++extern uint srv_auto_lru_dump;
++
+ /** Status variables to be passed to MySQL */
+ typedef struct export_var_struct export_struc;
+
+@@ -655,6 +658,16 @@
+ /*=====================*/
+ void* arg); /*!< in: a dummy parameter required by
+ os_thread_create */
++/*********************************************************************//**
++A thread which restores the buffer pool from a dump file on startup and does
++periodic buffer pool dumps.
++@return a dummy parameter */
++UNIV_INTERN
++os_thread_ret_t
++srv_LRU_dump_restore_thread(
++/*====================*/
++ void* arg); /*!< in: a dummy parameter required by
++ os_thread_create */
+ /******************************************************************//**
+ Outputs to a file the output of the InnoDB Monitor.
+ @return FALSE if not all information printed
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:49:11.620986661 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:33:37.708550811 +0900
+@@ -327,6 +327,9 @@
+ reading of a disk page */
+ UNIV_INTERN ulint srv_buf_pool_reads = 0;
+
++/** Time in seconds between automatic buffer pool dumps */
++UNIV_INTERN uint srv_auto_lru_dump = 0;
++
+ /* structure to pass status variables to MySQL */
+ UNIV_INTERN export_struc export_vars;
+
+@@ -2663,6 +2666,56 @@
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
++ os_thread_exit(NULL);
++
++ OS_THREAD_DUMMY_RETURN;
++}
++
++/*********************************************************************//**
++A thread which restores the buffer pool from a dump file on startup and does
++periodic buffer pool dumps.
++@return a dummy parameter */
++UNIV_INTERN
++os_thread_ret_t
++srv_LRU_dump_restore_thread(
++/*====================*/
++ void* arg __attribute__((unused)))
++ /*!< in: a dummy parameter required by
++ os_thread_create */
++{
++ uint auto_lru_dump;
++ time_t last_dump_time;
++ time_t time_elapsed;
++
++#ifdef UNIV_DEBUG_THREAD_CREATION
++ fprintf(stderr, "LRU dump/restore thread starts, id %lu\n",
++ os_thread_pf(os_thread_get_curr_id()));
++#endif
++
++ if (srv_auto_lru_dump)
++ buf_LRU_file_restore();
++
++ last_dump_time = time(NULL);
++
++loop:
++ os_thread_sleep(5000000);
++
++ if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
++ goto exit_func;
++ }
++
++ time_elapsed = time(NULL) - last_dump_time;
++ auto_lru_dump = srv_auto_lru_dump;
++ if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
++ last_dump_time = time(NULL);
++ buf_LRU_file_dump();
++ }
++
++ goto loop;
++exit_func:
++ /* We count the number of threads in os_thread_exit(). A created
++ thread should always use that to exit and not use return() to exit. */
++
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-03 15:18:48.916955609 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:33:37.711484798 +0900
+@@ -121,9 +121,9 @@
+ static os_file_t files[1000];
+
+ /** io_handler_thread parameters for thread identification */
+-static ulint n[SRV_MAX_N_IO_THREADS + 6];
++static ulint n[SRV_MAX_N_IO_THREADS + 7];
+ /** io_handler_thread identifiers */
+-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
++static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7];
+
+ /** We use this mutex to test the return value of pthread_mutex_trylock
+ on successful locking. HP-UX does NOT return 0, though Linux et al do. */
+@@ -1737,6 +1737,10 @@
+ os_thread_create(&srv_monitor_thread, NULL,
+ thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+
++ /* Create the thread which automatically dumps/restores the buffer pool */
++ os_thread_create(&srv_LRU_dump_restore_thread, NULL,
++ thread_ids + 5 + SRV_MAX_N_IO_THREADS);
++
+ srv_is_being_started = FALSE;
+
+ err = dict_create_or_check_foreign_constraint_tables();
--- /dev/null
+# name : innodb_opt_lru_count.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:20:49.593024343 +0900
+@@ -137,7 +137,7 @@
+ ut_ad(buf_page_get_state(ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:18:48.866986963 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:20:49.595987311 +0900
+@@ -881,9 +881,9 @@
+ block->page.in_zip_hash = FALSE;
+ block->page.in_flush_list = FALSE;
+ block->page.in_free_list = FALSE;
+- block->page.in_LRU_list = FALSE;
+ block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++ block->page.in_LRU_list = FALSE;
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -1494,7 +1494,7 @@
+
+ memcpy(dpage, bpage, sizeof *dpage);
+
+- ut_d(bpage->in_LRU_list = FALSE);
++ bpage->in_LRU_list = FALSE;
+ ut_d(bpage->in_page_hash = FALSE);
+
+ /* relocate buf_pool->LRU */
+@@ -3729,8 +3729,8 @@
+ bpage->in_zip_hash = FALSE;
+ bpage->in_flush_list = FALSE;
+ bpage->in_free_list = FALSE;
+- bpage->in_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
++ bpage->in_LRU_list = FALSE;
+
+ ut_d(bpage->in_page_hash = TRUE);
+
+@@ -3893,7 +3893,7 @@
+ ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin(buf_pool);
++ buf_flush_free_margin(buf_pool, FALSE);
+
+ frame = block->frame;
+
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:18:48.868953442 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:20:49.599986956 +0900
+@@ -403,19 +403,21 @@
+ buf_page_in_file(bpage) and in the LRU list */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++ //ut_ad(bpage->in_LRU_list);
+
+- if (UNIV_LIKELY(buf_page_in_file(bpage))) {
++ if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+
+ return(bpage->oldest_modification == 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
+ }
+
++ /* permitted not to own LRU_mutex.. */
++/*
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: buffer block state %lu"
+@@ -423,6 +425,7 @@
+ (ulong) buf_page_get_state(bpage));
+ ut_print_buf(stderr, bpage, sizeof(buf_page_t));
+ putc('\n', stderr);
++*/
+
+ return(FALSE);
+ }
+@@ -1955,8 +1958,14 @@
+ buf_page_t* bpage;
+ ulint n_replaceable;
+ ulint distance = 0;
++ ibool have_LRU_mutex = FALSE;
+
+- buf_pool_mutex_enter(buf_pool);
++ if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++ have_LRU_mutex = TRUE;
++retry:
++ //buf_pool_mutex_enter(buf_pool);
++ if (have_LRU_mutex)
++ buf_pool_mutex_enter(buf_pool);
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+@@ -1967,7 +1976,13 @@
+ + BUF_FLUSH_EXTRA_MARGIN(buf_pool))
+ && (distance < BUF_LRU_FREE_SEARCH_LEN(buf_pool))) {
+
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
++ mutex_t* block_mutex;
++ if (!bpage->in_LRU_list) {
++ /* restart. but it is very optimistic */
++ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
++ continue;
++ }
++ block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+
+@@ -1982,11 +1997,18 @@
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ if (have_LRU_mutex)
++ buf_pool_mutex_exit(buf_pool);
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+
+ return(0);
++ } else if (!have_LRU_mutex) {
++ /* confirm it again with LRU_mutex for exactness */
++ have_LRU_mutex = TRUE;
++ distance = 0;
++ goto retry;
+ }
+
+ return(BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)
+@@ -2004,7 +2026,8 @@
+ void
+ buf_flush_free_margin(
+ /*==================*/
+- buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
++ buf_pool_t* buf_pool, /*!< in: Buffer pool instance */
++ ibool wait)
+ {
+ ulint n_to_flush;
+
+@@ -2015,7 +2038,7 @@
+
+ n_flushed = buf_flush_LRU(buf_pool, n_to_flush);
+
+- if (n_flushed == ULINT_UNDEFINED) {
++ if (wait && n_flushed == ULINT_UNDEFINED) {
+ /* There was an LRU type flush batch already running;
+ let us wait for it to end */
+
+@@ -2028,8 +2051,9 @@
+ Flushes pages from the end of all the LRU lists. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margins(void)
++buf_flush_free_margins(
+ /*========================*/
++ ibool wait)
+ {
+ ulint i;
+
+@@ -2038,7 +2062,7 @@
+
+ buf_pool = buf_pool_from_array(i);
+
+- buf_flush_free_margin(buf_pool);
++ buf_flush_free_margin(buf_pool, wait);
+ }
+ }
+
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:20:49.602952786 +0900
+@@ -1016,7 +1016,7 @@
+
+ /* No free block was found: try to flush the LRU list */
+
+- buf_flush_free_margin(buf_pool);
++ buf_flush_free_margin(buf_pool, TRUE);
+ ++srv_buf_pool_wait_free;
+
+ os_aio_simulated_wake_handler_threads();
+@@ -1213,7 +1213,7 @@
+
+ /* Remove the block from the LRU list */
+ UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
+- ut_d(bpage->in_LRU_list = FALSE);
++ bpage->in_LRU_list = FALSE;
+
+ buf_unzip_LRU_remove_block_if_needed(bpage);
+
+@@ -1292,7 +1292,7 @@
+
+ ut_ad(!bpage->in_LRU_list);
+ UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
+- ut_d(bpage->in_LRU_list = TRUE);
++ bpage->in_LRU_list = TRUE;
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+@@ -1362,7 +1362,7 @@
+ buf_pool->LRU_old_len++;
+ }
+
+- ut_d(bpage->in_LRU_list = TRUE);
++ bpage->in_LRU_list = TRUE;
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
+
+@@ -1617,7 +1617,7 @@
+ buf_page_set_old(b, buf_page_is_old(b));
+ #endif /* UNIV_LRU_DEBUG */
+ } else {
+- ut_d(b->in_LRU_list = FALSE);
++ b->in_LRU_list = FALSE;
+ buf_LRU_add_block_low(b, buf_page_is_old(b));
+ }
+
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:18:48.870953384 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:20:49.604956032 +0900
+@@ -200,7 +200,7 @@
+ }
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin(buf_pool);
++ buf_flush_free_margin(buf_pool, TRUE);
+
+ /* Increment number of I/O operations used for LRU policy. */
+ buf_LRU_stat_inc_io();
+@@ -476,7 +476,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of the LRU list if necessary */
+- buf_flush_free_margin(buf_pool);
++ buf_flush_free_margin(buf_pool, TRUE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints && (count > 0)) {
+@@ -565,7 +565,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of all the LRU lists if necessary */
+- buf_flush_free_margins();
++ buf_flush_free_margins(FALSE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+@@ -659,7 +659,7 @@
+ os_aio_simulated_wake_handler_threads();
+
+ /* Flush pages from the end of all the LRU lists if necessary */
+- buf_flush_free_margins();
++ buf_flush_free_margins(FALSE);
+
+ #ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:20:49.608986590 +0900
+@@ -1314,11 +1314,11 @@
+
+ UT_LIST_NODE_T(buf_page_t) LRU;
+ /*!< node of the LRU list */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ ibool in_LRU_list; /*!< TRUE if the page is in
+ the LRU list; used in
+ debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ unsigned old:1; /*!< TRUE if the block is in the old
+ blocks in buf_pool->LRU_old */
+ unsigned freed_page_clock:31;/*!< the value of
+diff -ruN a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
+--- a/storage/innobase/include/buf0flu.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0flu.h 2010-12-03 15:20:49.609953185 +0900
+@@ -65,13 +65,15 @@
+ void
+ buf_flush_free_margin(
+ /*==================*/
+- buf_pool_t* buf_pool);
++ buf_pool_t* buf_pool,
++ ibool wait);
+ /*********************************************************************//**
+ Flushes pages from the end of all the LRU lists. */
+ UNIV_INTERN
+ void
+-buf_flush_free_margins(void);
++buf_flush_free_margins(
+ /*=========================*/
++ ibool wait);
+ #endif /* !UNIV_HOTBACKUP */
+ /********************************************************************//**
+ Initializes a page for writing to the tablespace. */
--- /dev/null
+# name : innodb_overwrite_relay_log_info.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:37:45.516105468 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:38:20.318952987 +0900
+@@ -42,6 +42,8 @@
+ #pragma implementation // gcc: Class implementation
+ #endif
+
++#define MYSQL_SERVER
++
+ #include <sql_table.h> // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT,
+ // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH
+
+@@ -52,6 +54,15 @@
+ #include <mysql/innodb_priv.h>
+ #include <mysql/psi/psi.h>
+
++#ifdef MYSQL_SERVER
++#include <rpl_mi.h>
++#include <slave.h>
++// Defined in slave.cc
++int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
++int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
++ const char *default_val);
++#endif /* MYSQL_SERVER */
++
+ /** @file ha_innodb.cc */
+
+ /* Include necessary InnoDB headers */
+@@ -91,6 +102,14 @@
+ #include "ha_innodb.h"
+ #include "i_s.h"
+
++#ifdef MYSQL_SERVER
++// Defined in trx0sys.c
++extern char trx_sys_mysql_master_log_name[];
++extern ib_int64_t trx_sys_mysql_master_log_pos;
++extern char trx_sys_mysql_relay_log_name[];
++extern ib_int64_t trx_sys_mysql_relay_log_pos;
++#endif /* MYSQL_SERVER */
++
+ # ifndef MYSQL_PLUGIN_IMPORT
+ # define MYSQL_PLUGIN_IMPORT /* nothing */
+ # endif /* MYSQL_PLUGIN_IMPORT */
+@@ -163,6 +182,7 @@
+ static my_bool innobase_use_doublewrite = TRUE;
+ static my_bool innobase_use_checksums = TRUE;
+ static my_bool innobase_locks_unsafe_for_binlog = FALSE;
++static my_bool innobase_overwrite_relay_log_info = FALSE;
+ static my_bool innobase_rollback_on_timeout = FALSE;
+ static my_bool innobase_create_status_file = FALSE;
+ static my_bool innobase_stats_on_metadata = TRUE;
+@@ -2201,6 +2221,89 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++#ifndef MYSQL_SERVER
++ innobase_overwrite_relay_log_info = FALSE; /* relay-log.info code below is MYSQL_SERVER-only */
++#endif /* !MYSQL_SERVER */
++
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++ /* read master log position from relay-log.info if exists */
++ char fname[FN_REFLEN+128];
++ int pos;
++ int info_fd;
++ IO_CACHE info_file;
++
++ fname[0] = '\0';
++
++ if(innobase_overwrite_relay_log_info) {
++
++ fprintf(stderr,
++ "InnoDB: Warning: innodb_overwrite_relay_log_info is enabled."
++ " Updates in other storage engines may have problem with consistency.\n");
++
++ bzero((char*) &info_file, sizeof(info_file));
++ fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
++
++ int error=0;
++
++ if (!access(fname,F_OK)) {
++ /* exist */
++ if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
++ error=1;
++ } else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
++ READ_CACHE, 0L, 0, MYF(MY_WME))) {
++ error=1;
++ }
++
++ if (error) {
++relay_info_error:
++ if (info_fd >= 0)
++ my_close(info_fd, MYF(0));
++ fname[0] = '\0';
++ goto skip_relay;
++ }
++ } else {
++ fname[0] = '\0';
++ goto skip_relay;
++ }
++
++ if (init_strvar_from_file(fname, sizeof(fname), &info_file, "") || /* dummy (it is relay-log) */
++ init_intvar_from_file(&pos, &info_file, BIN_LOG_HEADER_SIZE)) {
++ end_io_cache(&info_file);
++ error=1;
++ goto relay_info_error;
++ }
++
++ fprintf(stderr,
++ "InnoDB: relay-log.info is detected.\n"
++ "InnoDB: relay log: position %u, file name %s\n",
++ pos, fname);
++
++ strmake(trx_sys_mysql_relay_log_name, fname, TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN - 1); /* always NUL-terminated, unlike strncpy() */
++ trx_sys_mysql_relay_log_pos = (ib_int64_t) pos;
++
++ if (init_strvar_from_file(fname, sizeof(fname), &info_file, "") ||
++ init_intvar_from_file(&pos, &info_file, 0)) {
++ end_io_cache(&info_file);
++ error=1;
++ goto relay_info_error;
++ }
++
++ fprintf(stderr,
++ "InnoDB: master log: position %u, file name %s\n",
++ pos, fname);
++
++ strmake(trx_sys_mysql_master_log_name, fname, TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN - 1); /* always NUL-terminated, unlike strncpy() */
++ trx_sys_mysql_master_log_pos = (ib_int64_t) pos;
++
++ end_io_cache(&info_file);
++ if (info_fd >= 0)
++ my_close(info_fd, MYF(0));
++ }
++skip_relay:
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
++
+ /* Check that values don't overflow on 32-bit systems. */
+ if (sizeof(ulint) == 4) {
+ if (innobase_buffer_pool_size > UINT_MAX32) {
+@@ -2499,6 +2602,76 @@
+ goto mem_free_and_error;
+ }
+
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++ if(innobase_overwrite_relay_log_info) {
++ /* If InnoDB progressed from relay-log.info, overwrite it */
++ if (fname[0] == '\0') {
++ fprintf(stderr, /* fname is empty: relay-log.info was missing or could not be read */
++ "InnoDB: something wrong with relay-log.info. InnoDB will not overwrite it.\n");
++ } else if (0 != strcmp(fname, trx_sys_mysql_master_log_name)
++ || pos != trx_sys_mysql_master_log_pos) {
++ /* Overwrite relay-log.info */
++ bzero((char*) &info_file, sizeof(info_file));
++ fn_format(fname, relay_log_info_file, mysql_data_home, "", 4+32);
++
++ int error = 0;
++
++ if (!access(fname,F_OK)) {
++ /* exist */
++ if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) {
++ error = 1;
++ } else if (init_io_cache(&info_file, info_fd, IO_SIZE*2,
++ WRITE_CACHE, 0L, 0, MYF(MY_WME))) {
++ error = 1;
++ }
++
++ if (error) {
++ if (info_fd >= 0)
++ my_close(info_fd, MYF(0));
++ goto skip_overwrite;
++ }
++ } else {
++ error = 1;
++ goto skip_overwrite;
++ }
++
++ char buff[FN_REFLEN*2+22*2+4], *pos;
++
++ my_b_seek(&info_file, 0L);
++ pos=strmov(buff, trx_sys_mysql_relay_log_name);
++ *pos++='\n';
++ pos=longlong2str(trx_sys_mysql_relay_log_pos, pos, 10);
++ *pos++='\n';
++ pos=strmov(pos, trx_sys_mysql_master_log_name);
++ *pos++='\n';
++ pos=longlong2str(trx_sys_mysql_master_log_pos, pos, 10);
++ *pos='\n';
++
++ if (my_b_write(&info_file, (uchar*) buff, (size_t) (pos-buff)+1))
++ error = 1;
++ if (flush_io_cache(&info_file))
++ error = 1;
++
++ end_io_cache(&info_file);
++ if (info_fd >= 0)
++ my_close(info_fd, MYF(0));
++skip_overwrite:
++ if (error) {
++ fprintf(stderr,
++ "InnoDB: ERROR: error occured during overwriting relay-log.info.\n");
++ } else {
++ fprintf(stderr,
++ "InnoDB: relay-log.info was overwritten.\n");
++ }
++ } else {
++ fprintf(stderr,
++ "InnoDB: InnoDB and relay-log.info are synchronized. InnoDB will not overwrite it.\n");
++ }
++ }
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
++
+ innobase_old_blocks_pct = buf_LRU_old_ratio_update(
+ innobase_old_blocks_pct, TRUE);
+
+@@ -2611,6 +2784,25 @@
+ trx_t* trx) /*!< in: transaction handle */
+ {
+ if (trx_is_started(trx)) {
++#ifdef HAVE_REPLICATION
++#ifdef MYSQL_SERVER
++ THD *thd=current_thd;
++
++ if (thd && thd->slave_thread) {
++ /* Update the replication position info inside InnoDB */
++ trx->mysql_master_log_file_name
++ = active_mi->rli.group_master_log_name;
++ trx->mysql_master_log_pos
++ = ((ib_int64_t)active_mi->rli.group_master_log_pos +
++ ((ib_int64_t)active_mi->rli.future_event_relay_log_pos -
++ (ib_int64_t)active_mi->rli.group_relay_log_pos));
++ trx->mysql_relay_log_file_name
++ = active_mi->rli.group_relay_log_name;
++ trx->mysql_relay_log_pos
++ = (ib_int64_t)active_mi->rli.future_event_relay_log_pos;
++ }
++#endif /* MYSQL_SERVER */
++#endif /* HAVE_REPLICATION */
+
+ trx_commit_for_mysql(trx);
+ }
+@@ -10919,6 +11111,12 @@
+ "The common part for InnoDB table spaces.",
+ NULL, NULL, NULL);
+
++static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "During InnoDB crash recovery on slave overwrite relay-log.info "
++ "to align master log file position if information in InnoDB and relay-log.info is different.",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable InnoDB doublewrite buffer (enabled by default). "
+@@ -11375,6 +11573,7 @@
+ MYSQL_SYSVAR(old_blocks_pct),
+ MYSQL_SYSVAR(old_blocks_time),
+ MYSQL_SYSVAR(open_files),
++ MYSQL_SYSVAR(overwrite_relay_log_info),
+ MYSQL_SYSVAR(rollback_on_timeout),
+ MYSQL_SYSVAR(stats_on_metadata),
+ MYSQL_SYSVAR(stats_sample_pages),
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/trx0sys.h 2010-12-03 15:38:20.321953297 +0900
+@@ -52,6 +52,9 @@
+ extern ib_int64_t trx_sys_mysql_master_log_pos;
+ /* @} */
+
++extern char trx_sys_mysql_relay_log_name[];
++extern ib_int64_t trx_sys_mysql_relay_log_pos;
++
+ /** If this MySQL server uses binary logging, after InnoDB has been inited
+ and if it has done a crash recovery, we store the binlog file name and position
+ here. */
+@@ -293,7 +296,8 @@
+ void
+ trx_sys_update_mysql_binlog_offset(
+ /*===============================*/
+- const char* file_name,/*!< in: MySQL log file name */
++ trx_sysf_t* sys_header,
++ const char* file_name_in,/*!< in: MySQL log file name */
+ ib_int64_t offset, /*!< in: position in that log file */
+ ulint field, /*!< in: offset of the MySQL log info field in
+ the trx sys header */
+@@ -488,6 +492,7 @@
+ @see trx_sys_mysql_master_log_name
+ @see trx_sys_mysql_bin_log_name */
+ #define TRX_SYS_MYSQL_LOG_NAME_LEN 512
++#define TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN 480 /* (500 - 12) is dead line. */
+ /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
+ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
+
+@@ -497,6 +502,7 @@
+ /** The offset of the MySQL replication info in the trx system header;
+ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
+ #define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
++#define TRX_SYS_MYSQL_RELAY_LOG_INFO (UNIV_PAGE_SIZE - 1500)
+
+ /** The offset of the MySQL binlog offset info in the trx system header */
+ #define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
+diff -ruN a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
+--- a/storage/innobase/include/trx0trx.h 2010-12-03 15:18:48.894955550 +0900
++++ b/storage/innobase/include/trx0trx.h 2010-12-03 15:38:20.323953416 +0900
+@@ -569,6 +569,21 @@
+ ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field
+ contains the end offset of the binlog
+ entry */
++ const char* mysql_master_log_file_name;
++ /* if the database server is a MySQL
++ replication slave, we have here the
++ master binlog name up to which
++ replication has processed; otherwise
++ this is a pointer to a null
++ character */
++ ib_int64_t mysql_master_log_pos;
++ /* if the database server is a MySQL
++ replication slave, this is the
++ position in the log file up to which
++ replication has processed */
++ const char* mysql_relay_log_file_name;
++ ib_int64_t mysql_relay_log_pos;
++
+ os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
+ with this transaction object */
+ ulint mysql_process_no;/* since in Linux, 'top' reports
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/trx/trx0sys.c 2010-12-03 15:38:20.325956917 +0900
+@@ -75,13 +75,16 @@
+ file name and position here. */
+ /* @{ */
+ /** Master binlog file name */
+-UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
++UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
+ /** Master binlog file position. We have successfully got the updates
+ up to this position. -1 means that no crash recovery was needed, or
+ there was no master log position info inside InnoDB.*/
+ UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
+ /* @} */
+
++UNIV_INTERN char trx_sys_mysql_relay_log_name[TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN];
++UNIV_INTERN ib_int64_t trx_sys_mysql_relay_log_pos = -1;
++
+ /** If this MySQL server uses binary logging, after InnoDB has been inited
+ and if it has done a crash recovery, we store the binlog file name and position
+ here. */
+@@ -683,23 +686,25 @@
+ void
+ trx_sys_update_mysql_binlog_offset(
+ /*===============================*/
+- const char* file_name,/*!< in: MySQL log file name */
++ trx_sysf_t* sys_header,
++ const char* file_name_in,/*!< in: MySQL log file name */
+ ib_int64_t offset, /*!< in: position in that log file */
+ ulint field, /*!< in: offset of the MySQL log info field in
+ the trx sys header */
+ mtr_t* mtr) /*!< in: mtr */
+ {
+- trx_sysf_t* sys_header;
++ const char* file_name;
+
+- if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
++ if (ut_strlen(file_name_in) >= TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN) {
+
+ /* We cannot fit the name to the 512 bytes we have reserved */
++ /* -> To leave room for the relay log file information, file_name must fit in 480 bytes; a longer name is replaced by "" */
+
+- return;
++ file_name = "";
++ } else {
++ file_name = file_name_in;
+ }
+
+- sys_header = trx_sysf_get(mtr);
+-
+ if (mach_read_from_4(sys_header + field
+ + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
+ != TRX_SYS_MYSQL_LOG_MAGIC_N) {
+@@ -821,13 +826,26 @@
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME);
++
++ fprintf(stderr,
++ "InnoDB: and relay log file\n"
++ "InnoDB: position %lu %lu, file name %s\n",
++ (ulong) mach_read_from_4(sys_header
++ + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
++ (ulong) mach_read_from_4(sys_header
++ + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
++ sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_NAME);
++
+ /* Copy the master log position info to global variables we can
+ use in ha_innobase.cc to initialize glob_mi to right values */
+
+ ut_memcpy(trx_sys_mysql_master_log_name,
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_NAME,
+- TRX_SYS_MYSQL_LOG_NAME_LEN);
++ TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
+
+ trx_sys_mysql_master_log_pos
+ = (((ib_int64_t) mach_read_from_4(
+@@ -836,6 +854,19 @@
+ + ((ib_int64_t) mach_read_from_4(
+ sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+ + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
++
++ ut_memcpy(trx_sys_mysql_relay_log_name,
++ sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_NAME,
++ TRX_SYS_MYSQL_MASTER_LOG_NAME_LEN);
++
++ trx_sys_mysql_relay_log_pos
++ = (((ib_int64_t) mach_read_from_4(
++ sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
++ + ((ib_int64_t) mach_read_from_4(
++ sys_header + TRX_SYS_MYSQL_RELAY_LOG_INFO
++ + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
+ mtr_commit(&mtr);
+ }
+
+diff -ruN a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
+--- a/storage/innobase/trx/trx0trx.c 2010-12-03 15:37:45.549028990 +0900
++++ b/storage/innobase/trx/trx0trx.c 2010-12-03 15:38:20.328957217 +0900
+@@ -135,6 +135,10 @@
+
+ trx->mysql_log_file_name = NULL;
+ trx->mysql_log_offset = 0;
++ trx->mysql_master_log_file_name = "";
++ trx->mysql_master_log_pos = 0;
++ trx->mysql_relay_log_file_name = "";
++ trx->mysql_relay_log_pos = 0;
+
+ mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
+
+@@ -732,6 +736,7 @@
+ trx_rseg_t* rseg;
+ trx_undo_t* undo;
+ mtr_t mtr;
++ trx_sysf_t* sys_header = NULL;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+@@ -787,13 +792,35 @@
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
++ if (!sys_header) {
++ sys_header = trx_sysf_get(&mtr);
++ }
+ trx_sys_update_mysql_binlog_offset(
++ sys_header,
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+ trx->mysql_log_file_name = NULL;
+ }
+
++ if (trx->mysql_master_log_file_name[0] != '\0') {
++ /* This database server is a MySQL replication slave */
++ if (!sys_header) {
++ sys_header = trx_sysf_get(&mtr);
++ }
++ trx_sys_update_mysql_binlog_offset(
++ sys_header,
++ trx->mysql_relay_log_file_name,
++ trx->mysql_relay_log_pos,
++ TRX_SYS_MYSQL_RELAY_LOG_INFO, &mtr);
++ trx_sys_update_mysql_binlog_offset(
++ sys_header,
++ trx->mysql_master_log_file_name,
++ trx->mysql_master_log_pos,
++ TRX_SYS_MYSQL_MASTER_LOG_INFO, &mtr);
++ trx->mysql_master_log_file_name = "";
++ }
++
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
--- /dev/null
+# name : innodb_pass_corrupt_table.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
+--- a/storage/innobase/btr/btr0btr.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0btr.c 2010-12-04 15:38:18.110513593 +0900
+@@ -137,6 +137,12 @@
+ root_page_no = dict_index_get_page(index);
+
+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(0);
++ }
++ ut_a(block);
++
+ ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
+ == dict_table_is_comp(index->table));
+ #ifdef UNIV_BTR_DEBUG
+@@ -422,6 +428,12 @@
+
+ root = btr_root_get(index, &mtr);
+
++ if (srv_pass_corrupt_table && !root) {
++ mtr_commit(&mtr);
++ return(0);
++ }
++ ut_a(root);
++
+ if (flag == BTR_N_LEAF_PAGES) {
+ seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
+
+@@ -869,6 +881,13 @@
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
++
++ if (srv_pass_corrupt_table && !root) {
++ mtr_commit(&mtr);
++ return;
++ }
++ ut_a(root);
++
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + root, space));
+@@ -891,6 +910,12 @@
+ mtr_start(&mtr);
+
+ root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
++
++ if (srv_pass_corrupt_table && !root) {
++ mtr_commit(&mtr);
++ return;
++ }
++ ut_a(root);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + root, space));
+@@ -924,6 +949,11 @@
+
+ block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
+
++ if (srv_pass_corrupt_table && !block) {
++ return;
++ }
++ ut_a(block);
++
+ btr_search_drop_page_hash_index(block);
+
+ header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c 2010-12-03 17:30:16.239038936 +0900
++++ b/storage/innobase/btr/btr0cur.c 2010-12-04 15:38:18.114551906 +0900
+@@ -238,6 +238,11 @@
+ case BTR_MODIFY_LEAF:
+ mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
+ get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -251,6 +256,11 @@
+ get_block = btr_block_get(space, zip_size,
+ left_page_no,
+ RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame)
+ == page_is_comp(page));
+@@ -262,6 +272,11 @@
+
+ get_block = btr_block_get(space, zip_size, page_no,
+ RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -273,6 +288,11 @@
+ get_block = btr_block_get(space, zip_size,
+ right_page_no,
+ RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame)
+ == page_is_comp(page));
+@@ -294,6 +314,11 @@
+ get_block = btr_block_get(space, zip_size,
+ left_page_no, mode, mtr);
+ cursor->left_block = get_block;
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame)
+ == page_is_comp(page));
+@@ -304,6 +329,11 @@
+ }
+
+ get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
++
++ if (srv_pass_corrupt_table && !get_block) {
++ return;
++ }
++ ut_a(get_block);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
+ #endif /* UNIV_BTR_DEBUG */
+@@ -576,6 +606,19 @@
+ file, line, mtr);
+
+ if (block == NULL) {
++ if (srv_pass_corrupt_table
++ && buf_mode != BUF_GET_IF_IN_POOL
++ && buf_mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
++ page_cursor->block = 0;
++ page_cursor->rec = 0;
++ if (estimate) {
++ cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++ }
++ goto func_exit;
++ }
++ ut_a(buf_mode == BUF_GET_IF_IN_POOL
++ || buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
++
+ /* This must be a search to perform an insert/delete
+ mark/ delete; try using the insert/delete buffer */
+
+@@ -650,6 +693,16 @@
+ block->check_index_page_at_flush = TRUE;
+ page = buf_block_get_frame(block);
+
++ if (srv_pass_corrupt_table && !page) {
++ page_cursor->block = 0;
++ page_cursor->rec = 0;
++ if (estimate) {
++ cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++ }
++ goto func_exit;
++ }
++ ut_a(page);
++
+ if (rw_latch != RW_NO_LATCH) {
+ #ifdef UNIV_ZIP_DEBUG
+ const page_zip_des_t* page_zip
+@@ -854,6 +907,17 @@
+ RW_NO_LATCH, NULL, BUF_GET,
+ file, line, mtr);
+ page = buf_block_get_frame(block);
++
++ if (srv_pass_corrupt_table && !page) {
++ page_cursor->block = 0;
++ page_cursor->rec = 0;
++ if (estimate) {
++ cursor->path_arr->nth_rec = ULINT_UNDEFINED;
++ }
++ break;
++ }
++ ut_a(page);
++
+ ut_ad(index->id == btr_page_get_index_id(page));
+
+ block->check_index_page_at_flush = TRUE;
+@@ -974,6 +1038,14 @@
+ RW_NO_LATCH, NULL, BUF_GET,
+ file, line, mtr);
+ page = buf_block_get_frame(block);
++
++ if (srv_pass_corrupt_table && !page) {
++ page_cursor->block = 0;
++ page_cursor->rec = 0;
++ break;
++ }
++ ut_a(page);
++
+ ut_ad(index->id == btr_page_get_index_id(page));
+
+ if (height == ULINT_UNDEFINED) {
+@@ -1288,6 +1360,12 @@
+ *big_rec = NULL;
+
+ block = btr_cur_get_block(cursor);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(DB_CORRUPTION);
++ }
++ ut_a(block);
++
+ page = buf_block_get_frame(block);
+ index = cursor->index;
+ zip_size = buf_block_get_zip_size(block);
+@@ -3022,6 +3100,11 @@
+
+ block = btr_cur_get_block(cursor);
+
++ if (srv_pass_corrupt_table && !block) {
++ return(DB_CORRUPTION);
++ }
++ ut_a(block);
++
+ ut_ad(page_is_leaf(buf_block_get_frame(block)));
+
+ rec = btr_cur_get_rec(cursor);
+@@ -3826,6 +3909,11 @@
+
+ page = btr_cur_get_page(&cursor);
+
++ if (srv_pass_corrupt_table && !page) {
++ break;
++ }
++ ut_a(page);
++
+ supremum = page_get_supremum_rec(page);
+ if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) {
+ /* the cursor should be the first record of the page. */
+diff -ruN a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c
+--- a/storage/innobase/btr/btr0pcur.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0pcur.c 2010-12-04 15:38:18.116563877 +0900
+@@ -32,7 +32,7 @@
+ #include "ut0byte.h"
+ #include "rem0cmp.h"
+ #include "trx0trx.h"
+-
++#include "srv0srv.h"
+ /**************************************************************//**
+ Allocates memory for a persistent cursor object and initializes the cursor.
+ @return own: persistent cursor */
+@@ -102,6 +102,12 @@
+ ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+ block = btr_pcur_get_block(cursor);
++
++ if (srv_pass_corrupt_table && !block) {
++ return;
++ }
++ ut_a(block);
++
+ index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
+
+ page_cursor = btr_pcur_get_page_cur(cursor);
+@@ -419,6 +425,15 @@
+ next_block = btr_block_get(space, zip_size, next_page_no,
+ cursor->latch_mode, mtr);
+ next_page = buf_block_get_frame(next_block);
++
++ if (srv_pass_corrupt_table && !next_page) {
++ btr_leaf_page_release(btr_pcur_get_block(cursor),
++ cursor->latch_mode, mtr);
++ btr_pcur_get_page_cur(cursor)->block = 0;
++ btr_pcur_get_page_cur(cursor)->rec = 0;
++ return;
++ }
++ ut_a(next_page);
+ #ifdef UNIV_BTR_DEBUG
+ ut_a(page_is_comp(next_page) == page_is_comp(page));
+ ut_a(btr_page_get_prev(next_page, mtr)
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:49:59.166193407 +0900
++++ b/storage/innobase/btr/btr0sea.c 2010-12-04 15:38:18.118548961 +0900
+@@ -42,7 +42,7 @@
+ #include "btr0pcur.h"
+ #include "btr0btr.h"
+ #include "ha0ha.h"
+-
++#include "srv0srv.h"
+ /** Flag: has the search system been enabled?
+ Protected by btr_search_latch and btr_search_enabled_mutex. */
+ UNIV_INTERN char btr_search_enabled = TRUE;
+@@ -607,6 +607,11 @@
+
+ block = btr_cur_get_block(cursor);
+
++ if (srv_pass_corrupt_table && !block) {
++ return;
++ }
++ ut_a(block);
++
+ /* NOTE that the following two function calls do NOT protect
+ info or block->n_fields etc. with any semaphore, to save CPU time!
+ We cannot assume the fields are consistent when we return from
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-04 15:37:50.554565654 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-04 15:38:18.119548922 +0900
+@@ -52,6 +52,7 @@
+ #include "log0recv.h"
+ #include "page0zip.h"
+ #include "trx0trx.h"
++#include "srv0start.h"
+
+ /* prototypes for new functions added to ha_innodb.cc */
+ trx_t* innobase_get_trx();
+@@ -1131,6 +1132,11 @@
+ ready = buf_flush_ready_for_replace(&block->page);
+ mutex_exit(&block->mutex);
+
++ if (block->page.is_corrupt) {
++ /* corrupt page may remain, it can be skipped */
++ break;
++ }
++
+ if (!ready) {
+
+ return(block);
+@@ -2476,6 +2482,14 @@
+ return(NULL);
+ }
+
++ if (srv_pass_corrupt_table) {
++ if (bpage->is_corrupt) {
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
++ return(NULL);
++ }
++ }
++ ut_a(!(bpage->is_corrupt));
++
+ block_mutex = buf_page_get_mutex_enter(bpage);
+
+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+@@ -3022,6 +3036,14 @@
+ return(NULL);
+ }
+
++ if (srv_pass_corrupt_table) {
++ if (block->page.is_corrupt) {
++ mutex_exit(block_mutex);
++ return(NULL);
++ }
++ }
++ ut_a(!(block->page.is_corrupt));
++
+ switch (buf_block_get_state(block)) {
+ buf_page_t* bpage;
+ ibool success;
+@@ -3689,6 +3711,7 @@
+ bpage->newest_modification = 0;
+ bpage->oldest_modification = 0;
+ HASH_INVALIDATE(bpage, hash);
++ bpage->is_corrupt = FALSE;
+ #ifdef UNIV_DEBUG_FILE_ACCESSES
+ bpage->file_page_was_freed = FALSE;
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+@@ -4199,7 +4222,8 @@
+ void
+ buf_page_io_complete(
+ /*=================*/
+- buf_page_t* bpage) /*!< in: pointer to the block in question */
++ buf_page_t* bpage, /*!< in: pointer to the block in question */
++ trx_t* trx)
+ {
+ enum buf_io_fix io_type;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+@@ -4278,6 +4302,7 @@
+ (ulong) bpage->offset);
+ }
+
++ if (!srv_pass_corrupt_table || !bpage->is_corrupt) {
+ /* From version 3.23.38 up we store the page checksum
+ to the 4 first bytes of the page end lsn field */
+
+@@ -4319,6 +4344,19 @@
+ REFMAN "forcing-recovery.html\n"
+ "InnoDB: about forcing recovery.\n", stderr);
+
++ if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
++ && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
++ fprintf(stderr,
++ "InnoDB: space %u will be treated as corrupt.\n",
++ bpage->space);
++ fil_space_set_corrupt(bpage->space);
++ if (trx && trx->dict_operation_lock_mode == 0) {
++ dict_table_set_corrupt_by_space(bpage->space, TRUE);
++ } else {
++ dict_table_set_corrupt_by_space(bpage->space, FALSE);
++ }
++ bpage->is_corrupt = TRUE;
++ } else
+ if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
+ fputs("InnoDB: Ending processing because of"
+ " a corrupt database page.\n",
+@@ -4326,6 +4364,7 @@
+ exit(1);
+ }
+ }
++ } /**/
+
+ if (recv_recovery_is_on()) {
+ /* Pages must be uncompressed for crash recovery. */
+@@ -4335,8 +4374,11 @@
+
+ if (uncompressed && !recv_no_ibuf_operations) {
+ ibuf_merge_or_delete_for_page(
++ /* Delete possible entries, if bpage is_corrupt */
++ (srv_pass_corrupt_table && bpage->is_corrupt) ? NULL :
+ (buf_block_t*) bpage, bpage->space,
+ bpage->offset, buf_page_get_zip_size(bpage),
++ (srv_pass_corrupt_table && bpage->is_corrupt) ? FALSE :
+ TRUE);
+ }
+ }
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-04 15:37:50.557553380 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:41:09.784467585 +0900
+@@ -193,12 +193,19 @@
+ ((buf_block_t*) bpage)->frame, bpage, trx);
+ }
+ thd_wait_end(NULL);
++
++ if (srv_pass_corrupt_table) {
++ if (*err != DB_SUCCESS) {
++ bpage->is_corrupt = TRUE;
++ }
++ } else {
+ ut_a(*err == DB_SUCCESS);
++ }
+
+ if (sync) {
+ /* The i/o is already completed when we arrive from
+ fil_read */
+- buf_page_io_complete(bpage);
++ buf_page_io_complete(bpage, trx);
+ }
+
+ return(1);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c 2010-12-03 17:30:16.248987063 +0900
++++ b/storage/innobase/dict/dict0dict.c 2010-12-04 15:45:23.808513973 +0900
+@@ -54,6 +54,7 @@
+ #include "row0merge.h"
+ #include "m_ctype.h" /* my_isspace() */
+ #include "ha_prototypes.h" /* innobase_strcasecmp() */
++#include "srv0start.h" /* SRV_LOG_SPACE_FIRST_ID */
+
+ #include <ctype.h>
+
+@@ -749,7 +750,7 @@
+
+ mutex_exit(&(dict_sys->mutex));
+
+- if (table != NULL) {
++ if (table != NULL && !table->is_corrupt) {
+ /* If table->ibd_file_missing == TRUE, this will
+ print an error message and return without doing
+ anything. */
+@@ -1290,7 +1291,7 @@
+ + dict_sys->size) > srv_dict_size_limit ) {
+ prev_table = UT_LIST_GET_PREV(table_LRU, table);
+
+- if (table == self || table->n_mysql_handles_opened)
++ if (table == self || table->n_mysql_handles_opened || table->is_corrupt)
+ goto next_loop;
+
+ cached_foreign_tables = 0;
+@@ -4314,6 +4315,12 @@
+ heap = mem_heap_create(1000);
+
+ while (index) {
++ if (table->is_corrupt) {
++ ut_a(srv_pass_corrupt_table);
++ mem_heap_free(heap);
++ return(FALSE);
++ }
++
+ size = btr_get_size(index, BTR_TOTAL_SIZE);
+
+ index->stat_index_size = size;
+@@ -4433,6 +4440,12 @@
+ heap = mem_heap_create(1000);
+
+ while (index) {
++ if (table->is_corrupt) {
++ ut_a(srv_pass_corrupt_table);
++ mem_heap_free(heap);
++ return;
++ }
++
+ /*===========================================*/
+ {
+ dict_table_t* sys_stats;
+@@ -4598,6 +4611,12 @@
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ ulint size;
++
++ if (table->is_corrupt) {
++ ut_a(srv_pass_corrupt_table);
++ return;
++ }
++
+ size = btr_get_size(index, BTR_TOTAL_SIZE);
+
+ index->stat_index_size = size;
+@@ -5318,4 +5337,42 @@
+ rw_lock_free(&dict_table_stats_latches[i]);
+ }
+ }
++
++/*************************************************************************
++Sets the is_corrupt flag of every table on dict_sys->table_LRU whose space is space_id. */
++
++void
++dict_table_set_corrupt_by_space(
++/*============================*/
++ ulint space_id, /*!< in: id of the tablespace whose tables are flagged */
++ ibool need_mutex) /*!< in: TRUE to enter and exit dict_sys->mutex in this function */
++{
++ dict_table_t* table;
++ ibool found = FALSE;
++
++ ut_a(!trx_sys_sys_space(space_id) && space_id < SRV_LOG_SPACE_FIRST_ID);
++
++ if (need_mutex)
++ mutex_enter(&(dict_sys->mutex));
++
++ table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++ while (table) { /* no early exit: every table in the list is examined */
++ if (table->space == space_id) {
++ table->is_corrupt = TRUE;
++ found = TRUE;
++ }
++
++ table = UT_LIST_GET_NEXT(table_LRU, table);
++ }
++
++ if (need_mutex)
++ mutex_exit(&(dict_sys->mutex));
++
++ if (!found) { /* no table in the list uses this space: only report it */
++ fprintf(stderr, "InnoDB: space to be marked as "
++ "crashed was not found for id %lu.\n",
++ (ulong) space_id);
++ }
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c
+--- a/storage/innobase/dict/dict0mem.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0mem.c 2010-12-04 15:38:18.126549463 +0900
+@@ -92,6 +92,8 @@
+ /* The number of transactions that are either waiting on the
+ AUTOINC lock or have been granted the lock. */
+ table->n_waiting_or_granted_auto_inc_locks = 0;
++
++ table->is_corrupt = FALSE;
+ #endif /* !UNIV_HOTBACKUP */
+
+ ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:37:50.564551587 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:38:18.128549252 +0900
+@@ -233,6 +233,7 @@
+ file we have written to */
+ ibool is_in_unflushed_spaces; /*!< TRUE if this space is
+ currently in unflushed_spaces */
++ ibool is_corrupt;
+ UT_LIST_NODE_T(fil_space_t) space_list;
+ /*!< list of all spaces */
+ ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
+@@ -1263,6 +1264,8 @@
+ ut_fold_string(name), space);
+ space->is_in_unflushed_spaces = FALSE;
+
++ space->is_corrupt = FALSE;
++
+ UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
+
+ mutex_exit(&fil_system->mutex);
+@@ -4917,6 +4920,22 @@
+ ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
+
++ if (srv_pass_corrupt_table && space->is_corrupt) {
++ /* should ignore i/o for the crashed space */
++ mutex_enter(&fil_system->mutex);
++ fil_node_complete_io(node, fil_system, type);
++ mutex_exit(&fil_system->mutex);
++ if (mode == OS_AIO_NORMAL) {
++ ut_a(space->purpose == FIL_TABLESPACE);
++ buf_page_io_complete(message, trx);
++ }
++ if (type == OS_FILE_READ) {
++ return(DB_TABLESPACE_DELETED);
++ } else {
++ return(DB_SUCCESS);
++ }
++ } else {
++ ut_a(!space->is_corrupt);
+ #ifdef UNIV_HOTBACKUP
+ /* In ibbackup do normal i/o, not aio */
+ if (type == OS_FILE_READ) {
+@@ -4931,6 +4950,8 @@
+ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
+ offset_low, offset_high, len, node, message, trx);
+ #endif
++ } /**/
++
+ ut_a(ret);
+
+ if (mode == OS_AIO_SYNC) {
+@@ -5080,7 +5101,7 @@
+
+ if (fil_node->space->purpose == FIL_TABLESPACE) {
+ srv_set_io_thread_op_info(segment, "complete io for buf page");
+- buf_page_io_complete(message);
++ buf_page_io_complete(message, NULL);
+ } else {
+ srv_set_io_thread_op_info(segment, "complete io for log");
+ log_io_complete(message);
+@@ -5434,3 +5455,46 @@
+ return 0;
+ }
+ }
++
++/*************************************************************************
++Accessors for the is_corrupt flag of fil_space_t; both hold fil_system->mutex. */
++
++ibool
++fil_space_is_corrupt(
++/*=================*/
++ ulint space_id) /*!< in: id of the tablespace to look up */
++{
++ fil_space_t* space;
++ ibool ret = FALSE;
++
++ mutex_enter(&fil_system->mutex);
++
++ space = fil_space_get_by_id(space_id);
++
++ if (space && space->is_corrupt) { /* ret stays FALSE when space is NULL */
++ ret = TRUE;
++ }
++
++ mutex_exit(&fil_system->mutex);
++
++ return(ret); /* TRUE only if the space was found and is flagged corrupt */
++}
++
++void
++fil_space_set_corrupt(
++/*==================*/
++ ulint space_id) /*!< in: id of the tablespace to flag as corrupt */
++{
++ fil_space_t* space;
++
++ mutex_enter(&fil_system->mutex);
++
++ space = fil_space_get_by_id(space_id);
++
++ if (space) { /* nothing is done when the lookup returns NULL */
++ space->is_corrupt = TRUE;
++ }
++
++ mutex_exit(&fil_system->mutex);
++}
++
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:37:50.569480615 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:38:18.131550103 +0900
+@@ -369,6 +369,12 @@
+ ut_ad(id || !zip_size);
+
+ block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(0);
++ }
++ ut_a(block);
++
+ header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+@@ -787,6 +793,12 @@
+ fsp_header_t* sp_header;
+
+ block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(0);
++ }
++ ut_a(block);
++
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
+@@ -1866,6 +1878,11 @@
+ {
+ fseg_inode_t* inode;
+
++ if (srv_pass_corrupt_table && !page) {
++ return(ULINT_UNDEFINED);
++ }
++ ut_a(page);
++
+ for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+
+ inode = fsp_seg_inode_page_get_nth_inode(
+@@ -1979,6 +1996,11 @@
+
+ page = buf_block_get_frame(block);
+
++ if (srv_pass_corrupt_table && !page) {
++ return(0);
++ }
++ ut_a(page);
++
+ n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
+
+ ut_a(n != ULINT_UNDEFINED);
+@@ -2072,6 +2094,11 @@
+
+ inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr);
+
++ if (srv_pass_corrupt_table && !inode) {
++ return(0);
++ }
++ ut_a(inode);
++
+ if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
+
+ inode = NULL;
+@@ -2098,7 +2125,7 @@
+ {
+ fseg_inode_t* inode
+ = fseg_inode_try_get(header, space, zip_size, mtr);
+- ut_a(inode);
++ ut_a(srv_pass_corrupt_table || inode);
+ return(inode);
+ }
+
+@@ -3304,6 +3331,11 @@
+
+ descr = xdes_get_descriptor(space, zip_size, page, mtr);
+
++ if (srv_pass_corrupt_table && !descr) {
++ /* The page may be corrupt. pass it. */
++ return;
++ }
++
+ ut_a(descr);
+ if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+ fputs("InnoDB: Dump of the tablespace extent descriptor: ",
+@@ -3551,6 +3583,11 @@
+
+ descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
+
++ if (srv_pass_corrupt_table && !descr) {
++ /* The page may be corrupt. pass it. */
++ return(TRUE);
++ }
++
+ /* Check that the header resides on a page which has not been
+ freed yet */
+
+@@ -3635,6 +3672,12 @@
+
+ inode = fseg_inode_get(header, space, zip_size, mtr);
+
++ if (srv_pass_corrupt_table && !inode) {
++ /* ignore the corruption */
++ return(TRUE);
++ }
++ ut_a(inode);
++
+ descr = fseg_get_first_extent(inode, space, zip_size, mtr);
+
+ if (descr != NULL) {
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:37:50.578486593 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:38:18.137549396 +0900
+@@ -3926,6 +3926,12 @@
+ DBUG_RETURN(1);
+ }
+
++ if (share->ib_table && share->ib_table->is_corrupt) {
++ free_share(share);
++
++ DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
++ }
++
+ /* Create buffers for packing the fields of a record. Why
+ table->reclength did not work here? Obviously, because char
+ fields when packed actually became 1 byte longer, when we also
+@@ -3953,6 +3959,19 @@
+ /* Get pointer to a table object in InnoDB dictionary cache */
+ ib_table = dict_table_get(norm_name, TRUE);
+
++ if (ib_table && ib_table->is_corrupt) {
++ free_share(share);
++ my_free(upd_buff);
++
++ DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
++ }
++
++ if (share->ib_table) {
++ ut_a(share->ib_table == ib_table);
++ } else {
++ share->ib_table = ib_table;
++ }
++
+ if (NULL == ib_table) {
+ if (is_part && retries < 10) {
+ ++retries;
+@@ -5117,6 +5136,10 @@
+
+ ha_statistic_increment(&SSV::ha_write_count);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
+ table->timestamp_field->set_time();
+
+@@ -5334,6 +5357,10 @@
+ func_exit:
+ innobase_active_small();
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ DBUG_RETURN(error_result);
+ }
+
+@@ -5510,6 +5537,10 @@
+
+ ha_statistic_increment(&SSV::ha_update_count);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
+ table->timestamp_field->set_time();
+
+@@ -5599,6 +5630,10 @@
+
+ innobase_active_small();
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ DBUG_RETURN(error);
+ }
+
+@@ -5620,6 +5655,10 @@
+
+ ha_statistic_increment(&SSV::ha_delete_count);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ if (!prebuilt->upd_node) {
+ row_get_prebuilt_update_vector(prebuilt);
+ }
+@@ -5646,6 +5685,10 @@
+
+ innobase_active_small();
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ DBUG_RETURN(error);
+ }
+
+@@ -5885,6 +5928,10 @@
+
+ ha_statistic_increment(&SSV::ha_read_key_count);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ index = prebuilt->index;
+
+ if (UNIV_UNLIKELY(index == NULL)) {
+@@ -5950,6 +5997,10 @@
+ ret = DB_UNSUPPORTED;
+ }
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ switch (ret) {
+ case DB_SUCCESS:
+ error = 0;
+@@ -6060,6 +6111,10 @@
+ {
+ DBUG_ENTER("change_active_index");
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ ut_ad(user_thd == ha_thd());
+ ut_a(prebuilt->trx == thd_to_trx(user_thd));
+
+@@ -6150,6 +6205,10 @@
+
+ DBUG_ENTER("general_fetch");
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ ut_a(prebuilt->trx == thd_to_trx(user_thd));
+
+ innodb_srv_conc_enter_innodb(prebuilt->trx);
+@@ -6159,6 +6218,10 @@
+
+ innodb_srv_conc_exit_innodb(prebuilt->trx);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ switch (ret) {
+ case DB_SUCCESS:
+ error = 0;
+@@ -7424,10 +7487,18 @@
+
+ update_thd(ha_thd());
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ /* Truncate the table in InnoDB */
+
+ error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ error = convert_error_code_to_mysql(error, prebuilt->table->flags,
+ NULL);
+
+@@ -7940,6 +8011,16 @@
+ return(ranges + (double) rows / (double) total_rows * time_for_scan);
+ }
+
++UNIV_INTERN
++bool
++ha_innobase::is_corrupt() const /* reports share->ib_table->is_corrupt */
++{
++ if (share->ib_table)
++ return ((bool)share->ib_table->is_corrupt);
++ else /* share->ib_table is NULL: report not corrupt */
++ return (FALSE);
++}
++
+ /*********************************************************************//**
+ Calculates the key number used inside MySQL for an Innobase index. We will
+ first check the "index translation table" for a match of the index to get
+@@ -8058,7 +8139,7 @@
+ ib_table = prebuilt->table;
+
+ if (flag & HA_STATUS_TIME) {
+- if (called_from_analyze || innobase_stats_on_metadata) {
++ if ((called_from_analyze || innobase_stats_on_metadata) && !share->ib_table->is_corrupt) {
+ /* In sql_show we call with this flag: update
+ then statistics so that they are up-to-date */
+
+@@ -8349,10 +8430,18 @@
+ THD* thd, /*!< in: connection thread handle */
+ HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
+ {
++ if (share->ib_table->is_corrupt) {
++ return(HA_ADMIN_CORRUPT);
++ }
++
+ /* Simply call ::info() with all the flags */
+ info_low(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
+ true /* called from analyze */);
+
++ if (share->ib_table->is_corrupt) {
++ return(HA_ADMIN_CORRUPT);
++ }
++
+ return(0);
+ }
+
+@@ -8534,6 +8623,10 @@
+ my_error(ER_QUERY_INTERRUPTED, MYF(0));
+ }
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ADMIN_CORRUPT); /* this function leaves via DBUG_RETURN; a plain return would unbalance the DBUG stack */
++ }
++
+ DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
+ }
+
+@@ -9304,6 +9397,10 @@
+
+ update_thd(thd);
+
++ if (share->ib_table->is_corrupt) {
++ DBUG_RETURN(HA_ERR_CRASHED);
++ }
++
+ if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+@@ -11722,6 +11819,14 @@
+ "0 (the default) disables automatic dumps.",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
++static MYSQL_SYSVAR_ULONG(pass_corrupt_table, srv_pass_corrupt_table,
++ PLUGIN_VAR_RQCMDARG,
++ "Pass corruptions of user tables as 'corrupt table' instead of crashing itself, "
++ "when used with file_per_table. "
++ "All file io for the datafile after detected as corrupt are disabled, "
++ "except for the deletion.",
++ NULL, NULL, 0, 0, 1, 0); /* check, update, default 0, min 0, max 1, block size 0 */
++
+ static struct st_mysql_sys_var* innobase_system_variables[]= {
+ MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(autoextend_increment),
+@@ -11809,6 +11914,7 @@
+ MYSQL_SYSVAR(auto_lru_dump),
+ MYSQL_SYSVAR(purge_threads),
+ MYSQL_SYSVAR(purge_batch_size),
++ MYSQL_SYSVAR(pass_corrupt_table),
+ NULL
+ };
+
+diff -ruN a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
+--- a/storage/innobase/handler/ha_innodb.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/handler/ha_innodb.h 2010-12-04 15:38:18.159588579 +0900
+@@ -52,6 +52,7 @@
+ innodb_idx_translate_t idx_trans_tbl; /*!< index translation
+ table between MySQL and
+ Innodb */
++ dict_table_t* ib_table;
+ } INNOBASE_SHARE;
+
+
+@@ -135,6 +136,7 @@
+ int close(void);
+ double scan_time();
+ double read_time(uint index, uint ranges, ha_rows rows);
++ bool is_corrupt() const;
+
+ int write_row(uchar * buf);
+ int update_row(const uchar * old_data, uchar * new_data);
+diff -ruN a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
+--- a/storage/innobase/include/btr0btr.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/btr0btr.ic 2010-12-04 15:38:18.162515035 +0900
+@@ -28,7 +28,7 @@
+ #include "mtr0mtr.h"
+ #include "mtr0log.h"
+ #include "page0zip.h"
+-
++#include "srv0srv.h"
+ #define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
+ (not really a hard limit).
+ Used in debug assertions
+@@ -55,7 +55,9 @@
+ block = buf_page_get_gen(space, zip_size, page_no, mode,
+ NULL, BUF_GET, file, line, mtr);
+
+- if (mode != RW_NO_LATCH) {
++ ut_a(srv_pass_corrupt_table || block);
++
++ if (block && mode != RW_NO_LATCH) {
+
+ buf_block_dbg_add_level(block, SYNC_TREE_NODE);
+ }
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:49:59.218956083 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-04 15:38:18.164513667 +0900
+@@ -913,7 +913,7 @@
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ __attribute__((pure));
+ #else /* UNIV_DEBUG */
+-# define buf_block_get_frame(block) (block)->frame
++# define buf_block_get_frame(block) ((block) ? (block)->frame : 0) /* NULL block yields a NULL frame */
+ #endif /* UNIV_DEBUG */
+ /*********************************************************************//**
+ Gets the space id of a block.
+@@ -1045,7 +1045,8 @@
+ void
+ buf_page_io_complete(
+ /*=================*/
+- buf_page_t* bpage); /*!< in: pointer to the block in question */
++ buf_page_t* bpage, /*!< in: pointer to the block in question */
++ trx_t* trx);
+ /********************************************************************//**
+ Calculates a folded value of a file page address to use in the page hash
+ table.
+@@ -1360,6 +1361,7 @@
+ 0 if the block was never accessed
+ in the buffer pool */
+ /* @} */
++ ibool is_corrupt;
+ # ifdef UNIV_DEBUG_FILE_ACCESSES
+ ibool file_page_was_freed;
+ /*!< this is set to TRUE when fsp
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic 2010-12-03 15:49:59.221956024 +0900
++++ b/storage/innobase/include/buf0buf.ic 2010-12-04 15:38:18.167513925 +0900
+@@ -34,7 +34,7 @@
+ #include "buf0flu.h"
+ #include "buf0lru.h"
+ #include "buf0rea.h"
+-
++#include "srv0srv.h"
+ /*********************************************************************//**
+ Gets the current size of buffer buf_pool in bytes.
+ @return size in bytes */
+@@ -617,6 +617,12 @@
+ /*================*/
+ const buf_block_t* block) /*!< in: pointer to the control block */
+ {
++ ut_a(srv_pass_corrupt_table || block);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(0);
++ }
++
+ ut_ad(block);
+
+ switch (buf_block_get_state(block)) {
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h 2010-12-03 17:30:16.306955940 +0900
++++ b/storage/innobase/include/dict0dict.h 2010-12-04 15:38:18.169513750 +0900
+@@ -1226,6 +1226,15 @@
+ dict_close(void);
+ /*============*/
+
++/*************************************************************************
++set is_corrupt flag by space_id*/
++
++void
++dict_table_set_corrupt_by_space(
++/*============================*/
++ ulint space_id,
++ ibool need_mutex);
++
+ #ifndef UNIV_NONINL
+ #include "dict0dict.ic"
+ #endif
+diff -ruN a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
+--- a/storage/innobase/include/dict0mem.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0mem.h 2010-12-04 15:38:18.171513956 +0900
+@@ -595,6 +595,7 @@
+ the AUTOINC lock on this table. */
+ /* @} */
+ /*----------------------*/
++ ibool is_corrupt;
+ #endif /* !UNIV_HOTBACKUP */
+
+ #ifdef UNIV_DEBUG
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-12-04 15:35:29.175520016 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-04 15:38:18.172483391 +0900
+@@ -757,6 +757,19 @@
+ fil_system_hash_nodes(void);
+ /*========================*/
+
++/*************************************************************************
++functions to access is_corrupt flag of fil_space_t*/
++
++ibool
++fil_space_is_corrupt(
++/*=================*/
++ ulint space_id);
++
++void
++fil_space_set_corrupt(
++/*==================*/
++ ulint space_id);
++
+ typedef struct fil_space_struct fil_space_t;
+
+ #endif
+diff -ruN a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
+--- a/storage/innobase/include/fut0fut.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/fut0fut.ic 2010-12-04 15:38:18.174481728 +0900
+@@ -23,6 +23,7 @@
+ Created 12/13/1995 Heikki Tuuri
+ ***********************************************************************/
+
++#include "srv0srv.h"
+ #include "sync0rw.h"
+ #include "buf0buf.h"
+
+@@ -48,6 +49,12 @@
+ ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+ block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
++
++ if (srv_pass_corrupt_table && !block) {
++ return(0);
++ }
++ ut_a(block);
++
+ ptr = buf_block_get_frame(block) + addr.boffset;
+
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+diff -ruN a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
+--- a/storage/innobase/include/page0page.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0page.h 2010-12-04 15:38:18.175514037 +0900
+@@ -500,7 +500,7 @@
+ page_is_leaf(
+ /*=========*/
+ const page_t* page) /*!< in: page */
+- __attribute__((nonnull, pure));
++ __attribute__((pure));
+ /************************************************************//**
+ Gets the pointer to the next record on the page.
+ @return pointer to next record */
+diff -ruN a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
+--- a/storage/innobase/include/page0page.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0page.ic 2010-12-04 15:38:18.177482672 +0900
+@@ -274,6 +274,9 @@
+ /*=========*/
+ const page_t* page) /*!< in: page */
+ {
++ if (!page) {
++ return(FALSE);
++ }
+ return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
+ }
+
+diff -ruN a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
+--- a/storage/innobase/include/page0zip.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0zip.h 2010-12-04 15:38:18.179513974 +0900
+@@ -114,7 +114,7 @@
+ const page_t* page, /*!< in: uncompressed page */
+ dict_index_t* index, /*!< in: index of the B-tree node */
+ mtr_t* mtr) /*!< in: mini-transaction, or NULL */
+- __attribute__((nonnull(1,2,3)));
++ __attribute__((nonnull(1,3)));
+
+ /**********************************************************************//**
+ Decompress a page. This function should tolerate errors on the compressed
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-04 15:37:50.591516341 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:38:18.180563749 +0900
+@@ -242,6 +242,7 @@
+ extern ulint srv_adaptive_flushing_method;
+
+ extern ulint srv_expand_import;
++extern ulint srv_pass_corrupt_table;
+
+ extern ulint srv_extra_rsegments;
+ extern ulint srv_dict_size_limit;
+diff -ruN a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
+--- a/storage/innobase/page/page0zip.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0zip.c 2010-12-04 15:38:18.195515935 +0900
+@@ -1153,6 +1153,10 @@
+ FILE* logfile = NULL;
+ #endif
+
++ if (!page) {
++ return(FALSE);
++ }
++
+ ut_a(page_is_comp(page));
+ ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(page_simple_validate_new((page_t*) page));
+diff -ruN a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
+--- a/storage/innobase/row/row0ins.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0ins.c 2010-12-04 15:38:18.198514028 +0900
+@@ -1335,6 +1335,12 @@
+ const rec_t* rec = btr_pcur_get_rec(&pcur);
+ const buf_block_t* block = btr_pcur_get_block(&pcur);
+
++ if (srv_pass_corrupt_table && !block) {
++ err = DB_CORRUPTION;
++ break;
++ }
++ ut_a(block);
++
+ if (page_rec_is_infimum(rec)) {
+
+ continue;
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-12-03 17:30:16.330986655 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-04 15:38:18.201513966 +0900
+@@ -1245,6 +1245,13 @@
+
+ if (UNIV_LIKELY(has_next)) {
+ rec = btr_pcur_get_rec(&pcur);
++
++ if (srv_pass_corrupt_table && !rec) {
++ err = DB_CORRUPTION;
++ goto err_exit;
++ }
++ ut_a(rec);
++
+ offsets = rec_get_offsets(rec, clust_index, NULL,
+ ULINT_UNDEFINED, &row_heap);
+
+diff -ruN a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
+--- a/storage/innobase/row/row0sel.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0sel.c 2010-12-04 15:38:18.205551115 +0900
+@@ -3848,6 +3848,13 @@
+ /* PHASE 4: Look for matching records in a loop */
+
+ rec = btr_pcur_get_rec(pcur);
++
++ if (srv_pass_corrupt_table && !rec) {
++ err = DB_CORRUPTION;
++ goto lock_wait_or_error;
++ }
++ ut_a(rec);
++
+ ut_ad(!!page_rec_is_comp(rec) == comp);
+ #ifdef UNIV_SEARCH_DEBUG
+ /*
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:37:50.602481253 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:38:18.209513823 +0900
+@@ -428,6 +428,7 @@
+ UNIV_INTERN ulint srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
+
+ UNIV_INTERN ulint srv_expand_import = 0; /* 0:disable 1:enable */
++UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
+
+ UNIV_INTERN ulint srv_extra_rsegments = 127; /* extra rseg for users */
+ UNIV_INTERN ulint srv_dict_size_limit = 0;
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:37:50.605491300 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:38:18.212513722 +0900
+@@ -2020,6 +2020,13 @@
+
+ os_fast_mutex_free(&srv_os_test_mutex);
+
++ if (!srv_file_per_table_original_value
++ && srv_pass_corrupt_table) {
++ fprintf(stderr, "InnoDB: Warning:"
++ " innodb_file_per_table is disabled."
++ " So innodb_pass_corrupt_table doesn't make sense\n");
++ }
++
+ if (srv_print_verbose_log) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
--- /dev/null
+# name : innodb_recovery_patches.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:49:59.187028943 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 17:30:41.579956150 +0900
+@@ -122,6 +122,46 @@
+ bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
+ tablespace_version, offset);
+ if (bpage == NULL) {
++ /* bugfix: http://bugs.mysql.com/bug.php?id=43948 */
++ if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) {
++ /* hashed log recs must be treated here */
++ recv_addr_t* recv_addr;
++
++ mutex_enter(&(recv_sys->mutex));
++
++ if (recv_sys->apply_log_recs == FALSE) {
++ mutex_exit(&(recv_sys->mutex));
++ goto not_to_recover;
++ }
++
++ /* recv_get_fil_addr_struct() */
++ recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
++ hash_calc_hash(ut_fold_ulint_pair(space, offset),
++ recv_sys->addr_hash));
++ while (recv_addr) {
++ if ((recv_addr->space == space)
++ && (recv_addr->page_no == offset)) {
++ break;
++ }
++ recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
++ }
++
++ if ((recv_addr == NULL)
++ || (recv_addr->state == RECV_BEING_PROCESSED)
++ || (recv_addr->state == RECV_PROCESSED)) {
++ mutex_exit(&(recv_sys->mutex));
++ goto not_to_recover;
++ }
++
++ fprintf(stderr, " (cannot find space: %lu)", space);
++ recv_addr->state = RECV_PROCESSED;
++
++ ut_a(recv_sys->n_addrs);
++ recv_sys->n_addrs--;
++
++ mutex_exit(&(recv_sys->mutex));
++ }
++not_to_recover:
+
+ return(0);
+ }
+@@ -613,6 +653,50 @@
+ /* It is a single table tablespace and the .ibd file is
+ missing: do nothing */
+
++ /* the log records should be treated here same reason
++ for http://bugs.mysql.com/bug.php?id=43948 */
++
++ if (recv_recovery_is_on()) {
++ recv_addr_t* recv_addr;
++
++ mutex_enter(&(recv_sys->mutex));
++
++ if (recv_sys->apply_log_recs == FALSE) {
++ mutex_exit(&(recv_sys->mutex));
++ goto not_to_recover;
++ }
++
++ for (i = 0; i < n_stored; i++) {
++ /* recv_get_fil_addr_struct() */
++ recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
++ hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]),
++ recv_sys->addr_hash));
++ while (recv_addr) {
++ if ((recv_addr->space == space)
++ && (recv_addr->page_no == page_nos[i])) {
++ break;
++ }
++ recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
++ }
++
++ if ((recv_addr == NULL)
++ || (recv_addr->state == RECV_BEING_PROCESSED)
++ || (recv_addr->state == RECV_PROCESSED)) {
++ continue;
++ }
++
++ recv_addr->state = RECV_PROCESSED;
++
++ ut_a(recv_sys->n_addrs);
++ recv_sys->n_addrs--;
++ }
++
++ mutex_exit(&(recv_sys->mutex));
++
++ fprintf(stderr, " (cannot find space: %lu)", space);
++ }
++not_to_recover:
++
+ return;
+ }
+
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:30:16.261955714 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:30:41.584971130 +0900
+@@ -182,6 +182,7 @@
+ #endif /* UNIV_LOG_ARCHIVE */
+ static my_bool innobase_use_doublewrite = TRUE;
+ static my_bool innobase_use_checksums = TRUE;
++static my_bool innobase_recovery_stats = FALSE; /* kept equal to the recovery_stats sysvar default */
+ static my_bool innobase_locks_unsafe_for_binlog = FALSE;
+ static my_bool innobase_overwrite_relay_log_info = FALSE;
+ static my_bool innobase_rollback_on_timeout = FALSE;
+@@ -2529,6 +2530,8 @@
+
+ srv_force_recovery = (ulint) innobase_force_recovery;
+
++ srv_recovery_stats = (ibool) innobase_recovery_stats;
++
+ srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+ srv_use_checksums = (ibool) innobase_use_checksums;
+
+@@ -11170,6 +11173,11 @@
+ "The common part for InnoDB table spaces.",
+ NULL, NULL, NULL);
+
++static MYSQL_SYSVAR_BOOL(recovery_stats, innobase_recovery_stats,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Output statistics of recovery process after it.",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(overwrite_relay_log_info, innobase_overwrite_relay_log_info,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "During InnoDB crash recovery on slave overwrite relay-log.info "
+@@ -11658,6 +11666,7 @@
+ MYSQL_SYSVAR(data_file_path),
+ MYSQL_SYSVAR(data_home_dir),
+ MYSQL_SYSVAR(doublewrite),
++ MYSQL_SYSVAR(recovery_stats),
+ MYSQL_SYSVAR(fast_shutdown),
+ MYSQL_SYSVAR(file_io_threads),
+ MYSQL_SYSVAR(read_io_threads),
+diff -ruN a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
+--- a/storage/innobase/include/log0recv.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/log0recv.h 2010-12-03 17:30:41.592958318 +0900
+@@ -438,6 +438,39 @@
+ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
+ ulint n_addrs;/*!< number of not processed hashed file
+ addresses in the hash table */
++
++/* These are copies of defines from other files: if you change a define
++ in its original file, you must change the copy here as well. */
++/* defined in os0file.c */
++#define OS_AIO_MERGE_N_CONSECUTIVE 64
++/* defined in log0recv.c */
++#define RECV_READ_AHEAD_AREA 32
++ time_t stats_recv_start_time;
++ ulint stats_recv_turns;
++
++ ulint stats_read_requested_pages;
++ ulint stats_read_in_area[RECV_READ_AHEAD_AREA];
++
++ ulint stats_read_io_pages;
++ ulint stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
++ ulint stats_write_io_pages;
++ ulint stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE];
++
++ ulint stats_doublewrite_check_pages;
++ ulint stats_doublewrite_overwrite_pages;
++
++ ulint stats_recover_pages_with_read;
++ ulint stats_recover_pages_without_read;
++
++ ulint stats_log_recs;
++ ulint stats_log_len_sum;
++
++ ulint stats_applied_log_recs;
++ ulint stats_applied_log_len_sum;
++ ulint stats_pages_already_new;
++
++ ib_uint64_t stats_oldest_modified_lsn;
++ ib_uint64_t stats_newest_modified_lsn;
+ };
+
+ /** The recovery system */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 17:30:16.321953515 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:30:41.593985184 +0900
+@@ -129,6 +129,8 @@
+ extern ulint* srv_data_file_sizes;
+ extern ulint* srv_data_file_is_raw_partition;
+
++extern ibool srv_recovery_stats;
++
+ extern ibool srv_auto_extend_last_data_file;
+ extern ulint srv_last_file_size_max;
+ extern char** srv_log_group_home_dirs;
+diff -ruN a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
+--- a/storage/innobase/log/log0recv.c 2010-12-03 15:18:48.903987466 +0900
++++ b/storage/innobase/log/log0recv.c 2010-12-03 17:30:41.598022536 +0900
+@@ -187,6 +187,9 @@
+
+ recv_sys->heap = NULL;
+ recv_sys->addr_hash = NULL;
++
++ recv_sys->stats_recv_start_time = time(NULL);
++ recv_sys->stats_oldest_modified_lsn = IB_ULONGLONG_MAX;
+ }
+
+ /********************************************************//**
+@@ -327,6 +330,11 @@
+ recv_n_pool_free_frames = 512;
+ }
+
++ if (buf_pool_get_curr_size() >= (32 * 1024 * 1024)) {
++ /* Buffer pool of size greater than 32 MB. */
++ recv_n_pool_free_frames = 1024;
++ }
++
+ recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
+ recv_sys->len = 0;
+ recv_sys->recovered_offset = 0;
+@@ -1363,6 +1371,11 @@
+
+ len = rec_end - body;
+
++ if (srv_recovery_stats) {
++ recv_sys->stats_log_recs++;
++ recv_sys->stats_log_len_sum += len;
++ }
++
+ recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
+ recv->type = type;
+ recv->len = rec_end - body;
+@@ -1474,6 +1487,7 @@
+ ib_uint64_t start_lsn;
+ ib_uint64_t end_lsn;
+ ib_uint64_t page_lsn;
++ ib_uint64_t page_lsn_orig;
+ ib_uint64_t page_newest_lsn;
+ ibool modification_to_page;
+ #ifndef UNIV_HOTBACKUP
+@@ -1496,6 +1510,8 @@
+ buf_block_get_page_no(block));
+
+ if ((recv_addr == NULL)
++ /* bugfix: http://bugs.mysql.com/bug.php?id=44140 */
++ || (recv_addr->state == RECV_BEING_READ && !just_read_in)
+ || (recv_addr->state == RECV_BEING_PROCESSED)
+ || (recv_addr->state == RECV_PROCESSED)) {
+
+@@ -1511,6 +1527,14 @@
+
+ recv_addr->state = RECV_BEING_PROCESSED;
+
++ if (srv_recovery_stats) {
++ if (just_read_in) {
++ recv_sys->stats_recover_pages_with_read++;
++ } else {
++ recv_sys->stats_recover_pages_without_read++;
++ }
++ }
++
+ mutex_exit(&(recv_sys->mutex));
+
+ mtr_start(&mtr);
+@@ -1540,6 +1564,7 @@
+
+ /* Read the newest modification lsn from the page */
+ page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
++ page_lsn_orig = page_lsn;
+
+ #ifndef UNIV_HOTBACKUP
+ /* It may be that the page has been modified in the buffer
+@@ -1559,6 +1584,21 @@
+ modification_to_page = FALSE;
+ start_lsn = end_lsn = 0;
+
++ if (srv_recovery_stats) {
++ mutex_enter(&(recv_sys->mutex));
++ if (page_lsn_orig && recv_sys->stats_oldest_modified_lsn > page_lsn_orig) {
++ recv_sys->stats_oldest_modified_lsn = page_lsn_orig;
++ }
++ if (page_lsn_orig && recv_sys->stats_newest_modified_lsn < page_lsn_orig) {
++ recv_sys->stats_newest_modified_lsn = page_lsn_orig;
++ }
++ if (UT_LIST_GET_LAST(recv_addr->rec_list)->start_lsn
++ < page_lsn_orig) {
++ recv_sys->stats_pages_already_new++;
++ }
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
+
+ while (recv) {
+@@ -1613,6 +1653,13 @@
+ buf + recv->len,
+ block, &mtr);
+
++ if (srv_recovery_stats) {
++ mutex_enter(&(recv_sys->mutex));
++ recv_sys->stats_applied_log_recs++;
++ recv_sys->stats_applied_log_len_sum += recv->len;
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ end_lsn = recv->start_lsn + recv->len;
+ mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
+ mach_write_to_8(UNIV_PAGE_SIZE
+@@ -1715,6 +1762,13 @@
+ }
+ }
+
++ if (srv_recovery_stats && n) {
++ mutex_enter(&(recv_sys->mutex));
++ recv_sys->stats_read_requested_pages += n;
++ recv_sys->stats_read_in_area[n - 1]++;
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
+ /*
+ fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
+@@ -1867,6 +1921,10 @@
+
+ if (has_printed) {
+ fprintf(stderr, "InnoDB: Apply batch completed\n");
++
++ if (srv_recovery_stats) {
++ recv_sys->stats_recv_turns++;
++ }
+ }
+
+ mutex_exit(&(recv_sys->mutex));
+@@ -3270,6 +3328,90 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ if (recv_needed_recovery && srv_recovery_stats) {
++ ulint flush_list_len = 0;
++ ulint i;
++
++ fprintf(stderr,
++ "InnoDB: Applying log records was done. Its statistics are followings.\n");
++
++ fprintf(stderr,
++ "============================================================\n"
++ "-------------------\n"
++ "RECOVERY STATISTICS\n"
++ "-------------------\n");
++ fprintf(stderr,
++ "Recovery time: %g sec. (%lu turns)\n",
++ difftime(time(NULL), recv_sys->stats_recv_start_time),
++ recv_sys->stats_recv_turns);
++
++ for (i = 0; i < srv_buf_pool_instances; i++) {
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++ flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
++ }
++ fprintf(stderr,
++ "\n"
++ "Data page IO statistics\n"
++ " Requested pages: %lu\n"
++ " Read pages: %lu\n"
++ " Written pages: %lu\n"
++ " (Dirty blocks): %lu\n",
++ recv_sys->stats_read_requested_pages,
++ recv_sys->stats_read_io_pages,
++ recv_sys->stats_write_io_pages,
++ flush_list_len);
++
++ fprintf(stderr,
++ " Grouping IO [times]:\n"
++ "\tnumber of pages,\n"
++ "\t\tread request neighbors (in %d pages chunk),\n"
++ "\t\t\tcombined read IO,\n"
++ "\t\t\t\tcombined write IO\n",
++ RECV_READ_AHEAD_AREA);
++ for (i = 0; i < ut_max(RECV_READ_AHEAD_AREA,
++ OS_AIO_MERGE_N_CONSECUTIVE); i++) {
++ fprintf(stderr,
++ "\t%3lu,\t%lu,\t%lu,\t%lu\n", i + 1,
++ (i < RECV_READ_AHEAD_AREA) ?
++ recv_sys->stats_read_in_area[i] : 0,
++ (i < OS_AIO_MERGE_N_CONSECUTIVE) ?
++ recv_sys->stats_read_io_consecutive[i] : 0,
++ (i < OS_AIO_MERGE_N_CONSECUTIVE) ?
++ recv_sys->stats_write_io_consecutive[i] : 0);
++ }
++
++ fprintf(stderr,
++ "\n"
++ "Recovery process statistics\n"
++ " Checked pages by doublewrite buffer: %lu\n"
++ " Overwritten pages from doublewrite: %lu\n"
++ " Recovered pages by io_thread: %lu\n"
++ " Recovered pages by main thread: %lu\n"
++ " Parsed log records to apply: %lu\n"
++ " Sum of the length: %lu\n"
++ " Applied log records: %lu\n"
++ " Sum of the length: %lu\n"
++ " Pages which are already new enough: %lu (It may not be accurate, if turns > 1)\n"
++ " Oldest page's LSN: %llu\n"
++ " Newest page's LSN: %llu\n",
++ recv_sys->stats_doublewrite_check_pages,
++ recv_sys->stats_doublewrite_overwrite_pages,
++ recv_sys->stats_recover_pages_with_read,
++ recv_sys->stats_recover_pages_without_read,
++ recv_sys->stats_log_recs,
++ recv_sys->stats_log_len_sum,
++ recv_sys->stats_applied_log_recs,
++ recv_sys->stats_applied_log_len_sum,
++ recv_sys->stats_pages_already_new,
++ recv_sys->stats_oldest_modified_lsn,
++ recv_sys->stats_newest_modified_lsn);
++
++ fprintf(stderr,
++ "============================================================\n");
++ }
++
+ if (recv_needed_recovery) {
+ trx_sys_print_mysql_master_log_pos();
+ trx_sys_print_mysql_binlog_offset();
+diff -ruN a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
+--- a/storage/innobase/os/os0file.c 2010-12-03 15:18:48.908955759 +0900
++++ b/storage/innobase/os/os0file.c 2010-12-03 17:30:41.602022989 +0900
+@@ -43,6 +43,7 @@
+ #include "srv0start.h"
+ #include "fil0fil.h"
+ #include "buf0buf.h"
++#include "log0recv.h"
+ #ifndef UNIV_HOTBACKUP
+ # include "os0sync.h"
+ # include "os0thread.h"
+@@ -4237,6 +4238,18 @@
+ os_thread_exit(NULL);
+ }
+
++ if (srv_recovery_stats && recv_recovery_is_on() && n_consecutive) {
++ mutex_enter(&(recv_sys->mutex));
++ if (slot->type == OS_FILE_READ) {
++ recv_sys->stats_read_io_pages += n_consecutive;
++ recv_sys->stats_read_io_consecutive[n_consecutive - 1]++;
++ } else if (slot->type == OS_FILE_WRITE) {
++ recv_sys->stats_write_io_pages += n_consecutive;
++ recv_sys->stats_write_io_consecutive[n_consecutive - 1]++;
++ }
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ os_mutex_enter(array->mutex);
+
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 17:30:16.339955597 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:30:41.604958138 +0900
+@@ -165,6 +165,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+
++UNIV_INTERN ibool srv_recovery_stats = FALSE;
++
+ /* if TRUE, then we auto-extend the last data file */
+ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
+ /* if != 0, this tells the max size auto-extending may increase the
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c 2010-12-03 15:41:52.051986524 +0900
++++ b/storage/innobase/trx/trx0sys.c 2010-12-03 17:30:41.607026818 +0900
+@@ -566,6 +566,12 @@
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ read_buf, NULL);
+
++ if (srv_recovery_stats && recv_recovery_is_on()) {
++ mutex_enter(&(recv_sys->mutex));
++ recv_sys->stats_doublewrite_check_pages++;
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ /* Check if the page is corrupt */
+
+ if (UNIV_UNLIKELY
+@@ -613,6 +619,13 @@
+ zip_size, page_no, 0,
+ zip_size ? zip_size : UNIV_PAGE_SIZE,
+ page, NULL);
++
++ if (srv_recovery_stats && recv_recovery_is_on()) {
++ mutex_enter(&(recv_sys->mutex));
++ recv_sys->stats_doublewrite_overwrite_pages++;
++ mutex_exit(&(recv_sys->mutex));
++ }
++
+ fprintf(stderr,
+ "InnoDB: Recovered the page from"
+ " the doublewrite buffer.\n");
--- /dev/null
+# name : innodb_separate_doublewrite.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 17:49:11.574962867 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-04 15:35:58.624514033 +0900
+@@ -4247,7 +4247,8 @@
+ read_space_id = mach_read_from_4(
+ frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+- if (bpage->space == TRX_SYS_SPACE
++ if ((bpage->space == TRX_SYS_SPACE
++ || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
+ && trx_doublewrite_page_inside(bpage->offset)) {
+
+ ut_print_timestamp(stderr);
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:49:59.179956111 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-04 15:35:58.624514033 +0900
+@@ -763,7 +763,8 @@
+ write_buf = trx_doublewrite->write_buf;
+ i = 0;
+
+- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
++ fil_io(OS_FILE_WRITE, TRUE,
++ (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
+ trx_doublewrite->block1, 0, len,
+ (void*) write_buf, NULL);
+
+@@ -800,7 +801,8 @@
+ + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+ ut_ad(i == TRX_SYS_DOUBLEWRITE_BLOCK_SIZE);
+
+- fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
++ fil_io(OS_FILE_WRITE, TRUE,
++ (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE), 0,
+ trx_doublewrite->block2, 0, len,
+ (void*) write_buf, NULL);
+
+@@ -830,7 +832,7 @@
+ flush:
+ /* Now flush the doublewrite buffer data to disk */
+
+- fil_flush(TRX_SYS_SPACE);
++ fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
+
+ /* We know that the writes have been flushed to disk now
+ and in recovery we will find them in the doublewrite buffer
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:29.138514157 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-04 15:35:58.626486771 +0900
+@@ -88,7 +88,9 @@
+ wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+ mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
+
+- if (trx_doublewrite && space == TRX_SYS_SPACE
++ if (trx_doublewrite
++ && (space == TRX_SYS_SPACE
++ || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+ && ( (offset >= trx_doublewrite->block1
+ && offset < trx_doublewrite->block1
+ + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c 2010-12-03 17:30:16.252956569 +0900
++++ b/storage/innobase/dict/dict0load.c 2010-12-04 15:35:58.627482825 +0900
+@@ -781,7 +781,7 @@
+
+ mtr_commit(&mtr);
+
+- if (space_id == 0) {
++ if (trx_sys_sys_space(space_id)) {
+ /* The system tablespace always exists. */
+ } else if (in_crash_recovery) {
+ /* Check that the tablespace (the .ibd file) really
+@@ -1578,7 +1578,7 @@
+ space = mach_read_from_4(field);
+
+ /* Check if the tablespace exists and has the right name */
+- if (space != 0) {
++ if (!trx_sys_sys_space(space)) {
+ flags = dict_sys_tables_get_flags(rec);
+
+ if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
+@@ -1728,7 +1728,7 @@
+ goto err_exit;
+ }
+
+- if (table->space == 0) {
++ if (trx_sys_sys_space(table->space)) {
+ /* The system tablespace is always available. */
+ } else if (!fil_space_for_table_exists_in_mem(
+ table->space, name,
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:29.143813775 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-04 15:35:58.628498870 +0900
+@@ -627,7 +627,7 @@
+
+ UT_LIST_ADD_LAST(chain, space->chain, node);
+
+- if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
++ if (id < SRV_EXTRA_SYS_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
+
+ fil_system->max_assigned_id = id;
+ }
+@@ -691,14 +691,14 @@
+ size_bytes = (((ib_int64_t)size_high) << 32)
+ + (ib_int64_t)size_low;
+ #ifdef UNIV_HOTBACKUP
+- if (space->id == 0) {
++ if (trx_sys_sys_space(space->id)) {
+ node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+ os_file_close(node->handle);
+ goto add_size;
+ }
+ #endif /* UNIV_HOTBACKUP */
+ ut_a(space->purpose != FIL_LOG);
+- ut_a(space->id != 0);
++ ut_a(!trx_sys_sys_space(space->id));
+
+ if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
+ fprintf(stderr,
+@@ -744,7 +744,7 @@
+ }
+
+ if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
+- || space_id == 0)) {
++ || trx_sys_sys_space(space_id))) {
+ fprintf(stderr,
+ "InnoDB: Error: tablespace id %lu"
+ " in file %s is not sensible\n",
+@@ -812,7 +812,7 @@
+
+ system->n_open++;
+
+- if (space->purpose == FIL_TABLESPACE && space->id != 0) {
++ if (space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(space->id)) {
+ /* Put the node to the LRU list */
+ UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+ }
+@@ -845,7 +845,7 @@
+ ut_a(system->n_open > 0);
+ system->n_open--;
+
+- if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
++ if (node->space->purpose == FIL_TABLESPACE && !trx_sys_sys_space(node->space->id)) {
+ ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+
+ /* The node is in the LRU list, remove it */
+@@ -931,7 +931,7 @@
+ retry:
+ mutex_enter(&fil_system->mutex);
+
+- if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
++ if (trx_sys_sys_space(space_id) || space_id >= SRV_LOG_SPACE_FIRST_ID) {
+ /* We keep log files and system tablespace files always open;
+ this is important in preventing deadlocks in this module, as
+ a page read completion often performs another read from the
+@@ -1162,7 +1162,7 @@
+ " tablespace memory cache!\n",
+ (ulong) space->id);
+
+- if (id == 0 || purpose != FIL_TABLESPACE) {
++ if (trx_sys_sys_space(id) || purpose != FIL_TABLESPACE) {
+
+ mutex_exit(&fil_system->mutex);
+
+@@ -1224,6 +1224,7 @@
+ space->mark = FALSE;
+
+ if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
++ && UNIV_UNLIKELY(id < SRV_EXTRA_SYS_SPACE_FIRST_ID)
+ && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
+ if (!fil_system->space_id_reuse_warned) {
+ fil_system->space_id_reuse_warned = TRUE;
+@@ -1307,7 +1308,7 @@
+ (ulong) SRV_LOG_SPACE_FIRST_ID);
+ }
+
+- success = (id < SRV_LOG_SPACE_FIRST_ID);
++ success = (id < SRV_EXTRA_SYS_SPACE_FIRST_ID);
+
+ if (success) {
+ *space_id = fil_system->max_assigned_id = id;
+@@ -1570,6 +1571,8 @@
+ UT_LIST_INIT(fil_system->LRU);
+
+ fil_system->max_n_open = max_n_open;
++
++ fil_system->max_assigned_id = TRX_SYS_SPACE_MAX;
+ }
+
+ /*******************************************************************//**
+@@ -1591,7 +1594,7 @@
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+ while (space != NULL) {
+- if (space->purpose != FIL_TABLESPACE || space->id == 0) {
++ if (space->purpose != FIL_TABLESPACE || trx_sys_sys_space(space->id)) {
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ while (node != NULL) {
+@@ -1681,6 +1684,10 @@
+ ut_error;
+ }
+
++ if (max_id >= SRV_EXTRA_SYS_SPACE_FIRST_ID) {
++ return;
++ }
++
+ mutex_enter(&fil_system->mutex);
+
+ if (fil_system->max_assigned_id < max_id) {
+@@ -1699,6 +1706,7 @@
+ ulint
+ fil_write_lsn_and_arch_no_to_file(
+ /*==============================*/
++ ulint space_id,
+ ulint sum_of_sizes, /*!< in: combined size of previous files
+ in space, in database pages */
+ ib_uint64_t lsn, /*!< in: lsn to write */
+@@ -1708,14 +1716,16 @@
+ byte* buf1;
+ byte* buf;
+
++ ut_a(trx_sys_sys_space(space_id));
++
+ buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
+ buf = ut_align(buf1, UNIV_PAGE_SIZE);
+
+- fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
++ fil_read(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+
+- fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
++ fil_write(TRUE, space_id, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+
+ mem_free(buf1);
+
+@@ -1751,7 +1761,7 @@
+ always open. */
+
+ if (space->purpose == FIL_TABLESPACE
+- && space->id == 0) {
++ && trx_sys_sys_space(space->id)) {
+ sum_of_sizes = 0;
+
+ node = UT_LIST_GET_FIRST(space->chain);
+@@ -1759,7 +1769,7 @@
+ mutex_exit(&fil_system->mutex);
+
+ err = fil_write_lsn_and_arch_no_to_file(
+- sum_of_sizes, lsn, arch_log_no);
++ space->id, sum_of_sizes, lsn, arch_log_no);
+ if (err != DB_SUCCESS) {
+
+ return(err);
+@@ -3806,7 +3816,7 @@
+ }
+
+ #ifndef UNIV_HOTBACKUP
+- if (space_id == ULINT_UNDEFINED || space_id == 0) {
++ if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
+ fprintf(stderr,
+ "InnoDB: Error: tablespace id %lu in file %s"
+ " is not sensible\n",
+@@ -3815,7 +3825,7 @@
+ goto func_exit;
+ }
+ #else
+- if (space_id == ULINT_UNDEFINED || space_id == 0) {
++ if (space_id == ULINT_UNDEFINED || trx_sys_sys_space(space_id)) {
+ char* new_path;
+
+ fprintf(stderr,
+@@ -4636,7 +4646,7 @@
+ }
+
+ if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
+- && space->id != 0) {
++ && !trx_sys_sys_space(space->id)) {
+ /* The node is in the LRU list, remove it */
+
+ ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+@@ -4682,7 +4692,7 @@
+ }
+
+ if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
+- && node->space->id != 0) {
++ && !trx_sys_sys_space(node->space->id)) {
+ /* The node must be put back to the LRU list */
+ UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+ }
+@@ -5298,7 +5308,7 @@
+ ut_a(fil_node->n_pending == 0);
+ ut_a(fil_node->open);
+ ut_a(fil_node->space->purpose == FIL_TABLESPACE);
+- ut_a(fil_node->space->id != 0);
++ ut_a(!trx_sys_sys_space(fil_node->space->id));
+
+ fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
+ }
+diff -ruN a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
+--- a/storage/innobase/fsp/fsp0fsp.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/fsp/fsp0fsp.c 2010-12-04 15:35:58.632513243 +0900
+@@ -48,7 +48,7 @@
+ # include "log0log.h"
+ #endif /* UNIV_HOTBACKUP */
+ #include "dict0mem.h"
+-
++#include "trx0sys.h"
+
+ #define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header
+ within a file page */
+@@ -999,10 +999,10 @@
+ flst_init(header + FSP_SEG_INODES_FREE, mtr);
+
+ mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
+- if (space == 0) {
++ if (space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE) {
+ fsp_fill_free_list(FALSE, space, header, mtr);
+ btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
+- 0, 0, DICT_IBUF_ID_MIN + space,
++ space, 0, DICT_IBUF_ID_MIN + space,
+ dict_ind_redundant, mtr);
+ } else {
+ fsp_fill_free_list(TRUE, space, header, mtr);
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:29.153514047 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-04 15:35:58.636549909 +0900
+@@ -163,6 +163,7 @@
+ static char* innobase_log_group_home_dir = NULL;
+ static char* innobase_file_format_name = NULL;
+ static char* innobase_change_buffering = NULL;
++static char* innobase_doublewrite_file = NULL;
+
+ /* The highest file format being used in the database. The value can be
+ set by user, however, it will be adjusted to the newer file format if
+@@ -2425,6 +2426,8 @@
+ goto error;
+ }
+
++ srv_doublewrite_file = innobase_doublewrite_file;
++
+ srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
+
+ /* -------------- Log files ---------------------------*/
+@@ -11553,6 +11556,11 @@
+ "Path to individual files and their sizes.",
+ NULL, NULL, NULL);
+
++static MYSQL_SYSVAR_STR(doublewrite_file, innobase_doublewrite_file,
++  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
++  "Path to special datafile for doublewrite buffer. (default is \"\": not used) ### ONLY FOR EXPERTS!!! ###",
++  NULL, NULL, NULL); /* check, update, default: unset unless given at startup */
++
+ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "The AUTOINC lock modes supported by InnoDB: "
+@@ -11723,6 +11731,7 @@
+ MYSQL_SYSVAR(commit_concurrency),
+ MYSQL_SYSVAR(concurrency_tickets),
+ MYSQL_SYSVAR(data_file_path),
++ MYSQL_SYSVAR(doublewrite_file),
+ MYSQL_SYSVAR(data_home_dir),
+ MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(recovery_stats),
+diff -ruN a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
+--- a/storage/innobase/include/mtr0log.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/mtr0log.ic 2010-12-04 15:35:58.644607059 +0900
+@@ -27,8 +27,8 @@
+ #include "ut0lst.h"
+ #include "buf0buf.h"
+ #include "fsp0types.h"
++#include "srv0srv.h"
+ #include "trx0sys.h"
+-
+ /********************************************************//**
+ Opens a buffer to mlog. It must be closed with mlog_close.
+ @return buffer, NULL if log mode MTR_LOG_NONE */
+@@ -201,7 +201,8 @@
+ the doublewrite buffer is located in pages
+ FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
+ system tablespace */
+- if (space == TRX_SYS_SPACE
++ if ((space == TRX_SYS_SPACE
++ || (srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE))
+ && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
+ if (trx_doublewrite_buf_is_being_created) {
+ /* Do nothing: we only come to this branch in an
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-04 15:35:29.177480351 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-04 15:35:58.646556250 +0900
+@@ -132,6 +132,8 @@
+ extern ulint* srv_data_file_sizes;
+ extern ulint* srv_data_file_is_raw_partition;
+
++extern char* srv_doublewrite_file;
++
+ extern ibool srv_recovery_stats;
+
+ extern ibool srv_auto_extend_last_data_file;
+diff -ruN a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
+--- a/storage/innobase/include/srv0start.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/srv0start.h 2010-12-08 17:15:07.602605797 +0900
+@@ -127,4 +127,7 @@
+ /** Log 'spaces' have id's >= this */
+ #define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
+
++/** Space ids >= this (and below the log space ids) are reserved for extra system tablespaces */
++#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL
++
+ #endif
+diff -ruN a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
+--- a/storage/innobase/include/trx0sys.h 2010-12-03 15:41:52.047049291 +0900
++++ b/storage/innobase/include/trx0sys.h 2010-12-04 15:35:58.647551222 +0900
+@@ -124,6 +124,22 @@
+ /*=============*/
+ ulint space, /*!< in: space */
+ ulint page_no);/*!< in: page number */
++/***************************************************************//**
++Checks if a space is one of the system tablespaces.
++@return TRUE if system tablespace */
++UNIV_INLINE
++ibool
++trx_sys_sys_space(
++/*==============*/
++ ulint space); /*!< in: space id */
++/***************************************************************//**
++Checks if a space is the one that holds the doublewrite buffer.
++@return TRUE if doublewrite tablespace (TRX_SYS_SPACE when no separate file is set) */
++UNIV_INLINE
++ibool
++trx_sys_doublewrite_space(
++/*======================*/
++ ulint space); /*!< in: space id */
+ /*****************************************************************//**
+ Creates and initializes the central memory structures for the transaction
+ system. This is called when the database is started. */
+@@ -137,6 +153,13 @@
+ void
+ trx_sys_create(void);
+ /*================*/
++/*****************************************************************//**
++Creates and initializes the dummy transaction system page for a tablespace. */
++UNIV_INTERN
++void
++trx_sys_dummy_create(
++/*=================*/
++ ulint space); /*!< in: space id; the definition asserts TRX_DOUBLEWRITE_SPACE */
+ /****************************************************************//**
+ Looks for a free slot for a rollback segment in the trx system file copy.
+ @return slot index or ULINT_UNDEFINED if not found */
+@@ -448,6 +471,8 @@
+
+ /* Space id and page no where the trx system file copy resides */
+ #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
++#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */
++#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */
+ #include "fsp0fsp.h"
+ #define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
+
+diff -ruN a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
+--- a/storage/innobase/include/trx0sys.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/trx0sys.ic 2010-12-04 15:35:58.649473284 +0900
+@@ -71,6 +71,40 @@
+ }
+
+ /***************************************************************//**
++Tells whether a space id denotes one of the system tablespaces.
++@return TRUE if system tablespace */
++UNIV_INLINE
++ibool
++trx_sys_sys_space(
++/*==============*/
++	ulint	space)	/*!< in: space */
++{
++	if (space == TRX_SYS_SPACE) {
++		return(TRUE);
++	}
++	/* the doublewrite space id counts as a system tablespace only
++	when the separate doublewrite file is in use */
++	return((ibool)(srv_doublewrite_file && space == TRX_DOUBLEWRITE_SPACE));
++}
++
++/***************************************************************//**
++Tells whether a space id is the one holding the doublewrite buffer.
++@return TRUE if doublewrite tablespace */
++UNIV_INLINE
++ibool
++trx_sys_doublewrite_space(
++/*======================*/
++	ulint	space)	/*!< in: space */
++{
++	/* with a separate doublewrite file the buffer has its own space;
++	otherwise it is kept in the system tablespace */
++	const ulint	dblwr_space = srv_doublewrite_file
++		? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE;
++
++	return((ibool)(space == dblwr_space));
++}
++
++/***************************************************************//**
+ Gets the pointer in the nth slot of the rseg array.
+ @return pointer to rseg object, NULL if slot not in use */
+ UNIV_INLINE
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-12-03 17:30:16.334989510 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-04 15:35:58.652496484 +0900
+@@ -3423,7 +3423,7 @@
+ /* Do not drop possible .ibd tablespace if something went
+ wrong: we do not want to delete valuable data of the user */
+
+- if (err == DB_SUCCESS && space_id > 0) {
++ if (err == DB_SUCCESS && !trx_sys_sys_space(space_id)) {
+ if (!fil_space_for_table_exists_in_mem(space_id,
+ name_or_path,
+ is_temp, FALSE,
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:29.180483212 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-04 15:35:58.656550107 +0900
+@@ -168,6 +168,8 @@
+ /* size in database pages */
+ UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+
++UNIV_INTERN char* srv_doublewrite_file = NULL;
++
+ UNIV_INTERN ibool srv_recovery_stats = FALSE;
+
+ /* if TRUE, then we auto-extend the last data file */
+diff -ruN a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
+--- a/storage/innobase/srv/srv0start.c 2010-12-04 15:35:29.183481330 +0900
++++ b/storage/innobase/srv/srv0start.c 2010-12-04 15:35:58.661550545 +0900
+@@ -715,6 +715,7 @@
+ /*======================*/
+ ibool* create_new_db, /*!< out: TRUE if new database should be
+ created */
++ ibool* create_new_doublewrite_file,
+ #ifdef UNIV_LOG_ARCHIVE
+ ulint* min_arch_log_no,/*!< out: min of archived log
+ numbers in data files */
+@@ -747,6 +748,7 @@
+ *sum_of_new_sizes = 0;
+
+ *create_new_db = FALSE;
++ *create_new_doublewrite_file = FALSE;
+
+ srv_normalize_path_for_win(srv_data_home);
+
+@@ -984,6 +986,142 @@
+ srv_data_file_is_raw_partition[i] != 0);
+ }
+
++ /* special file for doublewrite buffer */
++ if (srv_doublewrite_file)
++ {
++ srv_normalize_path_for_win(srv_doublewrite_file);
++
++ fprintf(stderr,
++ "InnoDB: Notice: innodb_doublewrite_file is specified.\n"
++ "InnoDB: This is for expert only. Don't use if you don't understand what is it 'WELL'.\n"
++ "InnoDB: ### Don't specify older file than the last checkpoint ###\n"
++ "InnoDB: otherwise the older doublewrite buffer will break your data during recovery!\n");
++
++ strcpy(name, srv_doublewrite_file);
++
++ /* First we try to create the file: if it already
++ exists, ret will get value FALSE */
++
++ files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE,
++ OS_FILE_NORMAL,
++ OS_DATA_FILE, &ret);
++
++ if (ret == FALSE && os_file_get_last_error(FALSE)
++ != OS_FILE_ALREADY_EXISTS
++#ifdef UNIV_AIX
++ /* AIX 5.1 after security patch ML7 may have
++ errno set to 0 here, which causes our function
++ to return 100; work around that AIX problem */
++ && os_file_get_last_error(FALSE) != 100
++#endif
++ ) {
++ fprintf(stderr,
++ "InnoDB: Error in creating"
++ " or opening %s\n",
++ name);
++
++ return(DB_ERROR);
++ }
++
++ if (ret == FALSE) {
++ /* We open the data file */
++
++ files[i] = os_file_create(innodb_file_data_key,
++ name, OS_FILE_OPEN, OS_FILE_NORMAL,
++ OS_DATA_FILE, &ret);
++
++ if (!ret) {
++ fprintf(stderr,
++ "InnoDB: Error in opening %s\n", name);
++ os_file_get_last_error(TRUE);
++
++ return(DB_ERROR);
++ }
++
++ ret = os_file_get_size(files[i], &size, &size_high);
++ ut_a(ret);
++ /* Round size downward to megabytes */
++
++ rounded_size_pages
++ = (size / (1024 * 1024) + 4096 * size_high)
++ << (20 - UNIV_PAGE_SIZE_SHIFT);
++
++ if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) {
++
++ fprintf(stderr,
++ "InnoDB: Warning: doublewrite buffer file %s"
++ " is of a different size\n"
++ "InnoDB: %lu pages"
++ " (rounded down to MB)\n"
++ "InnoDB: than intended size"
++ " %lu pages...\n",
++ name,
++ (ulong) rounded_size_pages,
++ (ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9);
++ }
++
++ fil_read_flushed_lsn_and_arch_log_no(
++ files[i], one_opened,
++#ifdef UNIV_LOG_ARCHIVE
++ min_arch_log_no, max_arch_log_no,
++#endif /* UNIV_LOG_ARCHIVE */
++ min_flushed_lsn, max_flushed_lsn);
++ one_opened = TRUE;
++ } else {
++ /* We created the data file and now write it full of
++ zeros */
++
++ *create_new_doublewrite_file = TRUE;
++
++ ut_print_timestamp(stderr);
++ fprintf(stderr,
++ " InnoDB: Doublewrite buffer file %s did not"
++ " exist: new to be created\n",
++ name);
++
++ if (*create_new_db == FALSE) {
++ fprintf(stderr,
++ "InnoDB: Warning: Previous version's ibdata files may cause crash.\n"
++ " If you use that, please use the ibdata files of this version.\n");
++ }
++
++ ut_print_timestamp(stderr);
++ fprintf(stderr,
++ " InnoDB: Setting file %s size to %lu MB\n",
++ name,
++ (ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)
++ >> (20 - UNIV_PAGE_SIZE_SHIFT)));
++
++ fprintf(stderr,
++ "InnoDB: Database physically writes the"
++ " file full: wait...\n");
++
++ ret = os_file_set_size(
++ name, files[i],
++ srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9),
++ srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9));
++
++ if (!ret) {
++ fprintf(stderr,
++ "InnoDB: Error in creating %s:"
++ " probably out of disk space\n", name);
++
++ return(DB_ERROR);
++ }
++ }
++
++ ret = os_file_close(files[i]);
++ ut_a(ret);
++
++ fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE);
++
++ ut_a(fil_validate());
++
++ fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE);
++
++ i++;
++ }
++
+ return(DB_SUCCESS);
+ }
+
+@@ -997,6 +1135,7 @@
+ /*====================================*/
+ {
+ ibool create_new_db;
++ ibool create_new_doublewrite_file;
+ ibool log_file_created;
+ ibool log_created = FALSE;
+ ibool log_opened = FALSE;
+@@ -1416,6 +1555,7 @@
+ }
+
+ err = open_or_create_data_files(&create_new_db,
++ &create_new_doublewrite_file,
+ #ifdef UNIV_LOG_ARCHIVE
+ &min_arch_log_no, &max_arch_log_no,
+ #endif /* UNIV_LOG_ARCHIVE */
+@@ -1545,6 +1685,14 @@
+ after the double write buffer has been created. */
+ trx_sys_create();
+
++ if (create_new_doublewrite_file) {
++ mtr_start(&mtr);
++ fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
++ mtr_commit(&mtr);
++
++ trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
++ }
++
+ dict_create();
+
+ srv_startup_is_before_trx_rollback_phase = FALSE;
+@@ -1577,6 +1725,13 @@
+ recv_recovery_from_archive_finish();
+ #endif /* UNIV_LOG_ARCHIVE */
+ } else {
++ char* save_srv_doublewrite_file = NULL;
++
++ if (create_new_doublewrite_file) {
++ /* doublewrite_file cannot be used for recovery yet. */
++ save_srv_doublewrite_file = srv_doublewrite_file;
++ srv_doublewrite_file = NULL;
++ }
+
+ /* Check if we support the max format that is stamped
+ on the system tablespace.
+@@ -1663,6 +1818,17 @@
+ we have finished the recovery process so that the
+ image of TRX_SYS_PAGE_NO is not stale. */
+ trx_sys_file_format_tag_init();
++
++ if (create_new_doublewrite_file) {
++ /* restore the value */
++ srv_doublewrite_file = save_srv_doublewrite_file;
++
++ mtr_start(&mtr);
++ fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
++ mtr_commit(&mtr);
++
++ trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
++ }
+ }
+
+ if (!create_new_db && sum_of_new_sizes > 0) {
+diff -ruN a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
+--- a/storage/innobase/trx/trx0sys.c 2010-12-03 17:32:15.651024019 +0900
++++ b/storage/innobase/trx/trx0sys.c 2010-12-04 15:35:58.664550291 +0900
+@@ -414,6 +414,152 @@
+
+ goto start_again;
+ }
++
++ if (srv_doublewrite_file) {
++ /* the same doublewrite buffer to TRX_SYS_SPACE should exist.
++ check and create if not exist.*/
++
++ mtr_start(&mtr);
++ trx_doublewrite_buf_is_being_created = TRUE;
++
++ block = buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, TRX_SYS_PAGE_NO,
++ RW_X_LATCH, &mtr);
++ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
++
++ doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
++
++ if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
++ == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
++ /* The doublewrite buffer has already been created:
++ just read in some numbers */
++
++ mtr_commit(&mtr);
++ } else {
++ fprintf(stderr,
++ "InnoDB: Doublewrite buffer not found in the doublewrite file:"
++ " creating new\n");
++
++ if (buf_pool_get_curr_size()
++ < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
++ + FSP_EXTENT_SIZE / 2 + 100)
++ * UNIV_PAGE_SIZE)) {
++ fprintf(stderr,
++ "InnoDB: Cannot create doublewrite buffer:"
++ " you must\n"
++ "InnoDB: increase your buffer pool size.\n"
++ "InnoDB: Cannot continue operation.\n");
++
++ exit(1);
++ }
++
++ block2 = fseg_create(TRX_DOUBLEWRITE_SPACE, TRX_SYS_PAGE_NO,
++ TRX_SYS_DOUBLEWRITE
++ + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
++
++ /* fseg_create acquires a second latch on the page,
++ therefore we must declare it: */
++
++ buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
++
++ if (block2 == NULL) {
++ fprintf(stderr,
++ "InnoDB: Cannot create doublewrite buffer:"
++ " you must\n"
++ "InnoDB: increase your tablespace size.\n"
++ "InnoDB: Cannot continue operation.\n");
++
++ /* We exit without committing the mtr to prevent
++ its modifications to the database getting to disk */
++
++ exit(1);
++ }
++
++ fseg_header = buf_block_get_frame(block)
++ + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG;
++ prev_page_no = 0;
++
++ for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
++ + FSP_EXTENT_SIZE / 2; i++) {
++ page_no = fseg_alloc_free_page(fseg_header,
++ prev_page_no + 1,
++ FSP_UP, &mtr);
++ if (page_no == FIL_NULL) {
++ fprintf(stderr,
++ "InnoDB: Cannot create doublewrite"
++ " buffer: you must\n"
++ "InnoDB: increase your"
++ " tablespace size.\n"
++ "InnoDB: Cannot continue operation.\n"
++ );
++
++ exit(1);
++ }
++
++ /* We read the allocated pages to the buffer pool;
++ when they are written to disk in a flush, the space
++ id and page number fields are also written to the
++ pages. When we at database startup read pages
++ from the doublewrite buffer, we know that if the
++ space id and page number in them are the same as
++ the page position in the tablespace, then the page
++ has not been written to in doublewrite. */
++
++#ifdef UNIV_SYNC_DEBUG
++ new_block =
++#endif /* UNIV_SYNC_DEBUG */
++ buf_page_get(TRX_DOUBLEWRITE_SPACE, 0, page_no,
++ RW_X_LATCH, &mtr);
++ buf_block_dbg_add_level(new_block,
++ SYNC_NO_ORDER_CHECK);
++
++ if (i == FSP_EXTENT_SIZE / 2) {
++ ut_a(page_no == FSP_EXTENT_SIZE);
++ mlog_write_ulint(doublewrite
++ + TRX_SYS_DOUBLEWRITE_BLOCK1,
++ page_no, MLOG_4BYTES, &mtr);
++ mlog_write_ulint(doublewrite
++ + TRX_SYS_DOUBLEWRITE_REPEAT
++ + TRX_SYS_DOUBLEWRITE_BLOCK1,
++ page_no, MLOG_4BYTES, &mtr);
++ } else if (i == FSP_EXTENT_SIZE / 2
++ + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
++ ut_a(page_no == 2 * FSP_EXTENT_SIZE);
++ mlog_write_ulint(doublewrite
++ + TRX_SYS_DOUBLEWRITE_BLOCK2,
++ page_no, MLOG_4BYTES, &mtr);
++ mlog_write_ulint(doublewrite
++ + TRX_SYS_DOUBLEWRITE_REPEAT
++ + TRX_SYS_DOUBLEWRITE_BLOCK2,
++ page_no, MLOG_4BYTES, &mtr);
++ } else if (i > FSP_EXTENT_SIZE / 2) {
++ ut_a(page_no == prev_page_no + 1);
++ }
++
++ prev_page_no = page_no;
++ }
++
++ mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
++ TRX_SYS_DOUBLEWRITE_MAGIC_N,
++ MLOG_4BYTES, &mtr);
++ mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
++ + TRX_SYS_DOUBLEWRITE_REPEAT,
++ TRX_SYS_DOUBLEWRITE_MAGIC_N,
++ MLOG_4BYTES, &mtr);
++
++ mlog_write_ulint(doublewrite
++ + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
++ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
++ MLOG_4BYTES, &mtr);
++ mtr_commit(&mtr);
++
++ /* Flush the modified pages to disk and make a checkpoint */
++ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
++
++ fprintf(stderr, "InnoDB: Doublewrite buffer created in the doublewrite file\n");
++ trx_sys_multiple_tablespace_format = TRUE;
++ }
++ trx_doublewrite_buf_is_being_created = FALSE;
++ }
+ }
+
+ /****************************************************************//**
+@@ -437,10 +583,19 @@
+ ulint source_page_no;
+ byte* page;
+ byte* doublewrite;
++ ulint doublewrite_space_id;
+ ulint space_id;
+ ulint page_no;
+ ulint i;
+
++ doublewrite_space_id = (srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
++
++ if (srv_doublewrite_file) {
++ fprintf(stderr,
++ "InnoDB: doublewrite file '%s' is used.\n",
++ srv_doublewrite_file);
++ }
++
+ /* We do the file i/o past the buffer pool */
+
+ unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
+@@ -449,7 +604,7 @@
+ /* Read the trx sys header to check if we are using the doublewrite
+ buffer */
+
+- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
++ fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, TRX_SYS_PAGE_NO, 0,
+ UNIV_PAGE_SIZE, read_buf, NULL);
+ doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
+
+@@ -487,10 +642,10 @@
+
+ /* Read the pages from the doublewrite buffer to memory */
+
+- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
++ fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block1, 0,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+ buf, NULL);
+- fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
++ fil_io(OS_FILE_READ, TRUE, doublewrite_space_id, 0, block2, 0,
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+ buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+ NULL);
+@@ -546,7 +701,8 @@
+ " doublewrite buf.\n",
+ (ulong) space_id, (ulong) page_no, (ulong) i);
+
+- } else if (space_id == TRX_SYS_SPACE
++ } else if ((space_id == TRX_SYS_SPACE
++ || (srv_doublewrite_file && space_id == TRX_DOUBLEWRITE_SPACE))
+ && ((page_no >= block1
+ && page_no
+ < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+@@ -990,6 +1146,83 @@
+ }
+
+ /*****************************************************************//**
++Creates a dummy of the transaction system file page in the given space. */
++static
++void
++trx_sysf_dummy_create(
++/*==================*/
++	ulint	space,	/*!< in: space id */
++	mtr_t*	mtr)	/*!< in/out: mini-transaction */
++{
++	buf_block_t*	block;
++	page_t*		page;
++
++	ut_ad(mtr);
++
++	/* Note that below we first reserve the file space x-latch, and
++	then enter the kernel: we must do it in this order to conform
++	to the latching order rules. */
++
++	mtr_x_lock(fil_space_get_latch(space, NULL), mtr);
++	mutex_enter(&kernel_mutex);
++
++	/* Create the trx sys file block in a new allocated file segment */
++	block = fseg_create(space, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
++			    mtr);
++	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
++
++	/* The block returned for the segment must be the trx sys page */
++	ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
++
++	page = buf_block_get_frame(block);
++
++	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
++			 MLOG_2BYTES, mtr);
++
++	/* Reset the doublewrite buffer magic number to zero so that we
++	know that the doublewrite buffer has not yet been created (this
++	suppresses a Valgrind warning) */
++
++	mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
++			 + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
++
++#ifdef UNDEFINED
++	/* TODO: REMOVE IT: The below is not needed, I think */
++	sys_header = trx_sysf_get(mtr);
++
++	/* Start counting transaction ids from number 1 up */
++	mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE,
++			  ut_dulint_create(0, 1), mtr);
++
++	/* Reset the rollback segment slots */
++	for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
++
++		trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr);
++		trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
++	}
++
++	/* The remaining area (up to the page trailer) is uninitialized.
++	Silence Valgrind warnings about it. */
++	UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
++				     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
++				     + TRX_SYS_RSEG_SPACE),
++		       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
++			- (TRX_SYS_RSEGS
++			   + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
++			   + TRX_SYS_RSEG_SPACE))
++		       + page - sys_header);
++
++	/* Create the first rollback segment in the SYSTEM tablespace */
++	page_no = trx_rseg_header_create(space, 0, ULINT_MAX, &slot_no,
++					 mtr);
++	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
++	ut_a(page_no != FIL_NULL);
++#endif
++
++	mutex_exit(&kernel_mutex);
++}
++
++/*****************************************************************//**
+ Creates and initializes the central memory structures for the transaction
+ system. This is called when the database is started. */
+ UNIV_INTERN
+@@ -1351,6 +1584,26 @@
+ /* Does nothing at the moment */
+ }
+
++/*****************************************************************//**
++Builds the dummy trx sys page of a tablespace in its own mini-transaction. */
++UNIV_INTERN
++void
++trx_sys_dummy_create(
++/*=================*/
++	ulint	space)	/*!< in: space id, asserted to be TRX_DOUBLEWRITE_SPACE */
++{
++	mtr_t	dummy_mtr;
++
++	/* Only the doublewrite file gets a dummy trx sys page so far */
++	ut_a(space == TRX_DOUBLEWRITE_SPACE);
++
++	mtr_start(&dummy_mtr);
++
++	trx_sysf_dummy_create(space, &dummy_mtr);
++
++	mtr_commit(&dummy_mtr);
++}
++
+ /*********************************************************************
+ Creates the rollback segments */
+ UNIV_INTERN
--- /dev/null
+# name : innodb_show_lock_name.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:34:35.285040381 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:35:12.974975252 +0900
+@@ -9491,8 +9491,8 @@
+ rw_lock_wait_time += mutex->lspent_time;
+ }
+ #else /* UNIV_DEBUG */
+- buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
+- mutex->cfile_name, (ulong) mutex->cline);
++ buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s",
++ mutex->cmutex_name);
+ buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
+ (ulong) mutex->count_os_wait);
+
+@@ -9507,9 +9507,8 @@
+
+ if (block_mutex) {
+ buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+- "combined %s:%lu",
+- block_mutex->cfile_name,
+- (ulong) block_mutex->cline);
++ "combined %s",
++ block_mutex->cmutex_name);
+ buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+ "os_waits=%lu",
+ (ulong) block_mutex_oswait_count);
+@@ -9538,8 +9537,8 @@
+ continue;
+ }
+
+- buf1len = my_snprintf(buf1, sizeof buf1, "%s:%lu",
+- lock->cfile_name, (ulong) lock->cline);
++ buf1len = my_snprintf(buf1, sizeof buf1, "%s",
++ lock->lock_name);
+ buf2len = my_snprintf(buf2, sizeof buf2, "os_waits=%lu",
+ (ulong) lock->count_os_wait);
+
+@@ -9553,9 +9552,8 @@
+
+ if (block_lock) {
+ buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+- "combined %s:%lu",
+- block_lock->cfile_name,
+- (ulong) block_lock->cline);
++ "combined %s",
++ block_lock->lock_name);
+ buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+ "os_waits=%lu",
+ (ulong) block_lock_oswait_count);
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h 2010-12-03 15:49:59.225953164 +0900
++++ b/storage/innobase/include/sync0rw.h 2010-12-03 17:35:12.978024458 +0900
+@@ -144,7 +144,7 @@
+ # endif/* UNIV_SYNC_DEBUG */
+ # else /* UNIV_DEBUG */
+ # define rw_lock_create(K, L, level) \
+- rw_lock_create_func((L), __FILE__, __LINE__)
++ rw_lock_create_func((L), #L, NULL, 0)
+ # endif /* UNIV_DEBUG */
+
+ /**************************************************************//**
+@@ -197,7 +197,7 @@
+ # endif/* UNIV_SYNC_DEBUG */
+ # else /* UNIV_DEBUG */
+ # define rw_lock_create(K, L, level) \
+- pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__)
++ pfs_rw_lock_create_func((K), (L), #L, NULL, 0)
+ # endif /* UNIV_DEBUG */
+
+ /******************************************************************
+@@ -255,8 +255,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+- const char* cmutex_name, /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++ const char* cmutex_name, /*!< in: mutex name */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline); /*!< in: file line where created */
+ /******************************************************************//**
+@@ -609,7 +609,8 @@
+ struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
+ #endif
+ ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
+- const char* cfile_name;/*!< File name where lock created */
++ //const char* cfile_name;/*!< File name where lock created */
++ const char* lock_name;/*!< lock name */
+ /* last s-lock file/line is not guaranteed to be correct */
+ const char* last_s_file_name;/*!< File name where last s-locked */
+ const char* last_x_file_name;/*!< File name where last x-locked */
+@@ -620,7 +621,7 @@
+ are at the start of this struct, thus we can
+ peek this field without causing much memory
+ bus traffic */
+- unsigned cline:14; /*!< Line where created */
++ //unsigned cline:14; /*!< Line where created */
+ unsigned last_s_line:14; /*!< Line number where last time s-locked */
+ unsigned last_x_line:14; /*!< Line number where last time x-locked */
+ #ifdef UNIV_DEBUG
+@@ -690,8 +691,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+- const char* cmutex_name, /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++ const char* cmutex_name, /*!< in: mutex name */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline); /*!< in: file line where created */
+
+diff -ruN a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
+--- a/storage/innobase/include/sync0rw.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.ic 2010-12-03 17:35:12.980024605 +0900
+@@ -640,8 +640,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+- const char* cmutex_name, /*!< in: mutex name */
+ # endif /* UNIV_DEBUG */
++ const char* cmutex_name, /*!< in: mutex name */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline) /*!< in: file line where created */
+ {
+@@ -656,8 +656,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+ level,
+ # endif /* UNIV_SYNC_DEBUG */
+- cmutex_name,
+ # endif /* UNIV_DEBUG */
++ cmutex_name,
+ cfile_name,
+ cline);
+ }
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h 2010-12-03 15:49:59.227955503 +0900
++++ b/storage/innobase/include/sync0sync.h 2010-12-03 17:35:12.982023946 +0900
+@@ -166,7 +166,7 @@
+ # endif/* UNIV_SYNC_DEBUG */
+ # else
+ # define mutex_create(K, M, level) \
+- pfs_mutex_create_func((K), (M), __FILE__, __LINE__)
++ pfs_mutex_create_func((K), (M), #M, NULL, 0)
+ # endif /* UNIV_DEBUG */
+
+ # define mutex_enter(M) \
+@@ -193,7 +193,7 @@
+ # endif /* UNIV_SYNC_DEBUG */
+ # else /* UNIV_DEBUG */
+ # define mutex_create(K, M, level) \
+- mutex_create_func((M), __FILE__, __LINE__)
++ mutex_create_func((M), #M, NULL, 0)
+ # endif /* UNIV_DEBUG */
+
+ # define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
+@@ -217,8 +217,8 @@
+ mutex_create_func(
+ /*==============*/
+ mutex_t* mutex, /*!< in: pointer to memory */
+-#ifdef UNIV_DEBUG
+ const char* cmutex_name, /*!< in: mutex name */
++#ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -291,8 +291,8 @@
+ /*==================*/
+ PSI_mutex_key key, /*!< in: Performance Schema key */
+ mutex_t* mutex, /*!< in: pointer to memory */
+-# ifdef UNIV_DEBUG
+ const char* cmutex_name, /*!< in: mutex name */
++# ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -723,9 +723,9 @@
+ ulint line; /*!< Line where the mutex was locked */
+ ulint level; /*!< Level in the global latching order */
+ #endif /* UNIV_SYNC_DEBUG */
++#ifdef UNIV_DEBUG
+ const char* cfile_name;/*!< File name where mutex created */
+ ulint cline; /*!< Line where created */
+-#ifdef UNIV_DEBUG
+ os_thread_id_t thread_id; /*!< The thread id of the thread
+ which locked the mutex. */
+ ulint magic_n; /*!< MUTEX_MAGIC_N */
+@@ -740,9 +740,9 @@
+ ulong count_os_yield; /*!< count of os_wait */
+ ulonglong lspent_time; /*!< mutex os_wait timer msec */
+ ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
+- const char* cmutex_name; /*!< mutex name */
+ ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
+ #endif /* UNIV_DEBUG */
++ const char* cmutex_name; /*!< mutex name */
+ #ifdef UNIV_PFS_MUTEX
+ struct PSI_mutex* pfs_psi; /*!< The performance schema
+ instrumentation hook */
+diff -ruN a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
+--- a/storage/innobase/include/sync0sync.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.ic 2010-12-03 17:35:12.984024599 +0900
+@@ -320,8 +320,8 @@
+ /*==================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema key */
+ mutex_t* mutex, /*!< in: pointer to memory */
+-# ifdef UNIV_DEBUG
+ const char* cmutex_name, /*!< in: mutex name */
++# ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -334,8 +334,8 @@
+ : NULL;
+
+ mutex_create_func(mutex,
+-# ifdef UNIV_DEBUG
+ cmutex_name,
++# ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+ level,
+ # endif /* UNIV_SYNC_DEBUG */
+diff -ruN a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
+--- a/storage/innobase/sync/sync0arr.c 2010-12-03 15:09:51.304953409 +0900
++++ b/storage/innobase/sync/sync0arr.c 2010-12-03 17:35:12.985024561 +0900
+@@ -488,12 +488,12 @@
+ mutex = cell->old_wait_mutex;
+
+ fprintf(file,
+- "Mutex at %p created file %s line %lu, lock var %lu\n"
++ "Mutex at %p '%s', lock var %lu\n"
+ #ifdef UNIV_SYNC_DEBUG
+ "Last time reserved in file %s line %lu, "
+ #endif /* UNIV_SYNC_DEBUG */
+ "waiters flag %lu\n",
+- (void*) mutex, mutex->cfile_name, (ulong) mutex->cline,
++ (void*) mutex, mutex->cmutex_name,
+ (ulong) mutex->lock_word,
+ #ifdef UNIV_SYNC_DEBUG
+ mutex->file_name, (ulong) mutex->line,
+@@ -511,9 +511,8 @@
+ rwlock = cell->old_wait_rw_lock;
+
+ fprintf(file,
+- " RW-latch at %p created in file %s line %lu\n",
+- (void*) rwlock, rwlock->cfile_name,
+- (ulong) rwlock->cline);
++ " RW-latch at %p '%s'\n",
++ (void*) rwlock, rwlock->lock_name);
+ writer = rw_lock_get_writer(rwlock);
+ if (writer != RW_LOCK_NOT_LOCKED) {
+ fprintf(file,
+diff -ruN a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
+--- a/storage/innobase/sync/sync0rw.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0rw.c 2010-12-03 17:35:12.987029059 +0900
+@@ -241,8 +241,8 @@
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+- const char* cmutex_name, /*!< in: mutex name */
+ #endif /* UNIV_DEBUG */
++ const char* cmutex_name, /*!< in: mutex name */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline) /*!< in: file line where created */
+ {
+@@ -253,14 +253,15 @@
+ mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock),
+ SYNC_NO_ORDER_CHECK);
+
+- lock->mutex.cfile_name = cfile_name;
+- lock->mutex.cline = cline;
++ ut_d(lock->mutex.cfile_name = cfile_name);
++ ut_d(lock->mutex.cline = cline);
+
+- ut_d(lock->mutex.cmutex_name = cmutex_name);
++ lock->mutex.cmutex_name = cmutex_name;
+ ut_d(lock->mutex.mutex_type = 1);
+ #else /* INNODB_RW_LOCKS_USE_ATOMICS */
+ # ifdef UNIV_DEBUG
+- UT_NOT_USED(cmutex_name);
++ UT_NOT_USED(cfile_name);
++ UT_NOT_USED(cline);
+ # endif
+ #endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+@@ -280,8 +281,7 @@
+
+ ut_d(lock->magic_n = RW_LOCK_MAGIC_N);
+
+- lock->cfile_name = cfile_name;
+- lock->cline = (unsigned int) cline;
++ lock->lock_name = cmutex_name;
+
+ lock->count_os_wait = 0;
+ lock->last_s_file_name = "not yet reserved";
+@@ -401,10 +401,10 @@
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu spin wait rw-s-lock at %p"
+- " cfile %s cline %lu rnds %lu\n",
++ " '%s' rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()),
+ (void*) lock,
+- lock->cfile_name, (ulong) lock->cline, (ulong) i);
++ lock->lock_name, (ulong) i);
+ }
+
+ /* We try once again to obtain the lock */
+@@ -437,10 +437,9 @@
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu OS wait rw-s-lock at %p"
+- " cfile %s cline %lu\n",
++ " '%s'\n",
+ os_thread_pf(os_thread_get_curr_id()),
+- (void*) lock, lock->cfile_name,
+- (ulong) lock->cline);
++ (void*) lock, lock->lock_name);
+ }
+
+ /* these stats may not be accurate */
+@@ -659,9 +658,9 @@
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu spin wait rw-x-lock at %p"
+- " cfile %s cline %lu rnds %lu\n",
++ " '%s' rnds %lu\n",
+ os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+- lock->cfile_name, (ulong) lock->cline, (ulong) i);
++ lock->lock_name, (ulong) i);
+ }
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+@@ -682,9 +681,9 @@
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu OS wait for rw-x-lock at %p"
+- " cfile %s cline %lu\n",
++ " '%s'\n",
+ os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+- lock->cfile_name, (ulong) lock->cline);
++ lock->lock_name);
+ }
+
+ /* these stats may not be accurate */
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c 2010-12-03 15:49:59.233955565 +0900
++++ b/storage/innobase/sync/sync0sync.c 2010-12-03 17:35:12.989024400 +0900
+@@ -249,8 +249,8 @@
+ mutex_create_func(
+ /*==============*/
+ mutex_t* mutex, /*!< in: pointer to memory */
+-#ifdef UNIV_DEBUG
+ const char* cmutex_name, /*!< in: mutex name */
++#ifdef UNIV_DEBUG
+ # ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+ # endif /* UNIV_SYNC_DEBUG */
+@@ -274,11 +274,13 @@
+ mutex->file_name = "not yet reserved";
+ mutex->level = level;
+ #endif /* UNIV_SYNC_DEBUG */
++#ifdef UNIV_DEBUG
+ mutex->cfile_name = cfile_name;
+ mutex->cline = cline;
++#endif /* UNIV_DEBUG */
+ mutex->count_os_wait = 0;
+-#ifdef UNIV_DEBUG
+ mutex->cmutex_name= cmutex_name;
++#ifdef UNIV_DEBUG
+ mutex->count_using= 0;
+ mutex->mutex_type= 0;
+ mutex->lspent_time= 0;
+@@ -532,9 +534,9 @@
+ #ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+ "Thread %lu spin wait mutex at %p"
+- " cfile %s cline %lu rnds %lu\n",
++ " '%s' rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
++ mutex->cmutex_name, (ulong) i);
+ #endif
+
+ mutex_spin_round_count += i;
+@@ -609,9 +611,9 @@
+
+ #ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+- "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
++ "Thread %lu OS wait mutex at %p '%s' rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+- mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
++ mutex->cmutex_name, (ulong) i);
+ #endif
+
+ mutex_os_wait_count++;
+@@ -913,9 +915,8 @@
+
+ if (mutex->magic_n == MUTEX_MAGIC_N) {
+ fprintf(stderr,
+- "Mutex created at %s %lu\n",
+- mutex->cfile_name,
+- (ulong) mutex->cline);
++ "Mutex '%s'\n",
++ mutex->cmutex_name);
+
+ if (mutex_get_lock_word(mutex) != 0) {
+ const char* file_name;
--- /dev/null
+# name : innodb_show_status.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:07:31.786968193 +0900
+@@ -4812,14 +4812,16 @@
+ buf_flush_list_mutex_enter(buf_pool);
+
+ fprintf(file,
+- "Buffer pool size %lu\n"
+- "Free buffers %lu\n"
+- "Database pages %lu\n"
+- "Old database pages %lu\n"
+- "Modified db pages %lu\n"
++ "Buffer pool size %lu\n"
++ "Buffer pool size, bytes %lu\n"
++ "Free buffers %lu\n"
++ "Database pages %lu\n"
++ "Old database pages %lu\n"
++ "Modified db pages %lu\n"
+ "Pending reads %lu\n"
+ "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
+ (ulong) buf_pool->curr_size,
++ (ulong) buf_pool->curr_size * UNIV_PAGE_SIZE,
+ (ulong) UT_LIST_GET_LEN(buf_pool->free),
+ (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
+ (ulong) buf_pool->LRU_old_len,
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-03 20:58:26.000000000 +0300
++++ b/storage/innobase/buf/buf0flu.c 2011-01-07 03:37:41.000000000 +0300
+@@ -75,7 +75,7 @@
+ static buf_flush_stat_t buf_flush_stat_sum;
+
+ /** Number of pages flushed through non flush_list flushes. */
+-static ulint buf_lru_flush_page_count = 0;
++// static ulint buf_lru_flush_page_count = 0;
+
+ /* @} */
+
+diff -ruN a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
+--- a/storage/innobase/fil/fil0fil.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/fil/fil0fil.c 2010-12-03 15:07:31.790357112 +0900
+@@ -4858,3 +4858,30 @@
+
+ fil_system = NULL;
+ }
++
++/*************************************************************************
++Returns the combined cell count of the fil_system space and name hashes. */
++
++ulint
++fil_system_hash_cells(void)
++/*=======================*/
++{
++	if (fil_system == NULL) {
++		return 0;
++	}
++
++	return (fil_system->spaces->n_cells
++		+ fil_system->name_hash->n_cells);
++}
++
++ulint
++fil_system_hash_nodes(void)
++/*=======================*/
++{
++	if (fil_system == NULL) {
++		return 0;
++	}
++	/* one fil_space_t plus a memory block header per listed space */
++	return (UT_LIST_GET_LEN(fil_system->space_list)
++		* (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
++}
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:06:58.727955654 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:07:31.799376984 +0900
+@@ -584,6 +584,8 @@
+ (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
+ {"buffer_pool_pages_flushed",
+ (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
++ {"buffer_pool_pages_LRU_flushed",
++ (char*) &export_vars.innodb_buffer_pool_pages_LRU_flushed, SHOW_LONG},
+ {"buffer_pool_pages_free",
+ (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
+ #ifdef UNIV_DEBUG
+@@ -10975,6 +10977,16 @@
+ "Force InnoDB to not use next-key locking, to use only row-level locking.",
+ NULL, NULL, FALSE);
+
++static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
++ PLUGIN_VAR_OPCMDARG,
++ "Whether to show records locked in SHOW INNODB STATUS.",
++ NULL, NULL, 0, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
++ PLUGIN_VAR_RQCMDARG,
++ "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
++ NULL, NULL, 10, 0, 1000, 0);
++
+ #ifdef UNIV_LOG_ARCHIVE
+ static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+@@ -11162,7 +11174,7 @@
+
+ static MYSQL_SYSVAR_STR(version, innodb_version_str,
+ PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
+- "InnoDB version", NULL, NULL, INNODB_VERSION_STR);
++ "Percona-InnoDB-plugin version", NULL, NULL, INNODB_VERSION_STR);
+
+ static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+@@ -11247,6 +11259,8 @@
+ MYSQL_SYSVAR(thread_concurrency),
+ MYSQL_SYSVAR(thread_sleep_delay),
+ MYSQL_SYSVAR(autoinc_lock_mode),
++ MYSQL_SYSVAR(show_verbose_locks),
++ MYSQL_SYSVAR(show_locks_held),
+ MYSQL_SYSVAR(version),
+ MYSQL_SYSVAR(use_sys_malloc),
+ MYSQL_SYSVAR(use_native_aio),
+diff -ruN a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
+--- a/storage/innobase/include/fil0fil.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/fil0fil.h 2010-12-03 15:07:31.812028575 +0900
+@@ -726,6 +726,17 @@
+ /*============================*/
+ ulint id); /*!< in: space id */
+
++/*************************************************************************
++Return size information about the fil_system hash tables. */
++
++ulint
++fil_system_hash_cells(void);
++/*========================*/
++
++ulint
++fil_system_hash_nodes(void);
++/*========================*/
++
+ typedef struct fil_space_struct fil_space_t;
+
+ #endif
+diff -ruN a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
+--- a/storage/innobase/include/read0read.h 2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/include/read0read.h 2011-01-21 19:35:44.127631727 +0900
+@@ -88,6 +88,7 @@
+ void
+ read_view_print(
+ /*============*/
++ FILE* file, /*!< in: stream to print to */
+ const read_view_t* view); /*!< in: read view */
+ /*********************************************************************//**
+ Create a consistent cursor view for mysql to be used in cursors. In this
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:07:31.813958103 +0900
+@@ -145,6 +145,9 @@
+ extern char srv_adaptive_flushing;
+
+
++extern ulint srv_show_locks_held;
++extern ulint srv_show_verbose_locks;
++
+ /* The sort order table of the MySQL latin1_swedish_ci character set
+ collation */
+ extern const byte* srv_latin1_ordering;
+@@ -318,6 +321,8 @@
+ buffer pool to disk */
+ extern ulint srv_buf_pool_flushed;
+
++extern ulint buf_lru_flush_page_count;
++
+ /** Number of buffer pool reads that led to the
+ reading of a disk page */
+ extern ulint srv_buf_pool_reads;
+@@ -691,6 +696,7 @@
+ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
+ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
+ ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
++ ulint innodb_buffer_pool_pages_LRU_flushed; /*!< buf_lru_flush_page_count */
+ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
+ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
+diff -ruN a/storage/innobase/include/thr0loc.h b/storage/innobase/include/thr0loc.h
+--- a/storage/innobase/include/thr0loc.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/thr0loc.h 2010-12-03 15:07:31.815081509 +0900
+@@ -83,6 +83,17 @@
+ thr_local_get_in_ibuf_field(void);
+ /*=============================*/
+
++/*************************************************************************
++Return size information about the thread-local hash table. */
++
++ulint
++thr_local_hash_cells(void);
++/*=======================*/
++
++ulint
++thr_local_hash_nodes(void);
++/*=======================*/
++
+ #ifndef UNIV_NONINL
+ #include "thr0loc.ic"
+ #endif
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/lock/lock0lock.c 2010-12-03 15:07:31.819023998 +0900
+@@ -4302,6 +4302,7 @@
+
+ putc('\n', file);
+
++ if ( srv_show_verbose_locks ) {
+ block = buf_page_try_get(space, page_no, &mtr);
+
+ for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
+@@ -4328,6 +4329,7 @@
+
+ putc('\n', file);
+ }
++ }
+
+ mtr_commit(&mtr);
+ if (UNIV_LIKELY_NULL(heap)) {
+@@ -4511,7 +4513,7 @@
+ }
+ }
+
+- if (!srv_print_innodb_lock_monitor) {
++ if (!srv_print_innodb_lock_monitor && !srv_show_locks_held) {
+ nth_trx++;
+ goto loop;
+ }
+@@ -4583,8 +4585,8 @@
+
+ nth_lock++;
+
+- if (nth_lock >= 10) {
+- fputs("10 LOCKS PRINTED FOR THIS TRX:"
++ if (nth_lock >= srv_show_locks_held) {
++ fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
+ " SUPPRESSING FURTHER PRINTS\n",
+ file);
+
+diff -ruN a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c
+--- a/storage/innobase/read/read0read.c 2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/read/read0read.c 2011-01-21 19:37:08.292650181 +0900
+@@ -357,34 +357,35 @@
+ void
+ read_view_print(
+ /*============*/
++ FILE* file, /*!< in: stream to print to */
+ const read_view_t* view) /*!< in: read view */
+ {
+ ulint n_ids;
+ ulint i;
+
+ if (view->type == VIEW_HIGH_GRANULARITY) {
+- fprintf(stderr,
++ fprintf(file,
+ "High-granularity read view undo_n:o %llu\n",
+ (ullint) view->undo_no);
+ } else {
+- fprintf(stderr, "Normal read view\n");
++ fprintf(file, "Normal read view\n");
+ }
+
+- fprintf(stderr, "Read view low limit trx n:o " TRX_ID_FMT "\n",
++ fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n",
+ (ullint) view->low_limit_no);
+
+- fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
++ fprintf(file, "Read view up limit trx id " TRX_ID_FMT "\n",
+ (ullint) view->up_limit_id);
+
+- fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
++ fprintf(file, "Read view low limit trx id " TRX_ID_FMT "\n",
+ (ullint) view->low_limit_id);
+
+- fprintf(stderr, "Read view individually stored trx ids:\n");
++ fprintf(file, "Read view individually stored trx ids:\n");
+
+ n_ids = view->n_trx_ids;
+
+ for (i = 0; i < n_ids; i++) {
+- fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
++ fprintf(file, "Read view trx id " TRX_ID_FMT "\n",
+ (ullint) read_view_get_nth_trx_id(view, i));
+ }
+ }
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:07:31.824022673 +0900
+@@ -84,6 +84,7 @@
+ #include "ha_prototypes.h"
+ #include "trx0i_s.h"
+ #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
++#include "read0read.h"
+ #include "mysql/plugin.h"
+ #include "mysql/service_thd_wait.h"
+
+@@ -193,6 +194,9 @@
+ the checkpoints. */
+ UNIV_INTERN char srv_adaptive_flushing = TRUE;
+
++UNIV_INTERN ulint srv_show_locks_held = 10;
++UNIV_INTERN ulint srv_show_verbose_locks = 0;
++
+ /** Maximum number of times allowed to conditionally acquire
+ mutex before switching to blocking wait on the mutex */
+ #define MAX_MUTEX_NOWAIT 20
+@@ -311,6 +315,7 @@
+ /* variable to count the number of pages that were written from buffer
+ pool to the disk */
+ UNIV_INTERN ulint srv_buf_pool_flushed = 0;
++UNIV_INTERN ulint buf_lru_flush_page_count = 0;
+
+ /** Number of buffer pool reads that led to the
+ reading of a disk page */
+@@ -1787,6 +1792,13 @@
+ ulint n_reserved;
+ ibool ret;
+
++ ulint btr_search_sys_subtotal;
++ ulint lock_sys_subtotal;
++ ulint recv_sys_subtotal;
++
++ ulint i;
++ trx_t* trx;
++
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ current_time = time(NULL);
+@@ -1835,31 +1847,6 @@
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+- /* Only if lock_print_info_summary proceeds correctly,
+- before we call the lock_print_info_all_transactions
+- to print all the lock information. */
+- ret = lock_print_info_summary(file, nowait);
+-
+- if (ret) {
+- if (trx_start) {
+- long t = ftell(file);
+- if (t < 0) {
+- *trx_start = ULINT_UNDEFINED;
+- } else {
+- *trx_start = (ulint) t;
+- }
+- }
+- lock_print_info_all_transactions(file);
+- if (trx_end) {
+- long t = ftell(file);
+- if (t < 0) {
+- *trx_end = ULINT_UNDEFINED;
+- } else {
+- *trx_end = (ulint) t;
+- }
+- }
+- }
+-
+ fputs("--------\n"
+ "FILE I/O\n"
+ "--------\n", file);
+@@ -1890,10 +1877,84 @@
+ "BUFFER POOL AND MEMORY\n"
+ "----------------------\n", file);
+ fprintf(file,
+- "Total memory allocated " ULINTPF
+- "; in additional pool allocated " ULINTPF "\n",
+- ut_total_allocated_memory,
+- mem_pool_get_reserved(mem_comm_pool));
++ "Total memory allocated " ULINTPF
++ "; in additional pool allocated " ULINTPF "\n",
++ ut_total_allocated_memory,
++ mem_pool_get_reserved(mem_comm_pool));
++	/* Calculate reserved memory. btr_search_sys is NULL-checked on both paths. */
++	btr_search_sys_subtotal = 0;
++	if (btr_search_sys && btr_search_sys->hash_index->heap) {
++		btr_search_sys_subtotal = mem_heap_get_size(btr_search_sys->hash_index->heap);
++	} else if (btr_search_sys) {
++		for (i=0; i < btr_search_sys->hash_index->n_mutexes; i++) {
++			btr_search_sys_subtotal += mem_heap_get_size(btr_search_sys->hash_index->heaps[i]);
++		}
++	}
++
++ lock_sys_subtotal = 0;
++ if (trx_sys) {
++ mutex_enter(&kernel_mutex);
++ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
++ while (trx) {
++ lock_sys_subtotal += ((trx->lock_heap) ? mem_heap_get_size(trx->lock_heap) : 0);
++ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
++ }
++ mutex_exit(&kernel_mutex);
++ }
++
++ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
++ ? mem_heap_get_size(recv_sys->heap) : 0);
++
++ fprintf(file,
++ "Internal hash tables (constant factor + variable factor)\n"
++ " Adaptive hash index %lu \t(%lu + %lu)\n"
++ " Page hash %lu (buffer pool 0 only)\n"
++ " Dictionary cache %lu \t(%lu + %lu)\n"
++ " File system %lu \t(%lu + %lu)\n"
++ " Lock system %lu \t(%lu + %lu)\n"
++ " Recovery system %lu \t(%lu + %lu)\n"
++ " Threads %lu \t(%lu + %lu)\n",
++
++ (ulong) (btr_search_sys
++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0)
++ + btr_search_sys_subtotal,
++ (ulong) (btr_search_sys
++ ? (btr_search_sys->hash_index->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) btr_search_sys_subtotal,
++
++ (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
++
++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells
++ ) * sizeof(hash_cell_t)
++ + dict_sys->size) : 0),
++ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells
++ ) * sizeof(hash_cell_t)) : 0),
++ (ulong) (dict_sys ? (dict_sys->size) : 0),
++
++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
++ + fil_system_hash_nodes()),
++ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
++ (ulong) fil_system_hash_nodes(),
++
++ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
++ + lock_sys_subtotal),
++ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) lock_sys_subtotal,
++
++ (ulong) (((recv_sys && recv_sys->addr_hash)
++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
++ + recv_sys_subtotal),
++ (ulong) ((recv_sys && recv_sys->addr_hash)
++ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
++ (ulong) recv_sys_subtotal,
++
++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)
++ + thr_local_hash_nodes()),
++ (ulong) (thr_local_hash_cells() * sizeof(hash_cell_t)),
++ (ulong) thr_local_hash_nodes());
++
+ fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
+ dict_sys->size);
+
+@@ -1909,6 +1970,16 @@
+ fprintf(file, "%lu read views open inside InnoDB\n",
+ UT_LIST_GET_LEN(trx_sys->view_list));
+
++ if (UT_LIST_GET_LEN(trx_sys->view_list)) {
++ read_view_t* view = UT_LIST_GET_LAST(trx_sys->view_list);
++
++ if (view) {
++ fprintf(file, "---OLDEST VIEW---\n");
++ read_view_print(file, view);
++ fprintf(file, "-----------------\n");
++ }
++ }
++
+ n_reserved = fil_space_get_n_reserved_extents(0);
+ if (n_reserved > 0) {
+ fprintf(file,
+@@ -1952,6 +2023,31 @@
+ srv_n_rows_deleted_old = srv_n_rows_deleted;
+ srv_n_rows_read_old = srv_n_rows_read;
+
++ /* Only if lock_print_info_summary proceeds correctly,
++ before we call the lock_print_info_all_transactions
++ to print all the lock information. */
++ ret = lock_print_info_summary(file, nowait);
++
++ if (ret) {
++ if (trx_start) {
++ long t = ftell(file);
++ if (t < 0) {
++ *trx_start = ULINT_UNDEFINED;
++ } else {
++ *trx_start = (ulint) t;
++ }
++ }
++ lock_print_info_all_transactions(file);
++ if (trx_end) {
++ long t = ftell(file);
++ if (t < 0) {
++ *trx_end = ULINT_UNDEFINED;
++ } else {
++ *trx_end = (ulint) t;
++ }
++ }
++ }
++
+ fputs("----------------------------\n"
+ "END OF INNODB MONITOR OUTPUT\n"
+ "============================\n", file);
+@@ -1995,6 +2091,7 @@
+ = srv_buf_pool_write_requests;
+ export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
+ export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
++ export_vars.innodb_buffer_pool_pages_LRU_flushed = buf_lru_flush_page_count;
+ export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
+ export_vars.innodb_buffer_pool_read_ahead
+ = stat.n_ra_pages_read;
+diff -ruN a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
+--- a/storage/innobase/sync/sync0arr.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0arr.c 2010-12-03 15:07:31.826041368 +0900
+@@ -477,7 +477,7 @@
+
+ fprintf(file,
+ "--Thread %lu has waited at %s line %lu"
+- " for %.2f seconds the semaphore:\n",
++ " for %#.5g seconds the semaphore:\n",
+ (ulong) os_thread_pf(cell->thread), cell->file,
+ (ulong) cell->line,
+ difftime(time(NULL), cell->reservation_time));
+diff -ruN a/storage/innobase/thr/thr0loc.c b/storage/innobase/thr/thr0loc.c
+--- a/storage/innobase/thr/thr0loc.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/thr/thr0loc.c 2010-12-03 15:07:31.828023915 +0900
+@@ -49,6 +49,7 @@
+
+ /** The hash table. The module is not yet initialized when it is NULL. */
+ static hash_table_t* thr_local_hash = NULL;
++ulint thr_local_hash_n_nodes = 0;
+
+ /** Thread local data */
+ typedef struct thr_local_struct thr_local_t;
+@@ -221,6 +222,7 @@
+ os_thread_pf(os_thread_get_curr_id()),
+ local);
+
++ thr_local_hash_n_nodes++;
+ mutex_exit(&thr_local_mutex);
+ }
+
+@@ -249,6 +251,7 @@
+
+ HASH_DELETE(thr_local_t, hash, thr_local_hash,
+ os_thread_pf(id), local);
++ thr_local_hash_n_nodes--;
+
+ mutex_exit(&thr_local_mutex);
+
+@@ -305,3 +308,29 @@
+ hash_table_free(thr_local_hash);
+ thr_local_hash = NULL;
+ }
++
++/*************************************************************************
++Reports the size of the thread-local hash table.
++@return	thr_local_hash->n_cells, or 0 while thr_local_hash is NULL */
++ulint
++thr_local_hash_cells(void)
++/*======================*/
++{
++	/* A NULL thr_local_hash means the module is not initialized. */
++	if (!thr_local_hash) {
++		return(0);
++	}
++	return(thr_local_hash->n_cells);
++}
++
++/** @return node count times (node size + block header size), 0 if no table */
++ulint
++thr_local_hash_nodes(void)
++/*======================*/
++{
++	if (!thr_local_hash) {
++		return(0);
++	}
++	return(thr_local_hash_n_nodes
++	       * (sizeof(thr_local_t) + MEM_BLOCK_HEADER_SIZE));
++}
+diff -ruN a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
+--- a/storage/innobase/trx/trx0purge.c 2010-12-04 02:58:26.000000000 +0900
++++ b/storage/innobase/trx/trx0purge.c 2011-01-21 19:40:42.086683671 +0900
+@@ -1201,7 +1201,7 @@
+ /*=====================*/
+ {
+ fprintf(stderr, "InnoDB: Purge system view:\n");
+- read_view_print(purge_sys->view);
++ read_view_print(stderr, purge_sys->view);
+
+ fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
+ ", undo n:o " TRX_ID_FMT "\n",
--- /dev/null
+# name : innodb_show_status_extend.patch
+# introduced : XtraDB based 5.5.8
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2011-01-21 19:53:42.369599743 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2011-01-21 19:54:44.659599699 +0900
+@@ -618,6 +618,16 @@
+ trx_t* trx); /*!< in: transaction handle */
+
+ static SHOW_VAR innodb_status_variables[]= {
++ {"adaptive_hash_cells",
++ (char*) &export_vars.innodb_adaptive_hash_cells, SHOW_LONG},
++ {"adaptive_hash_heap_buffers",
++ (char*) &export_vars.innodb_adaptive_hash_heap_buffers, SHOW_LONG},
++ {"adaptive_hash_hash_searches",
++ (char*) &export_vars.innodb_adaptive_hash_hash_searches, SHOW_LONG},
++ {"adaptive_hash_non_hash_searches",
++ (char*) &export_vars.innodb_adaptive_hash_non_hash_searches, SHOW_LONG},
++ {"background_log_sync",
++ (char*) &export_vars.innodb_background_log_sync, SHOW_LONG},
+ {"buffer_pool_pages_data",
+ (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
+ {"buffer_pool_pages_dirty",
+@@ -632,8 +642,14 @@
+ {"buffer_pool_pages_latched",
+ (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
+ #endif /* UNIV_DEBUG */
++ {"buffer_pool_pages_made_not_young",
++ (char*) &export_vars.innodb_buffer_pool_pages_made_not_young, SHOW_LONG},
++ {"buffer_pool_pages_made_young",
++ (char*) &export_vars.innodb_buffer_pool_pages_made_young, SHOW_LONG},
+ {"buffer_pool_pages_misc",
+ (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
++ {"buffer_pool_pages_old",
++ (char*) &export_vars.innodb_buffer_pool_pages_old, SHOW_LONG},
+ {"buffer_pool_pages_total",
+ (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
+ {"buffer_pool_read_ahead",
+@@ -648,6 +664,12 @@
+ (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
+ {"buffer_pool_write_requests",
+ (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
++ {"checkpoint_age",
++ (char*) &export_vars.innodb_checkpoint_age, SHOW_LONG},
++ {"checkpoint_max_age",
++ (char*) &export_vars.innodb_checkpoint_max_age, SHOW_LONG},
++ {"checkpoint_target_age",
++ (char*) &export_vars.innodb_checkpoint_target_age, SHOW_LONG},
+ {"data_fsyncs",
+ (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
+ {"data_pending_fsyncs",
+@@ -674,12 +696,66 @@
+ (char*) &export_vars.innodb_dict_tables, SHOW_LONG},
+ {"have_atomic_builtins",
+ (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
++ {"history_list_length",
++ (char*) &export_vars.innodb_history_list_length, SHOW_LONG},
++ {"ibuf_discarded_delete_marks",
++ (char*) &export_vars.innodb_ibuf_discarded_delete_marks, SHOW_LONG},
++ {"ibuf_discarded_deletes",
++ (char*) &export_vars.innodb_ibuf_discarded_deletes, SHOW_LONG},
++ {"ibuf_discarded_inserts",
++ (char*) &export_vars.innodb_ibuf_discarded_inserts, SHOW_LONG},
++ {"ibuf_free_list",
++ (char*) &export_vars.innodb_ibuf_free_list, SHOW_LONG},
++ {"ibuf_merged_delete_marks",
++ (char*) &export_vars.innodb_ibuf_merged_delete_marks, SHOW_LONG},
++ {"ibuf_merged_deletes",
++ (char*) &export_vars.innodb_ibuf_merged_deletes, SHOW_LONG},
++ {"ibuf_merged_inserts",
++ (char*) &export_vars.innodb_ibuf_merged_inserts, SHOW_LONG},
++ {"ibuf_merges",
++ (char*) &export_vars.innodb_ibuf_merges, SHOW_LONG},
++ {"ibuf_segment_size",
++ (char*) &export_vars.innodb_ibuf_segment_size, SHOW_LONG},
++ {"ibuf_size",
++ (char*) &export_vars.innodb_ibuf_size, SHOW_LONG},
+ {"log_waits",
+ (char*) &export_vars.innodb_log_waits, SHOW_LONG},
+ {"log_write_requests",
+ (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
+ {"log_writes",
+ (char*) &export_vars.innodb_log_writes, SHOW_LONG},
++ {"lsn_current",
++ (char*) &export_vars.innodb_lsn_current, SHOW_LONGLONG},
++ {"lsn_flushed",
++ (char*) &export_vars.innodb_lsn_flushed, SHOW_LONGLONG},
++ {"lsn_last_checkpoint",
++ (char*) &export_vars.innodb_lsn_last_checkpoint, SHOW_LONGLONG},
++ {"master_thread_1_second_loops",
++ (char*) &export_vars.innodb_master_thread_1_second_loops, SHOW_LONG},
++ {"master_thread_10_second_loops",
++ (char*) &export_vars.innodb_master_thread_10_second_loops, SHOW_LONG},
++ {"master_thread_background_loops",
++ (char*) &export_vars.innodb_master_thread_background_loops, SHOW_LONG},
++ {"master_thread_main_flush_loops",
++ (char*) &export_vars.innodb_master_thread_main_flush_loops, SHOW_LONG},
++ {"master_thread_sleeps",
++ (char*) &export_vars.innodb_master_thread_sleeps, SHOW_LONG},
++ {"max_trx_id",
++ (char*) &export_vars.innodb_max_trx_id, SHOW_LONGLONG},
++ {"mem_adaptive_hash",
++ (char*) &export_vars.innodb_mem_adaptive_hash, SHOW_LONG},
++ {"mem_dictionary",
++ (char*) &export_vars.innodb_mem_dictionary, SHOW_LONG},
++ {"mem_total",
++ (char*) &export_vars.innodb_mem_total, SHOW_LONG},
++ {"mutex_os_waits",
++ (char*) &export_vars.innodb_mutex_os_waits, SHOW_LONGLONG},
++ {"mutex_spin_rounds",
++ (char*) &export_vars.innodb_mutex_spin_rounds, SHOW_LONGLONG},
++ {"mutex_spin_waits",
++ (char*) &export_vars.innodb_mutex_spin_waits, SHOW_LONGLONG},
++ {"oldest_view_low_limit_trx_id",
++ (char*) &export_vars.innodb_oldest_view_low_limit_trx_id, SHOW_LONGLONG},
+ {"os_log_fsyncs",
+ (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
+ {"os_log_pending_fsyncs",
+@@ -696,8 +772,14 @@
+ (char*) &export_vars.innodb_pages_read, SHOW_LONG},
+ {"pages_written",
+ (char*) &export_vars.innodb_pages_written, SHOW_LONG},
++ {"purge_trx_id",
++ (char*) &export_vars.innodb_purge_trx_id, SHOW_LONGLONG},
++ {"purge_undo_no",
++ (char*) &export_vars.innodb_purge_undo_no, SHOW_LONGLONG},
+ {"row_lock_current_waits",
+ (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
++ {"row_lock_numbers",
++ (char*) &export_vars.innodb_row_lock_numbers, SHOW_LONG},
+ {"row_lock_time",
+ (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
+ {"row_lock_time_avg",
+@@ -714,8 +796,20 @@
+ (char*) &export_vars.innodb_rows_read, SHOW_LONG},
+ {"rows_updated",
+ (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
++ {"s_lock_os_waits",
++ (char*) &export_vars.innodb_s_lock_os_waits, SHOW_LONGLONG},
++ {"s_lock_spin_rounds",
++ (char*) &export_vars.innodb_s_lock_spin_rounds, SHOW_LONGLONG},
++ {"s_lock_spin_waits",
++ (char*) &export_vars.innodb_s_lock_spin_waits, SHOW_LONGLONG},
+ {"truncated_status_writes",
+ (char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
++ {"x_lock_os_waits",
++ (char*) &export_vars.innodb_x_lock_os_waits, SHOW_LONGLONG},
++ {"x_lock_spin_rounds",
++ (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG},
++ {"x_lock_spin_waits",
++ (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG},
+ {NullS, NullS, SHOW_LONG}
+ };
+
+diff -ruN a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
+--- a/storage/innobase/include/lock0lock.h 2011-01-21 19:52:38.967683738 +0900
++++ b/storage/innobase/include/lock0lock.h 2011-01-21 19:54:44.660599140 +0900
+@@ -816,6 +816,7 @@
+ /** The lock system struct */
+ struct lock_sys_struct{
+ hash_table_t* rec_hash; /*!< hash table of the record locks */
++ ulint rec_num; /*!< number of record locks in rec_hash: ++ after each HASH_INSERT, -- after each HASH_DELETE */
+ };
+
+ /** The lock system */
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2011-01-21 19:53:42.380638228 +0900
++++ b/storage/innobase/include/srv0srv.h 2011-01-21 19:54:44.662600032 +0900
+@@ -727,6 +727,11 @@
+
+ /** Status variables to be passed to MySQL */
+ struct export_var_struct{
++ ulint innodb_adaptive_hash_cells;
++ ulint innodb_adaptive_hash_heap_buffers;
++ ulint innodb_adaptive_hash_hash_searches;
++ ulint innodb_adaptive_hash_non_hash_searches;
++ ulint innodb_background_log_sync;
+ ulint innodb_data_pending_reads; /*!< Pending reads */
+ ulint innodb_data_pending_writes; /*!< Pending writes */
+ ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
+@@ -744,6 +749,9 @@
+ #ifdef UNIV_DEBUG
+ ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
+ #endif /* UNIV_DEBUG */
++ ulint innodb_buffer_pool_pages_made_not_young;
++ ulint innodb_buffer_pool_pages_made_young;
++ ulint innodb_buffer_pool_pages_old;
+ ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
+ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
+ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
+@@ -752,13 +760,43 @@
+ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
+ ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
+ ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
++ ulint innodb_checkpoint_age;
++ ulint innodb_checkpoint_max_age;
++ ulint innodb_checkpoint_target_age;
+ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
+ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
+ ulint innodb_deadlocks;
+ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
++ ulint innodb_history_list_length;
++ ulint innodb_ibuf_size;
++ ulint innodb_ibuf_free_list;
++ ulint innodb_ibuf_segment_size;
++ ulint innodb_ibuf_merges;
++ ulint innodb_ibuf_merged_inserts;
++ ulint innodb_ibuf_merged_delete_marks;
++ ulint innodb_ibuf_merged_deletes;
++ ulint innodb_ibuf_discarded_inserts;
++ ulint innodb_ibuf_discarded_delete_marks;
++ ulint innodb_ibuf_discarded_deletes;
+ ulint innodb_log_waits; /*!< srv_log_waits */
+ ulint innodb_log_write_requests; /*!< srv_log_write_requests */
+ ulint innodb_log_writes; /*!< srv_log_writes */
++ ib_int64_t innodb_lsn_current;
++ ib_int64_t innodb_lsn_flushed;
++ ib_int64_t innodb_lsn_last_checkpoint;
++ ulint innodb_master_thread_1_second_loops;
++ ulint innodb_master_thread_10_second_loops;
++ ulint innodb_master_thread_background_loops;
++ ulint innodb_master_thread_main_flush_loops;
++ ulint innodb_master_thread_sleeps;
++ ib_int64_t innodb_max_trx_id;
++ ulint innodb_mem_adaptive_hash;
++ ulint innodb_mem_dictionary;
++ ulint innodb_mem_total;
++ ib_int64_t innodb_mutex_os_waits;
++ ib_int64_t innodb_mutex_spin_rounds;
++ ib_int64_t innodb_mutex_spin_waits;
++ ib_int64_t innodb_oldest_view_low_limit_trx_id;
+ ulint innodb_os_log_written; /*!< srv_os_log_written */
+ ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
+ ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
+@@ -767,6 +805,8 @@
+ ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
+ ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */
+ ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
++ ib_int64_t innodb_purge_trx_id;
++ ib_int64_t innodb_purge_undo_no;
+ ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
+ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
+ ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
+@@ -776,11 +816,18 @@
+ / srv_n_lock_wait_count */
+ ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time
+ / 1000 */
++ ulint innodb_row_lock_numbers;
+ ulint innodb_rows_read; /*!< srv_n_rows_read */
+ ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
+ ulint innodb_rows_updated; /*!< srv_n_rows_updated */
+ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
+ ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
++ ib_int64_t innodb_s_lock_os_waits;
++ ib_int64_t innodb_s_lock_spin_rounds;
++ ib_int64_t innodb_s_lock_spin_waits;
++ ib_int64_t innodb_x_lock_os_waits;
++ ib_int64_t innodb_x_lock_spin_rounds;
++ ib_int64_t innodb_x_lock_spin_waits;
+ };
+
+ /** Thread slot in the thread table */
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h 2011-01-21 19:48:45.982637372 +0900
++++ b/storage/innobase/include/sync0sync.h 2011-01-21 19:54:44.664638235 +0900
+@@ -760,6 +760,10 @@
+
+ #define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
+
++extern ib_int64_t mutex_spin_round_count;
++extern ib_int64_t mutex_spin_wait_count;
++extern ib_int64_t mutex_os_wait_count;
++
+ /** The number of mutex_exit calls. Intended for performance monitoring. */
+ extern ib_int64_t mutex_exit_count;
+
+diff -ruN a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
+--- a/storage/innobase/lock/lock0lock.c 2011-01-21 19:52:38.998600121 +0900
++++ b/storage/innobase/lock/lock0lock.c 2011-01-21 19:54:44.668637536 +0900
+@@ -571,6 +571,7 @@
+ lock_sys = mem_alloc(sizeof(lock_sys_t));
+
+ lock_sys->rec_hash = hash_create(n_cells);
++ lock_sys->rec_num = 0;
+
+ /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
+
+@@ -1719,6 +1720,7 @@
+
+ HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), lock);
++ lock_sys->rec_num++;
+ if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+
+ lock_set_lock_and_trx_wait(lock, trx);
+@@ -2265,6 +2267,7 @@
+
+ HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), in_lock);
++ lock_sys->rec_num--;
+
+ UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+
+@@ -2308,6 +2311,7 @@
+
+ HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
+ lock_rec_fold(space, page_no), in_lock);
++ lock_sys->rec_num--;
+
+ UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
+ }
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2011-01-21 19:53:42.390637840 +0900
++++ b/storage/innobase/srv/srv0srv.c 2011-01-21 19:54:44.673637084 +0900
+@@ -2227,12 +2227,49 @@
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
++ ulint mem_adaptive_hash, mem_dictionary;
++ read_view_t* oldest_view;
++ ulint i;
+
+ buf_get_total_stat(&stat);
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+
++	mem_adaptive_hash = 0;	/* stays 0 while btr_search_sys is NULL */
++	if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
++		mem_adaptive_hash = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
++	} else if (btr_search_sys) {	/* no single heap: sum heaps[0 .. n_mutexes) */
++		for (i = 0; i < btr_search_sys->hash_index[0]->n_mutexes; i++) {
++			mem_adaptive_hash += mem_heap_get_size(btr_search_sys->hash_index[0]->heaps[i]);
++		}
++	}
++	mem_adaptive_hash *= btr_search_index_num;	/* hash_index[0] heap size times the number of hash indexes */
++	if (btr_search_sys) {
++		mem_adaptive_hash += (btr_search_sys->hash_index[0]->n_cells * btr_search_index_num * sizeof(hash_cell_t));
++	}
++
++ mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
++ + dict_sys->table_id_hash->n_cells
++ ) * sizeof(hash_cell_t)
++ + dict_sys->size) : 0);
++
+ mutex_enter(&srv_innodb_monitor_mutex);
+
++ export_vars.innodb_adaptive_hash_cells = 0;
++ export_vars.innodb_adaptive_hash_heap_buffers = 0;
++ for (i = 0; i < btr_search_index_num; i++) {
++ hash_table_t* table = btr_search_get_hash_index((index_id_t)i);
++
++ export_vars.innodb_adaptive_hash_cells
++ += hash_get_n_cells(table);
++ export_vars.innodb_adaptive_hash_heap_buffers
++ += (UT_LIST_GET_LEN(table->heap->base) - 1);
++ }
++ export_vars.innodb_adaptive_hash_hash_searches
++ = btr_cur_n_sea;
++ export_vars.innodb_adaptive_hash_non_hash_searches
++ = btr_cur_n_non_sea;
++ export_vars.innodb_background_log_sync
++ = srv_log_writes_and_flush;
+ export_vars.innodb_data_pending_reads
+ = os_n_pending_reads;
+ export_vars.innodb_data_pending_writes
+@@ -2269,6 +2306,101 @@
+
+ export_vars.innodb_buffer_pool_pages_misc
+ = buf_pool_get_n_pages() - LRU_len - free_len;
++
++ export_vars.innodb_buffer_pool_pages_made_young
++ = stat.n_pages_made_young;
++ export_vars.innodb_buffer_pool_pages_made_not_young
++ = stat.n_pages_not_made_young;
++ export_vars.innodb_buffer_pool_pages_old = 0;
++ for (i = 0; i < srv_buf_pool_instances; i++) {
++ buf_pool_t* buf_pool = buf_pool_from_array(i);
++ export_vars.innodb_buffer_pool_pages_old
++ += buf_pool->LRU_old_len;
++ }
++ export_vars.innodb_checkpoint_age
++ = (log_sys->lsn - log_sys->last_checkpoint_lsn);
++ export_vars.innodb_checkpoint_max_age
++ = log_sys->max_checkpoint_age;
++ export_vars.innodb_checkpoint_target_age
++ = srv_checkpoint_age_target
++ ? ut_min(log_sys->max_checkpoint_age_async, srv_checkpoint_age_target)
++ : log_sys->max_checkpoint_age_async;
++ export_vars.innodb_history_list_length
++ = trx_sys->rseg_history_len;
++ export_vars.innodb_ibuf_size
++ = ibuf->size;
++ export_vars.innodb_ibuf_free_list
++ = ibuf->free_list_len;
++ export_vars.innodb_ibuf_segment_size
++ = ibuf->seg_size;
++ export_vars.innodb_ibuf_merges
++ = ibuf->n_merges;
++ export_vars.innodb_ibuf_merged_inserts
++ = ibuf->n_merged_ops[IBUF_OP_INSERT];
++ export_vars.innodb_ibuf_merged_delete_marks
++ = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK];
++ export_vars.innodb_ibuf_merged_deletes
++ = ibuf->n_merged_ops[IBUF_OP_DELETE];
++ export_vars.innodb_ibuf_discarded_inserts
++ = ibuf->n_discarded_ops[IBUF_OP_INSERT];
++ export_vars.innodb_ibuf_discarded_delete_marks
++ = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK];
++ export_vars.innodb_ibuf_discarded_deletes
++ = ibuf->n_discarded_ops[IBUF_OP_DELETE];
++ export_vars.innodb_lsn_current
++ = log_sys->lsn;
++ export_vars.innodb_lsn_flushed
++ = log_sys->flushed_to_disk_lsn;
++ export_vars.innodb_lsn_last_checkpoint
++ = log_sys->last_checkpoint_lsn;
++ export_vars.innodb_master_thread_1_second_loops
++ = srv_main_1_second_loops;
++ export_vars.innodb_master_thread_10_second_loops
++ = srv_main_10_second_loops;
++ export_vars.innodb_master_thread_background_loops
++ = srv_main_background_loops;
++ export_vars.innodb_master_thread_main_flush_loops
++ = srv_main_flush_loops;
++ export_vars.innodb_master_thread_sleeps
++ = srv_main_sleeps;
++ export_vars.innodb_max_trx_id
++ = trx_sys->max_trx_id;
++ export_vars.innodb_mem_adaptive_hash
++ = mem_adaptive_hash;
++ export_vars.innodb_mem_dictionary
++ = mem_dictionary;
++ export_vars.innodb_mem_total
++ = ut_total_allocated_memory;
++ export_vars.innodb_mutex_os_waits
++ = mutex_os_wait_count;
++ export_vars.innodb_mutex_spin_rounds
++ = mutex_spin_round_count;
++ export_vars.innodb_mutex_spin_waits
++ = mutex_spin_wait_count;
++ export_vars.innodb_s_lock_os_waits
++ = rw_s_os_wait_count;
++ export_vars.innodb_s_lock_spin_rounds
++ = rw_s_spin_round_count;
++ export_vars.innodb_s_lock_spin_waits
++ = rw_s_spin_wait_count;
++ export_vars.innodb_x_lock_os_waits
++ = rw_x_os_wait_count;
++ export_vars.innodb_x_lock_spin_rounds
++ = rw_x_spin_round_count;
++ export_vars.innodb_x_lock_spin_waits
++ = rw_x_spin_wait_count;
++
++ oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
++ export_vars.innodb_oldest_view_low_limit_trx_id
++ = oldest_view ? oldest_view->low_limit_id : 0;
++
++ export_vars.innodb_purge_trx_id
++ = purge_sys->purge_trx_no;
++ export_vars.innodb_purge_undo_no
++ = purge_sys->purge_undo_no;
++ export_vars.innodb_row_lock_numbers
++ = lock_sys->rec_num;
++
+ #ifdef HAVE_ATOMIC_BUILTINS
+ export_vars.innodb_have_atomic_builtins = 1;
+ #else
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c 2011-01-21 19:53:03.458637954 +0900
++++ b/storage/innobase/sync/sync0sync.c 2011-01-21 19:54:44.676637686 +0900
+@@ -170,13 +170,13 @@
+
+ /** The number of iterations in the mutex_spin_wait() spin loop.
+ Intended for performance monitoring. */
+-static ib_int64_t mutex_spin_round_count = 0;
++UNIV_INTERN ib_int64_t mutex_spin_round_count = 0;
+ /** The number of mutex_spin_wait() calls. Intended for
+ performance monitoring. */
+-static ib_int64_t mutex_spin_wait_count = 0;
++UNIV_INTERN ib_int64_t mutex_spin_wait_count = 0;
+ /** The number of OS waits in mutex_spin_wait(). Intended for
+ performance monitoring. */
+-static ib_int64_t mutex_os_wait_count = 0;
++UNIV_INTERN ib_int64_t mutex_os_wait_count = 0;
+ /** The number of mutex_exit() calls. Intended for performance
+ monitoring. */
+ UNIV_INTERN ib_int64_t mutex_exit_count = 0;
--- /dev/null
+# name : innodb_show_sys_tables.patch
+# introduced : 13?
+# maintainer : Yasufumi
+# (It is revived from mysql-5.5.6-rc)
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:53:54.615040167 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 16:07:26.851357007 +0900
+@@ -11673,7 +11673,14 @@
+ i_s_innodb_cmp,
+ i_s_innodb_cmp_reset,
+ i_s_innodb_cmpmem,
+-i_s_innodb_cmpmem_reset
++i_s_innodb_cmpmem_reset,
++i_s_innodb_sys_tables,
++i_s_innodb_sys_tablestats,
++i_s_innodb_sys_indexes,
++i_s_innodb_sys_columns,
++i_s_innodb_sys_fields,
++i_s_innodb_sys_foreign,
++i_s_innodb_sys_foreign_cols
+ mysql_declare_plugin_end;
+
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:49:59.207956807 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 17:10:02.719210529 +0900
+@@ -36,9 +36,11 @@
+ #include <mysql/innodb_priv.h>
+
+ extern "C" {
++#include "btr0pcur.h" /* for file sys_tables related info. */
+ #include "btr0types.h"
+ #include "buf0buddy.h" /* for i_s_cmpmem */
+ #include "buf0buf.h" /* for buf_pool and PAGE_ZIP_MIN_SIZE */
++#include "dict0load.h" /* for file sys_tables related info. */
+ #include "dict0mem.h"
+ #include "dict0types.h"
+ #include "ha_prototypes.h" /* for innobase_convert_name() */
+@@ -1787,6 +1789,1675 @@
+ DBUG_RETURN(0);
+ }
+
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLES; each SYS_TABLE_* macro is the index of the entry that follows it */
++static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
++{
++#define SYS_TABLE_ID 0 /* filled from table->id */
++ {STRUCT_FLD(field_name, "TABLE_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_SCHEMA 1 /* part of table->name before the first '/', NULL if there is none */
++ {STRUCT_FLD(field_name, "SCHEMA"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_NAME 2 /* part of table->name after the first '/' (whole name if there is none) */
++ {STRUCT_FLD(field_name, "NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_FLAG 3 /* filled from table->flags */
++ {STRUCT_FLD(field_name, "FLAG"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_NUM_COLUMN 4 /* filled from table->n_cols */
++ {STRUCT_FLD(field_name, "N_COLS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLE_SPACE 5 /* filled from table->space */
++ {STRUCT_FLD(field_name, "SPACE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one SYS_TABLES entry as a row of information_schema.innodb_sys_tables.
++table->name is split at its first '/' into the SCHEMA and NAME columns.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_tables(
++/*=====================*/
++	THD*		thd,		/*!< in: thread */
++	dict_table_t*	table,		/*!< in: table */
++	TABLE*		table_to_fill)	/*!< in/out: fill this table */
++{
++	Field**		fields;
++	char		buf[NAME_LEN * 2 + 2];
++	char*		ptr;
++
++	DBUG_ENTER("i_s_dict_fill_sys_tables");
++
++	fields = table_to_fill->field;
++
++	OK(fields[SYS_TABLE_ID]->store(longlong(table->id), TRUE));
++
++	/* strncpy() does not NUL-terminate on truncation; do it explicitly. */
++	strncpy(buf, table->name, sizeof(buf) - 1);
++	buf[sizeof(buf) - 1] = '\0';
++	ptr = strchr(buf, '/');
++	if (ptr) {
++		*ptr++ = '\0';	/* buf now ends at the schema; ptr is the table part */
++		OK(field_store_string(fields[SYS_TABLE_SCHEMA], buf));
++		OK(field_store_string(fields[SYS_TABLE_NAME], ptr));
++	} else {
++		fields[SYS_TABLE_SCHEMA]->set_null();
++		OK(field_store_string(fields[SYS_TABLE_NAME], buf));
++	}
++
++	OK(fields[SYS_TABLE_FLAG]->store(table->flags));
++
++	OK(fields[SYS_TABLE_NUM_COLUMN]->store(table->n_cols));
++
++	OK(fields[SYS_TABLE_SPACE]->store(table->space));
++
++	OK(schema_table_store_record(thd, table_to_fill));
++
++	DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of SYS_TABLES and add one row per record to
++information_schema.innodb_sys_tables; unreadable records raise a warning.
++@return 0 (also when access is denied or a record cannot be processed) */
++static
++int
++i_s_sys_tables_fill_table(
++/*======================*/
++	THD*		thd,	/*!< in: thread */
++	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
++	COND*		cond)	/*!< in: condition (not used) */
++{
++	btr_pcur_t	pcur;
++	const rec_t*	rec;
++	mem_heap_t*	heap;
++	mtr_t		mtr;
++
++	DBUG_ENTER("i_s_sys_tables_fill_table");
++
++	/* deny access to users without the PROCESS privilege */
++	if (check_global_access(thd, PROCESS_ACL)) {
++
++		DBUG_RETURN(0);
++	}
++
++	heap = mem_heap_create(1000);
++	mutex_enter(&(dict_sys->mutex));
++	mtr_start(&mtr);
++
++	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
++
++	while (rec) {
++		const char*	err_msg;
++		dict_table_t*	table_rec;
++
++		/* Create and populate a dict_table_t structure with
++		information from SYS_TABLES row */
++		err_msg = dict_process_sys_tables_rec(
++			heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_RECORD);
++
++		mtr_commit(&mtr);
++		mutex_exit(&dict_sys->mutex);
++		/* dict_sys->mutex and the mtr are released while the row is stored */
++		if (!err_msg) {
++			i_s_dict_fill_sys_tables(thd, table_rec, tables->table);
++		} else {
++			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++					    ER_CANT_FIND_SYSTEM_REC, "%s",
++					    err_msg);	/* never pass err_msg as the format */
++		}
++
++		/* Since dict_process_sys_tables_rec() is called with
++		DICT_TABLE_LOAD_FROM_RECORD, the table_rec is created in
++		dict_process_sys_tables_rec(), we will need to free it */
++		if (table_rec) {
++			dict_mem_table_free(table_rec);
++		}
++
++		mem_heap_empty(heap);
++
++		/* Get the next record */
++		mutex_enter(&dict_sys->mutex);
++		mtr_start(&mtr);
++		rec = dict_getnext_system(&pcur, &mtr);
++	}
++
++	mtr_commit(&mtr);
++	mutex_exit(&dict_sys->mutex);
++	mem_heap_free(heap);
++
++	DBUG_RETURN(0);
++}
++
++/*******************************************************************//**
++Init hook of the INNODB_SYS_TABLES plugin: attaches the column layout and
++the fill callback to the schema table object p.
++@return 0 (always) */
++static
++int
++innodb_sys_tables_init(
++/*===================*/
++	void*	p)	/*!< in/out: table schema object */
++{
++	DBUG_ENTER("innodb_sys_tables_init");
++
++	/* p is handed over untyped; it is used as an ST_SCHEMA_TABLE */
++	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++
++	i_s_table->fill_table = i_s_sys_tables_fill_table;
++	i_s_table->fields_info = innodb_sys_tables_fields_info;
++
++	DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_TABLES"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_TABLES"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_tables_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*: this plugin exports no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**: this plugin defines no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS; each SYS_TABLESTATS_* macro is the index of the entry that follows it */
++static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] =
++{
++#define SYS_TABLESTATS_ID 0
++ {STRUCT_FLD(field_name, "TABLE_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_SCHEMA 1
++ {STRUCT_FLD(field_name, "SCHEMA"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_NAME 2
++ {STRUCT_FLD(field_name, "NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_INIT 3 /* a string column, not a numeric flag */
++ {STRUCT_FLD(field_name, "STATS_INITIALIZED"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_NROW 4
++ {STRUCT_FLD(field_name, "NUM_ROWS"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_CLUST_SIZE 5
++ {STRUCT_FLD(field_name, "CLUST_INDEX_SIZE"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_INDEX_SIZE 6
++ {STRUCT_FLD(field_name, "OTHER_INDEX_SIZE"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_MODIFIED 7
++ {STRUCT_FLD(field_name, "MODIFIED_COUNTER"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_AUTONINC 8 /* sic: macro name is misspelt; this is the index of the AUTOINC column */
++ {STRUCT_FLD(field_name, "AUTOINC"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_TABLESTATS_MYSQL_OPEN_HANDLE 9
++ {STRUCT_FLD(field_name, "MYSQL_HANDLES_OPENED"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row of information_schema.innodb_sys_tablestats built from the
++statistics fields of the given dict_table_t.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_tablestats(
++/*=========================*/
++ THD* thd, /*!< in: thread */
++ dict_table_t* table, /*!< in: table whose statistics are shown */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++ char buf[NAME_LEN * 2 + 2];
++ char* ptr;
++
++ DBUG_ENTER("i_s_dict_fill_sys_tablestats");
++
++ fields = table_to_fill->field;
++
++ OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE));
++
++ strncpy(buf, table->name, sizeof(buf) - 1);
++ buf[sizeof(buf) - 1] = '\0'; /* strncpy() may leave buf unterminated */
++ ptr = strchr(buf, '/'); /* split at the first '/': schema, then name */
++ if (ptr) {
++ *ptr = '\0';
++ ++ptr;
++ OK(field_store_string(fields[SYS_TABLESTATS_SCHEMA], buf));
++ OK(field_store_string(fields[SYS_TABLESTATS_NAME], ptr));
++ } else {
++ fields[SYS_TABLESTATS_SCHEMA]->set_null();
++ OK(field_store_string(fields[SYS_TABLESTATS_NAME], buf));
++ }
++
++ if (table->stat_initialized) {
++ OK(field_store_string(fields[SYS_TABLESTATS_INIT],
++ "Initialized"));
++ } else {
++ OK(field_store_string(fields[SYS_TABLESTATS_INIT],
++ "Uninitialized"));
++ }
++
++ OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, TRUE));
++
++ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
++ table->stat_clustered_index_size));
++
++ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
++ table->stat_sum_of_other_index_sizes));
++
++ OK(fields[SYS_TABLESTATS_MODIFIED]->store(
++ table->stat_modified_counter));
++
++ OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
++
++ OK(fields[SYS_TABLESTATS_MYSQL_OPEN_HANDLE]->store(
++ table->n_mysql_handles_opened));
++
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of the SYS_TABLES system table and, for each one, store a
++row of table statistics in information_schema.innodb_sys_tablestats.
++A record that cannot be processed raises a warning and is skipped.
++@return 0 (also when the access check fails and nothing is filled) */
++static
++int
++i_s_sys_tables_fill_table_stats(
++/*============================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_tables_fill_table_stats");
++
++ /* fill nothing if the PROCESS_ACL global access check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
++
++ while (rec) {
++ const char* err_msg;
++ dict_table_t* table_rec;
++
++ /* Fetch the dict_table_t structure corresponding to
++ this SYS_TABLES record */
++ err_msg = dict_process_sys_tables_rec(
++ heap, rec, &table_rec, DICT_TABLE_LOAD_FROM_CACHE);
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_tablestats(thd, table_rec,
++ tables->table);
++ } else { /* err_msg is message text, never a format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC,
++ "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_tablestats to its
++column definitions and fill function. @return 0 (no failure path) */
++static
++int
++innodb_sys_tablestats_init(
++/*=======================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_tablestats_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_sys_tablestats_fields_info;
++ schema->fill_table = i_s_sys_tables_fill_table_stats;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS table */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tablestats =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_TABLESTATS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_TABLESTATS"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_tablestats_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*; NULL: no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**; NULL: no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_indexes */
++static ST_FIELD_INFO innodb_sysindex_fields_info[] =
++{
++#define SYS_INDEX_ID 0
++ {STRUCT_FLD(field_name, "INDEX_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_NAME 1
++ {STRUCT_FLD(field_name, "NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_TABLE_ID 2
++ {STRUCT_FLD(field_name, "TABLE_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_TYPE 3
++ {STRUCT_FLD(field_name, "TYPE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_NUM_FIELDS 4
++ {STRUCT_FLD(field_name, "N_FIELDS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_PAGE_NO 5 /* NOTE(review): signed LONG; confirm range */
++ {STRUCT_FLD(field_name, "PAGE_NO"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_INDEX_SPACE 6
++ {STRUCT_FLD(field_name, "SPACE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row of information_schema.innodb_sys_indexes from the given
++dict_index_t and table id.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_indexes(
++/*======================*/
++ THD* thd, /*!< in: thread */
++ table_id_t table_id, /*!< in: table id */
++ dict_index_t* index, /*!< in: populated dict_index_t
++ struct with index info */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++
++ DBUG_ENTER("i_s_dict_fill_sys_indexes");
++
++ fields = table_to_fill->field;
++
++ OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
++
++ OK(field_store_string(fields[SYS_INDEX_NAME], index->name));
++
++ OK(fields[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE));
++
++ OK(fields[SYS_INDEX_TYPE]->store(index->type));
++
++ OK(fields[SYS_INDEX_NUM_FIELDS]->store(index->n_fields));
++
++ OK(fields[SYS_INDEX_PAGE_NO]->store(index->page));
++
++ OK(fields[SYS_INDEX_SPACE]->store(index->space));
++
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of the SYS_INDEXES system table and store one row per index
++in information_schema.innodb_sys_indexes; bad records only raise a warning.
++@return 0 (also when the access check fails and nothing is filled) */
++static
++int
++i_s_sys_indexes_fill_table(
++/*=======================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_indexes_fill_table");
++
++ /* fill nothing if the PROCESS_ACL global access check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ /* Start scan the SYS_INDEXES table */
++ rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
++
++ /* Process each record in the table */
++ while (rec) {
++ const char* err_msg;
++ table_id_t table_id;
++ dict_index_t index_rec;
++
++ /* Populate a dict_index_t structure with information from
++ a SYS_INDEXES row */
++ err_msg = dict_process_sys_indexes_rec(heap, rec, &index_rec,
++ &table_id);
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_indexes(thd, table_id, &index_rec,
++ tables->table);
++ } else { /* err_msg is message text, never a format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC,
++ "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_indexes to its
++column definitions and fill function. @return 0 (no failure path) */
++static
++int
++innodb_sys_indexes_init(
++/*====================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_indexes_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_sysindex_fields_info;
++ schema->fill_table = i_s_sys_indexes_fill_table;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_SYS_INDEXES table */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_indexes =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_INDEXES"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_INDEXES"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_indexes_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*; NULL: no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**; NULL: no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_columns */
++static ST_FIELD_INFO innodb_sys_columns_fields_info[] =
++{
++#define SYS_COLUMN_TABLE_ID 0
++ {STRUCT_FLD(field_name, "TABLE_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_NAME 1
++ {STRUCT_FLD(field_name, "NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_POSITION 2
++ {STRUCT_FLD(field_name, "POS"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_MTYPE 3
++ {STRUCT_FLD(field_name, "MTYPE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN__PRTYPE 4 /* NOTE(review): "__" is a reserved name form */
++ {STRUCT_FLD(field_name, "PRTYPE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_COLUMN_COLUMN_LEN 5
++ {STRUCT_FLD(field_name, "LEN"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row of information_schema.innodb_sys_columns from the given
++table id, column name and dict_col_t.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_columns(
++/*======================*/
++ THD* thd, /*!< in: thread */
++ table_id_t table_id, /*!< in: table ID */
++ const char* col_name, /*!< in: column name */
++ dict_col_t* column, /*!< in: dict_col_t struct holding
++ more column information */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++
++ DBUG_ENTER("i_s_dict_fill_sys_columns");
++
++ fields = table_to_fill->field;
++
++ OK(fields[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE));
++
++ OK(field_store_string(fields[SYS_COLUMN_NAME], col_name));
++
++ OK(fields[SYS_COLUMN_POSITION]->store(column->ind));
++
++ OK(fields[SYS_COLUMN_MTYPE]->store(column->mtype));
++
++ OK(fields[SYS_COLUMN__PRTYPE]->store(column->prtype));
++
++ OK(fields[SYS_COLUMN_COLUMN_LEN]->store(column->len));
++
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of the SYS_COLUMNS system table and store one row per column
++in information_schema.innodb_sys_columns; bad records only raise a warning.
++@return 0 (also when the access check fails and nothing is filled) */
++static
++int
++i_s_sys_columns_fill_table(
++/*=======================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ const char* col_name;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_columns_fill_table");
++
++ /* fill nothing if the PROCESS_ACL global access check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_COLUMNS);
++
++ while (rec) {
++ const char* err_msg;
++ dict_col_t column_rec;
++ table_id_t table_id;
++
++ /* populate a dict_col_t structure with information from
++ a SYS_COLUMNS row */
++ err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec,
++ &table_id, &col_name);
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_columns(thd, table_id, col_name,
++ &column_rec,
++ tables->table);
++ } else { /* err_msg is message text, never a format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC,
++ "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_columns to its
++column definitions and fill function. @return 0 (no failure path) */
++static
++int
++innodb_sys_columns_init(
++/*====================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_columns_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_sys_columns_fields_info;
++ schema->fill_table = i_s_sys_columns_fill_table;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_SYS_COLUMNS table */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_columns =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_COLUMNS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_COLUMNS"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_columns_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*; NULL: no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**; NULL: no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++/* Columns of INFORMATION_SCHEMA.innodb_sys_fields, indexed by the macros below */
++static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
++{
++#define SYS_FIELD_INDEX_ID 0
++ {STRUCT_FLD(field_name, "INDEX_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FIELD_NAME 1
++ {STRUCT_FLD(field_name, "NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FIELD_POS 2
++ {STRUCT_FLD(field_name, "POS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row of information_schema.innodb_sys_fields from the given
++index id, dict_field_t and field position.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_fields(
++/*=====================*/
++ THD* thd, /*!< in: thread */
++ index_id_t index_id, /*!< in: index id for the field */
++ dict_field_t* field, /*!< in: index field (its name is stored) */
++ ulint pos, /*!< in: Field position */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++
++ DBUG_ENTER("i_s_dict_fill_sys_fields");
++
++ fields = table_to_fill->field;
++
++ OK(fields[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE));
++
++ OK(field_store_string(fields[SYS_FIELD_NAME], field->name));
++
++ OK(fields[SYS_FIELD_POS]->store(pos));
++
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of the SYS_FIELDS system table and store one row per index
++field in information_schema.innodb_sys_fields. A record that cannot be
++processed raises a warning and is skipped.
++@return 0 (also when the access check fails and nothing is filled) */
++static
++int
++i_s_sys_fields_fill_table(
++/*======================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ index_id_t last_id;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_fields_fill_table");
++
++ /* fill nothing if the PROCESS_ACL global access check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ /* will save last index id so that we know whether we move to
++ the next index. This is used to calculate prefix length */
++ last_id = 0;
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_FIELDS);
++
++ while (rec) {
++ ulint pos;
++ const char* err_msg;
++ index_id_t index_id;
++ dict_field_t field_rec;
++
++ /* Populate a dict_field_t structure with information from
++ a SYS_FIELDS row */
++ err_msg = dict_process_sys_fields_rec(heap, rec, &field_rec,
++ &pos, &index_id, last_id);
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_fields(thd, index_id, &field_rec,
++ pos, tables->table);
++ last_id = index_id;
++ } else { /* err_msg is message text, never a format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC,
++ "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_fields to its
++column definitions and fill function. @return 0 (no failure path) */
++static
++int
++innodb_sys_fields_init(
++/*===================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_fields_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_sys_fields_fields_info;
++ schema->fill_table = i_s_sys_fields_fill_table;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_SYS_FIELDS table */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_fields =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_FIELDS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_FIELDS"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_fields_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*; NULL: no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**; NULL: no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
++/* Columns of INFORMATION_SCHEMA.innodb_sys_foreign, indexed by the macros below */
++static ST_FIELD_INFO innodb_sys_foreign_fields_info[] =
++{
++#define SYS_FOREIGN_ID 0
++ {STRUCT_FLD(field_name, "ID"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_FOR_NAME 1
++ {STRUCT_FLD(field_name, "FOR_NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_REF_NAME 2
++ {STRUCT_FLD(field_name, "REF_NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_NUM_COL 3
++ {STRUCT_FLD(field_name, "N_COLS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_TYPE 4
++ {STRUCT_FLD(field_name, "TYPE"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one row of information_schema.innodb_sys_foreign from the given
++dict_foreign_t.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_foreign(
++/*======================*/
++ THD* thd, /*!< in: thread */
++ dict_foreign_t* foreign, /*!< in: foreign key info to store */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++
++ DBUG_ENTER("i_s_dict_fill_sys_foreign");
++
++ fields = table_to_fill->field;
++
++ OK(field_store_string(fields[SYS_FOREIGN_ID], foreign->id));
++
++ OK(field_store_string(fields[SYS_FOREIGN_FOR_NAME],
++ foreign->foreign_table_name));
++
++ OK(field_store_string(fields[SYS_FOREIGN_REF_NAME],
++ foreign->referenced_table_name));
++
++ OK(fields[SYS_FOREIGN_NUM_COL]->store(foreign->n_fields));
++
++ OK(fields[SYS_FOREIGN_TYPE]->store(foreign->type));
++
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Scan every record of the SYS_FOREIGN system table and store one row per
++foreign key in INFORMATION_SCHEMA.innodb_sys_foreign. A record that cannot
++be processed raises a warning and is skipped.
++@return 0 (also when the access check fails and nothing is filled) */
++static
++int
++i_s_sys_foreign_fill_table(
++/*=======================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_foreign_fill_table");
++
++ /* fill nothing if the PROCESS_ACL global access check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN);
++
++ while (rec) {
++ const char* err_msg;
++ dict_foreign_t foreign_rec;
++
++ /* Populate a dict_foreign_t structure with information from
++ a SYS_FOREIGN row */
++ err_msg = dict_process_sys_foreign_rec(heap, rec, &foreign_rec);
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_foreign(thd, &foreign_rec,
++ tables->table);
++ } else { /* err_msg is message text, never a format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC,
++ "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record; mutex first, as at the scan start */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign to its
++column definitions and fill function. @return 0 (no failure path) */
++static
++int
++innodb_sys_foreign_init(
++/*====================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_foreign_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = innodb_sys_foreign_fields_info;
++ schema->fill_table = i_s_sys_foreign_fill_table;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor for the INFORMATION_SCHEMA.INNODB_SYS_FOREIGN table */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_FOREIGN"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_FOREIGN"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_foreign_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var*; NULL: no status variables */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var**; NULL: no system variables */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++/* Columns of INFORMATION_SCHEMA.innodb_sys_foreign_cols, indexed by the macros below */
++static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] =
++{
++#define SYS_FOREIGN_COL_ID 0
++ {STRUCT_FLD(field_name, "ID"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_FOR_NAME 1
++ {STRUCT_FLD(field_name, "FOR_COL_NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_REF_NAME 2
++ {STRUCT_FLD(field_name, "REF_COL_NAME"),
++ STRUCT_FLD(field_length, NAME_LEN + 1),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_FOREIGN_COL_POS 3
++ {STRUCT_FLD(field_name, "POS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++
++/**********************************************************************//**
++Store one SYS_FOREIGN_COLS entry as a row of
++information_schema.innodb_sys_foreign_cols: the four values are written to
++the fields of table_to_fill and the record is then stored.
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_foreign_cols(
++/*==========================*/
++ THD* thd, /*!< in: thread */
++ const char* name, /*!< in: foreign key constraint name */
++ const char* for_col_name, /*!< in: referencing column name */
++ const char* ref_col_name, /*!< in: referenced column name */
++ ulint pos, /*!< in: column position */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ DBUG_ENTER("i_s_dict_fill_sys_foreign_cols");
++
++ /* Field objects of the row, indexed by the SYS_FOREIGN_COL_* macros. */
++ Field** const row = table_to_fill->field;
++
++ /* The three string columns. */
++ OK(field_store_string(row[SYS_FOREIGN_COL_ID], name));
++ OK(field_store_string(row[SYS_FOREIGN_COL_FOR_NAME], for_col_name));
++ OK(field_store_string(row[SYS_FOREIGN_COL_REF_NAME], ref_col_name));
++
++ /* The numeric column. */
++ OK(row[SYS_FOREIGN_COL_POS]->store(pos));
++
++ /* All fields are set; store the record. */
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Function to populate INFORMATION_SCHEMA.innodb_sys_foreign_cols table. Loop
++through each record in SYS_FOREIGN_COLS and add it as a row; a record that
++cannot be processed is reported as a warning and the scan continues.
++@return always 0 */
++static
++int
++i_s_sys_foreign_cols_fill_table(
++/*============================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
++
++ /* PROCESS_ACL check failed: leave the table empty, still return 0 */
++ if (check_global_access(thd, PROCESS_ACL)) {
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN_COLS);
++
++ while (rec) {
++ const char* err_msg;
++ const char* name;
++ const char* for_col_name;
++ const char* ref_col_name;
++ ulint pos;
++
++ /* Extract necessary information from a SYS_FOREIGN_COLS row */
++ err_msg = dict_process_sys_foreign_col_rec(
++ heap, rec, &name, &for_col_name, &ref_col_name, &pos);
++ /* Release the mtr and dict_sys->mutex while the row is stored */
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_foreign_cols(
++ thd, name, for_col_name, ref_col_name, pos,
++ tables->table);
++ } else {
++ /* err_msg is message text, never the format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Initialization hook of the INNODB_SYS_FOREIGN_COLS plugin: attaches the column
++definitions and the fill function to the schema table object passed in.
++@return 0 (there is no failure path) */
++static
++int
++innodb_sys_foreign_cols_init(
++/*========================*/
++ void* p) /*!< in/out: table schema object */
++{
++ DBUG_ENTER("innodb_sys_foreign_cols_init");
++
++ /* The hook signature is int (*)(void*); recover the real type. */
++ ST_SCHEMA_TABLE* const i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
++
++ i_s_table->fill_table = i_s_sys_foreign_cols_fill_table;
++ i_s_table->fields_info = innodb_sys_foreign_cols_fields_info;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor of the INFORMATION_SCHEMA table INNODB_SYS_FOREIGN_COLS. */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign_cols =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_FOREIGN_COLS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "InnoDB SYS_FOREIGN_COLS"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_foreign_cols_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* status variables: struct st_mysql_show_var* (none set here) */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* system variables: struct st_mysql_sys_var** (none set here) */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
+ /***********************************************************************
+ */
+ static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] =
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h 2010-12-03 15:37:45.540456499 +0900
++++ b/storage/innobase/handler/i_s.h 2010-12-03 16:08:57.596941207 +0900
+@@ -33,6 +33,13 @@
+ extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem;
+ extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
++extern struct st_mysql_plugin i_s_innodb_sys_tables;
++extern struct st_mysql_plugin i_s_innodb_sys_tablestats;
++extern struct st_mysql_plugin i_s_innodb_sys_indexes;
++extern struct st_mysql_plugin i_s_innodb_sys_columns;
++extern struct st_mysql_plugin i_s_innodb_sys_fields;
++extern struct st_mysql_plugin i_s_innodb_sys_foreign;
++extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols;
+ extern struct st_mysql_plugin i_s_innodb_rseg;
+
+ #endif /* i_s_h */
--- /dev/null
+# name : innodb_split_buf_pool_mutex.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/btr/btr0cur.c 2010-12-03 15:48:29.268957148 +0900
+@@ -4039,7 +4039,8 @@
+
+ mtr_commit(mtr);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(&block->mutex);
+
+ /* Only free the block if it is still allocated to
+@@ -4050,17 +4051,22 @@
+ && buf_block_get_space(block) == space
+ && buf_block_get_page_no(block) == page_no) {
+
+- if (buf_LRU_free_block(&block->page, all, NULL)
++ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
+ != BUF_LRU_FREED
+- && all && block->page.zip.data) {
++ && all && block->page.zip.data
++ /* Now, buf_LRU_free_block() may release mutex temporarily */
++ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
++ && buf_block_get_space(block) == space
++ && buf_block_get_page_no(block) == page_no) {
+ /* Attempt to deallocate the uncompressed page
+ if the whole block cannot be deallocted. */
+
+- buf_LRU_free_block(&block->page, FALSE, NULL);
++ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ }
+
+diff -ruN a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c
+--- a/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:03.033037049 +0900
++++ b/storage/innobase/btr/btr0sea.c 2010-12-03 15:48:29.271024260 +0900
+@@ -1211,7 +1211,7 @@
+ ulint* offsets;
+
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ //buf_pool_mutex_enter_all();
+
+ table = btr_search_sys->hash_index;
+
+@@ -1220,6 +1220,8 @@
+
+ buf_pool = buf_pool_from_array(j);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (bpage != NULL) {
+@@ -1301,9 +1303,11 @@
+
+ bpage = UT_LIST_GET_PREV(LRU, bpage);
+ }
++
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+- buf_pool_mutex_exit_all();
++ //buf_pool_mutex_exit_all();
+ rw_lock_x_unlock(&btr_search_latch);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+@@ -1896,7 +1900,7 @@
+ rec_offs_init(offsets_);
+
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+
+ cell_count = hash_get_n_cells(btr_search_sys->hash_index);
+
+@@ -1904,11 +1908,11 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if ((i != 0) && ((i % chunk_size) == 0)) {
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+ }
+
+ node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
+@@ -2019,11 +2023,11 @@
+ /* We release btr_search_latch every once in a while to
+ give other queries a chance to run. */
+ if (i != 0) {
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ os_thread_yield();
+ rw_lock_x_lock(&btr_search_latch);
+- buf_pool_mutex_enter_all();
++ buf_pool_page_hash_x_lock_all();
+ }
+
+ if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
+@@ -2031,7 +2035,7 @@
+ }
+ }
+
+- buf_pool_mutex_exit_all();
++ buf_pool_page_hash_x_unlock_all();
+ rw_lock_x_unlock(&btr_search_latch);
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+diff -ruN a/storage/innobase/buf/buf0buddy.c b/storage/innobase/buf/buf0buddy.c
+--- a/storage/innobase/buf/buf0buddy.c 2010-12-03 15:22:36.307986907 +0900
++++ b/storage/innobase/buf/buf0buddy.c 2010-12-03 15:48:29.275025723 +0900
+@@ -73,10 +73,11 @@
+ if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+ ut_ad(buf_pool->zip_free[i].start != bpage);
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
+@@ -96,8 +97,8 @@
+ buf_pool->zip_free[] */
+ {
+ #ifdef UNIV_DEBUG_VALGRIND
+- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
+- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
++ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
++ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
+
+ if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
+ if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
+@@ -106,9 +107,10 @@
+ ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
+- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
+@@ -128,12 +130,13 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_a(i < BUF_BUDDY_SIZES);
+
+ #ifndef UNIV_DEBUG_VALGRIND
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+@@ -177,16 +180,19 @@
+ buf_buddy_block_free(
+ /*=================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+- void* buf) /*!< in: buffer frame to deallocate */
++ void* buf, /*!< in: buffer frame to deallocate */
++ ibool have_page_hash_mutex)
+ {
+ const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
+ buf_page_t* bpage;
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
+
++ mutex_enter(&buf_pool->zip_hash_mutex);
++
+ HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
+ && bpage->in_zip_hash && !bpage->in_page_hash),
+@@ -198,12 +204,14 @@
+ ut_d(bpage->in_zip_hash = FALSE);
+ HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
+
++ mutex_exit(&buf_pool->zip_hash_mutex);
++
+ ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
+ UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
+
+ block = (buf_block_t*) bpage;
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ mutex_exit(&block->mutex);
+
+ ut_ad(buf_pool->buddy_n_frames > 0);
+@@ -220,7 +228,7 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+ const ulint fold = BUF_POOL_ZIP_FOLD(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
+
+@@ -232,7 +240,10 @@
+ ut_ad(!block->page.in_page_hash);
+ ut_ad(!block->page.in_zip_hash);
+ ut_d(block->page.in_zip_hash = TRUE);
++
++ mutex_enter(&buf_pool->zip_hash_mutex);
+ HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
++ mutex_exit(&buf_pool->zip_hash_mutex);
+
+ ut_d(buf_pool->buddy_n_frames++);
+ }
+@@ -268,7 +279,7 @@
+ bpage->state = BUF_BLOCK_ZIP_FREE;
+ #ifndef UNIV_DEBUG_VALGRIND
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(
+ ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+@@ -291,25 +302,29 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /*!< in: pointer to a variable that
++ ibool* lru, /*!< in: pointer to a variable that
+ will be assigned TRUE if storage was
+ allocated from the LRU list and
+ buf_pool->mutex was temporarily
+ released, or NULL if the LRU list
+ should not be used */
++ ibool have_page_hash_mutex)
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+
+ if (i < BUF_BUDDY_SIZES) {
+ /* Try to allocate from the buddy system. */
++ mutex_enter(&buf_pool->zip_free_mutex);
+ block = buf_buddy_alloc_zip(buf_pool, i);
+
+ if (block) {
+ goto func_exit;
+ }
++ mutex_exit(&buf_pool->zip_free_mutex);
+ }
+
+ /* Try allocating from the buf_pool->free list. */
+@@ -326,19 +341,30 @@
+ }
+
+ /* Try replacing an uncompressed page in the buffer pool. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ block = buf_LRU_get_free_block(buf_pool, 0);
+ *lru = TRUE;
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ if (have_page_hash_mutex) {
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
+
+ alloc_big:
+ buf_buddy_block_register(block);
+
++ mutex_enter(&buf_pool->zip_free_mutex);
+ block = buf_buddy_alloc_from(
+ buf_pool, block->frame, i, BUF_BUDDY_SIZES);
+
+ func_exit:
+ buf_pool->buddy_stat[i].used++;
++ mutex_exit(&buf_pool->zip_free_mutex);
++
+ return(block);
+ }
+
+@@ -355,7 +381,10 @@
+ buf_page_t* b;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+@@ -364,7 +393,7 @@
+ case BUF_BLOCK_FILE_PAGE:
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ case BUF_BLOCK_ZIP_DIRTY:
+ /* Cannot relocate dirty pages. */
+ return(FALSE);
+@@ -374,9 +403,18 @@
+ }
+
+ mutex_enter(&buf_pool->zip_mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+ if (!buf_page_can_relocate(bpage)) {
+ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
++ return(FALSE);
++ }
++
++ if (bpage != buf_page_hash_get(buf_pool,
++ bpage->space, bpage->offset)) {
++ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
+ return(FALSE);
+ }
+
+@@ -384,18 +422,19 @@
+ ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
+
+ /* relocate buf_pool->zip_clean */
+- b = UT_LIST_GET_PREV(list, dpage);
+- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
++ b = UT_LIST_GET_PREV(zip_list, dpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
+
+ if (b) {
+- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
++ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
+ } else {
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
+ }
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
+ mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(&buf_pool->zip_free_mutex);
+ return(TRUE);
+ }
+
+@@ -409,14 +448,16 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* src, /*!< in: block to relocate */
+ void* dst, /*!< in: free block to relocate to */
+- ulint i) /*!< in: index of
++ ulint i, /*!< in: index of
+ buf_pool->zip_free[] */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ const ulint size = BUF_BUDDY_LOW << i;
+ ullint usec = ut_time_us(NULL);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(!ut_align_offset(src, size));
+ ut_ad(!ut_align_offset(dst, size));
+@@ -438,6 +479,12 @@
+ /* This is a compressed page. */
+ mutex_t* mutex;
+
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->zip_free_mutex);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
++
+ /* The src block may be split into smaller blocks,
+ some of which may be free. Thus, the
+ mach_read_from_4() calls below may attempt to read
+@@ -462,6 +509,11 @@
+ added to buf_pool->page_hash yet. Obviously,
+ it cannot be relocated. */
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(FALSE);
+ }
+
+@@ -473,18 +525,27 @@
+ For the sake of simplicity, give up. */
+ ut_ad(page_zip_get_size(&bpage->zip) < size);
+
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(FALSE);
+ }
+
++ /* To keep latch order */
++ if (have_page_hash_mutex)
++ mutex_exit(&buf_pool->zip_free_mutex);
++
+ /* The block must have been allocated, but it may
+ contain uninitialized data. */
+ UNIV_MEM_ASSERT_W(src, size);
+
+- mutex = buf_page_get_mutex(bpage);
++ mutex = buf_page_get_mutex_enter(bpage);
+
+- mutex_enter(mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+- if (buf_page_can_relocate(bpage)) {
++ if (mutex && buf_page_can_relocate(bpage)) {
+ /* Relocate the compressed page. */
+ ut_a(bpage->zip.data == src);
+ memcpy(dst, src, size);
+@@ -499,10 +560,22 @@
+ buddy_stat->relocated_usec
+ += ut_time_us(NULL) - usec;
+ }
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ return(TRUE);
+ }
+
+- mutex_exit(mutex);
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
++
++ if (mutex) {
++ mutex_exit(mutex);
++ }
+ } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
+ /* This must be a buf_page_t object. */
+ #if UNIV_WORD_SIZE == 4
+@@ -511,10 +584,31 @@
+ about uninitialized pad bytes. */
+ UNIV_MEM_ASSERT_RW(src, size);
+ #endif
++
++ mutex_exit(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
++
+ if (buf_buddy_relocate_block(src, dst)) {
++ mutex_enter(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+
+ goto success;
+ }
++
++ mutex_enter(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ }
+
+ return(FALSE);
+@@ -529,13 +623,15 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /*!< in: index of buf_pool->zip_free[],
++ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
++ ibool have_page_hash_mutex)
+ {
+ buf_page_t* bpage;
+ buf_page_t* buddy;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ ut_ad(!mutex_own(&buf_pool->zip_mutex));
+ ut_ad(i <= BUF_BUDDY_SIZES);
+ ut_ad(buf_pool->buddy_stat[i].used > 0);
+@@ -546,7 +642,9 @@
+ ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
+
+ if (i == BUF_BUDDY_SIZES) {
+- buf_buddy_block_free(buf_pool, buf);
++ mutex_exit(&buf_pool->zip_free_mutex);
++ buf_buddy_block_free(buf_pool, buf, have_page_hash_mutex);
++ mutex_enter(&buf_pool->zip_free_mutex);
+ return;
+ }
+
+@@ -591,7 +689,7 @@
+ ut_a(bpage != buf);
+
+ {
+- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
++ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
+ UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
+ bpage = next;
+ }
+@@ -600,13 +698,13 @@
+ #ifndef UNIV_DEBUG_VALGRIND
+ buddy_nonfree:
+ /* Valgrind would complain about accessing free memory. */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE)));
+ #endif /* UNIV_DEBUG_VALGRIND */
+
+ /* The buddy is not free. Is there a free block of this size? */
+- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
++ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
+
+ if (bpage) {
+ /* Remove the block from the free list, because a successful
+@@ -616,7 +714,7 @@
+ buf_buddy_remove_from_free(buf_pool, bpage, i);
+
+ /* Try to relocate the buddy of buf to the free block. */
+- if (buf_buddy_relocate(buf_pool, buddy, bpage, i)) {
++ if (buf_buddy_relocate(buf_pool, buddy, bpage, i, have_page_hash_mutex)) {
+
+ ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
+ goto buddy_free2;
+@@ -636,14 +734,14 @@
+
+ (Parts of the buddy can be free in
+ buf_pool->zip_free[j] with j < i.) */
+- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
++ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
+ ut_ad(buf_page_get_state(
+ ut_list_node_313)
+ == BUF_BLOCK_ZIP_FREE
+ && ut_list_node_313 != buddy)));
+ #endif /* !UNIV_DEBUG_VALGRIND */
+
+- if (buf_buddy_relocate(buf_pool, buddy, buf, i)) {
++ if (buf_buddy_relocate(buf_pool, buddy, buf, i, have_page_hash_mutex)) {
+
+ buf = bpage;
+ UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
+diff -ruN a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
+--- a/storage/innobase/buf/buf0buf.c 2010-12-03 15:22:36.314943336 +0900
++++ b/storage/innobase/buf/buf0buf.c 2010-12-03 15:48:29.282947357 +0900
+@@ -263,6 +263,7 @@
+ #ifdef UNIV_PFS_RWLOCK
+ /* Keys to register buffer block related rwlocks and mutexes with
+ performance schema */
++UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
+ UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
+@@ -273,6 +274,10 @@
+ UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
++UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+ UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
+ #endif /* UNIV_PFS_MUTEX */
+
+@@ -881,9 +886,9 @@
+ block->page.in_zip_hash = FALSE;
+ block->page.in_flush_list = FALSE;
+ block->page.in_free_list = FALSE;
+- block->in_unzip_LRU_list = FALSE;
+ #endif /* UNIV_DEBUG */
+ block->page.in_LRU_list = FALSE;
++ block->in_unzip_LRU_list = FALSE;
+ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
+ block->n_pointers = 0;
+ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
+@@ -981,9 +986,11 @@
+ memset(block->frame, '\0', UNIV_PAGE_SIZE);
+ #endif
+ /* Add the block to the free list */
+- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
++ mutex_enter(&buf_pool->free_list_mutex);
++ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
+
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&buf_pool->free_list_mutex);
+ ut_ad(buf_pool_from_block(block) == buf_pool);
+
+ block++;
+@@ -1038,7 +1045,8 @@
+ buf_chunk_t* chunk = buf_pool->chunks;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->zip_free_mutex));
+ for (n = buf_pool->n_chunks; n--; chunk++) {
+
+ buf_block_t* block = buf_chunk_contains_zip(chunk, data);
+@@ -1138,7 +1146,7 @@
+ buf_block_t* block;
+ const buf_block_t* block_end;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool)); /* but we need all mutex here */
+
+ block_end = chunk->blocks + chunk->size;
+
+@@ -1150,8 +1158,10 @@
+ ut_ad(!block->in_unzip_LRU_list);
+ ut_ad(!block->page.in_flush_list);
+ /* Remove the block from the free list. */
++ mutex_enter(&buf_pool->free_list_mutex);
+ ut_ad(block->page.in_free_list);
+- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ /* Free the latches. */
+ mutex_free(&block->mutex);
+@@ -1208,9 +1218,21 @@
+ ------------------------------- */
+ mutex_create(buf_pool_mutex_key,
+ &buf_pool->mutex, SYNC_BUF_POOL);
++ mutex_create(buf_pool_LRU_list_mutex_key,
++ &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
++ rw_lock_create(buf_pool_page_hash_key,
++ &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
++ mutex_create(buf_pool_free_list_mutex_key,
++ &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
++ mutex_create(buf_pool_zip_free_mutex_key,
++ &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
++ mutex_create(buf_pool_zip_hash_mutex_key,
++ &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
+ mutex_create(buf_pool_zip_mutex_key,
+ &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_enter(buf_pool);
+
+ if (buf_pool_size > 0) {
+@@ -1223,6 +1245,8 @@
+ mem_free(chunk);
+ mem_free(buf_pool);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(DB_ERROR);
+@@ -1253,6 +1277,8 @@
+
+ /* All fields are initialized by mem_zalloc(). */
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(DB_SUCCESS);
+@@ -1467,7 +1493,11 @@
+ ulint fold;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1554,7 +1584,8 @@
+
+ try_again:
+ btr_search_disable(); /* Empty the adaptive hash index again */
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ shrink_again:
+ if (buf_pool->n_chunks <= 1) {
+@@ -1625,7 +1656,7 @@
+
+ buf_LRU_make_block_old(&block->page);
+ dirty++;
+- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
++ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, TRUE)
+ != BUF_LRU_FREED) {
+ nonfree++;
+ }
+@@ -1633,7 +1664,8 @@
+ mutex_exit(&block->mutex);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Request for a flush of the chunk if it helps.
+ Do not flush if there are non-free blocks, since
+@@ -1683,7 +1715,8 @@
+ func_done:
+ buf_pool->old_pool_size = buf_pool->curr_pool_size;
+ func_exit:
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ btr_search_enable();
+ }
+
+@@ -1724,7 +1757,9 @@
+ hash_table_t* zip_hash;
+ hash_table_t* page_hash;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ /* Free, create, and populate the hash table. */
+ hash_table_free(buf_pool->page_hash);
+@@ -1765,8 +1800,9 @@
+ All such blocks are either in buf_pool->zip_clean or
+ in buf_pool->flush_list. */
+
++ mutex_enter(&buf_pool->zip_mutex);
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_ad(!b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+@@ -1776,10 +1812,11 @@
+ HASH_INSERT(buf_page_t, hash, page_hash,
+ buf_page_address_fold(b->space, b->offset), b);
+ }
++ mutex_exit(&buf_pool->zip_mutex);
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+ ut_ad(b->in_LRU_list);
+ ut_ad(b->in_page_hash);
+@@ -1806,7 +1843,9 @@
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /********************************************************************
+@@ -1853,21 +1892,32 @@
+ buf_page_t* bpage;
+ ulint i;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
++ mutex_t* block_mutex;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
++ if (bpage) {
++ block_mutex = buf_page_get_mutex_enter(bpage);
++ ut_a(block_mutex);
++ }
+
+ if (UNIV_LIKELY_NULL(bpage)) {
+ if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+ /* The page was loaded meanwhile. */
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ return(bpage);
+ }
+ /* Add to an existing watch. */
+ bpage->buf_fix_count++;
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(block_mutex);
+ return(NULL);
+ }
+
++ /* buf_pool->watch is protected by zip_mutex for now */
++ mutex_enter(&buf_pool->zip_mutex);
+ for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
+ bpage = &buf_pool->watch[i];
+
+@@ -1891,10 +1941,12 @@
+ bpage->space = space;
+ bpage->offset = offset;
+ bpage->buf_fix_count = 1;
+-
++ bpage->buf_pool_index = buf_pool_index(buf_pool);
+ ut_d(bpage->in_page_hash = TRUE);
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
+ fold, bpage);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->zip_mutex);
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_ad(bpage->in_page_hash);
+@@ -1912,6 +1964,8 @@
+ ut_error;
+
+ /* Fix compiler warning */
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->zip_mutex);
+ return(NULL);
+ }
+
+@@ -1941,6 +1995,8 @@
+ buf_chunk_t* chunks;
+ buf_chunk_t* chunk;
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_enter(buf_pool);
+ chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);
+
+@@ -1959,6 +2015,8 @@
+ buf_pool->n_chunks++;
+ }
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_pool_mutex_exit(buf_pool);
+ }
+
+@@ -2046,7 +2104,11 @@
+ space, offset) */
+ buf_page_t* watch) /*!< in/out: sentinel for watch */
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
++ ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */
+
+ HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
+ ut_d(watch->in_page_hash = FALSE);
+@@ -2068,28 +2130,31 @@
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ ut_a(bpage);
+
+ if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
+- mutex_t* mutex = buf_page_get_mutex(bpage);
++ mutex_t* mutex = buf_page_get_mutex_enter(bpage);
+
+- mutex_enter(mutex);
+ ut_a(bpage->buf_fix_count > 0);
+ bpage->buf_fix_count--;
+ mutex_exit(mutex);
+ } else {
++ mutex_enter(&buf_pool->zip_mutex);
+ ut_a(bpage->buf_fix_count > 0);
+
+ if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
+ buf_pool_watch_remove(buf_pool, fold, bpage);
+ }
++ mutex_exit(&buf_pool->zip_mutex);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /****************************************************************//**
+@@ -2109,14 +2174,16 @@
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ ulint fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ /* The page must exist because buf_pool_watch_set()
+ increments buf_fix_count. */
+ ut_a(bpage);
+ ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(ret);
+ }
+@@ -2133,13 +2200,15 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ ut_a(buf_page_in_file(bpage));
+
+ buf_LRU_make_block_young(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /********************************************************************//**
+@@ -2163,14 +2232,20 @@
+ ut_a(buf_page_in_file(bpage));
+
+ if (buf_page_peek_if_too_old(bpage)) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ buf_LRU_make_block_young(bpage);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else if (!access_time) {
+ ulint time_ms = ut_time_ms();
+- buf_pool_mutex_enter(buf_pool);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++ //buf_pool_mutex_enter(buf_pool);
++ if (block_mutex) {
+ buf_page_set_accessed(bpage, time_ms);
+- buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
++ }
++ //buf_pool_mutex_exit(buf_pool);
+ }
+ }
+
+@@ -2187,7 +2262,8 @@
+ buf_block_t* block;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2196,7 +2272,8 @@
+ block->check_index_page_at_flush = FALSE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ /********************************************************************//**
+@@ -2215,7 +2292,8 @@
+ ibool is_hashed;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2226,7 +2304,8 @@
+ is_hashed = block->is_hashed;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(is_hashed);
+ }
+@@ -2248,7 +2327,8 @@
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2257,7 +2337,8 @@
+ bpage->file_page_was_freed = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage);
+ }
+@@ -2278,7 +2359,8 @@
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+@@ -2287,7 +2369,8 @@
+ bpage->file_page_was_freed = FALSE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage);
+ }
+@@ -2322,8 +2405,9 @@
+ buf_pool->stat.n_page_gets++;
+
+ for (;;) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+ lookup:
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+ if (bpage) {
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+@@ -2332,7 +2416,8 @@
+
+ /* Page not in buf_pool: needs to be read from file */
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ buf_read_page(space, zip_size, offset);
+
+@@ -2344,10 +2429,15 @@
+ if (UNIV_UNLIKELY(!bpage->zip.data)) {
+ /* There is no compressed page. */
+ err_exit:
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(NULL);
+ }
+
++ block_mutex = buf_page_get_mutex_enter(bpage);
++
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
+
+ switch (buf_page_get_state(bpage)) {
+@@ -2356,19 +2446,19 @@
+ case BUF_BLOCK_MEMORY:
+ case BUF_BLOCK_REMOVE_HASH:
+ case BUF_BLOCK_ZIP_FREE:
++ if (block_mutex)
++ mutex_exit(block_mutex);
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+- block_mutex = &buf_pool->zip_mutex;
+- mutex_enter(block_mutex);
++ ut_a(block_mutex == &buf_pool->zip_mutex);
+ bpage->buf_fix_count++;
+ goto got_block;
+ case BUF_BLOCK_FILE_PAGE:
+- block_mutex = &((buf_block_t*) bpage)->mutex;
+- mutex_enter(block_mutex);
++ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
+
+ /* Discard the uncompressed page frame if possible. */
+- if (buf_LRU_free_block(bpage, FALSE, NULL)
++ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
+ == BUF_LRU_FREED) {
+
+ mutex_exit(block_mutex);
+@@ -2387,7 +2477,7 @@
+ must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
+ access_time = buf_page_is_accessed(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ mutex_exit(block_mutex);
+
+@@ -2696,7 +2786,7 @@
+ const buf_block_t* block) /*!< in: pointer to block,
+ not dereferenced */
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
+ /* The pointer should be aligned. */
+@@ -2732,6 +2822,7 @@
+ ulint fix_type;
+ ibool must_read;
+ ulint retries = 0;
++ mutex_t* block_mutex = NULL;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+ ut_ad(mtr);
+@@ -2753,9 +2844,11 @@
+ fold = buf_page_address_fold(space, offset);
+ loop:
+ block = guess;
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++
+ /* If the guess is a compressed page descriptor that
+ has been allocated by buf_buddy_alloc(), it may have
+ been invalidated by buf_buddy_relocate(). In that
+@@ -2764,11 +2857,15 @@
+ the guess may be pointing to a buffer pool chunk that
+ has been released when resizing the buffer pool. */
+
+- if (!buf_block_is_uncompressed(buf_pool, block)
++ if (!block_mutex) {
++ block = guess = NULL;
++ } else if (!buf_block_is_uncompressed(buf_pool, block)
+ || offset != block->page.offset
+ || space != block->page.space
+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+
++ mutex_exit(block_mutex);
++
+ block = guess = NULL;
+ } else {
+ ut_ad(!block->page.in_zip_hash);
+@@ -2777,12 +2874,19 @@
+ }
+
+ if (block == NULL) {
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ block = (buf_block_t*) buf_page_hash_get_low(
+ buf_pool, space, offset, fold);
++ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++ ut_a(block_mutex);
++ }
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ loop2:
+ if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
++ mutex_exit(block_mutex);
+ block = NULL;
+ }
+
+@@ -2794,12 +2898,14 @@
+ space, offset, fold);
+
+ if (UNIV_LIKELY_NULL(block)) {
+-
++ block_mutex = buf_page_get_mutex((buf_page_t*)block);
++ ut_a(block_mutex);
++ ut_ad(mutex_own(block_mutex));
+ goto got_block;
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (mode == BUF_GET_IF_IN_POOL
+ || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -2847,7 +2953,8 @@
+ /* The page is being read to buffer pool,
+ but we cannot wait around for the read to
+ complete. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+
+ return(NULL);
+ }
+@@ -2857,38 +2964,49 @@
+ ibool success;
+
+ case BUF_BLOCK_FILE_PAGE:
++ if (block_mutex == &buf_pool->zip_mutex) {
++ /* zip_mutex is the wrong mutex for a file page; release it and retry */
++ mutex_exit(block_mutex);
++ goto loop;
++ }
+ break;
+
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
++ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ bpage = &block->page;
+ /* Protect bpage->buf_fix_count. */
+- mutex_enter(&buf_pool->zip_mutex);
++ //mutex_enter(&buf_pool->zip_mutex);
+
+ if (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+ /* This condition often occurs when the buffer
+ is not buffer-fixed, but I/O-fixed by
+ buf_page_init_for_read(). */
+- mutex_exit(&buf_pool->zip_mutex);
++ //mutex_exit(&buf_pool->zip_mutex);
+ wait_until_unfixed:
+ /* The block is buffer-fixed or I/O-fixed.
+ Try again later. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+ os_thread_sleep(WAIT_FOR_READ);
+
+ goto loop;
+ }
+
+ /* Allocate an uncompressed page. */
+- buf_pool_mutex_exit(buf_pool);
+- mutex_exit(&buf_pool->zip_mutex);
++ //buf_pool_mutex_exit(buf_pool);
++ //mutex_exit(&buf_pool->zip_mutex);
++ mutex_exit(block_mutex);
+
+ block = buf_LRU_get_free_block(buf_pool, 0);
+ ut_a(block);
++ block_mutex = &block->mutex;
+
+- buf_pool_mutex_enter(buf_pool);
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ mutex_enter(block_mutex);
+
+ {
+ buf_page_t* hash_bpage;
+@@ -2901,35 +3019,47 @@
+ while buf_pool->mutex was released.
+ Free the block that was allocated. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ mutex_exit(block_mutex);
+
+ block = (buf_block_t*) hash_bpage;
++ if (block) {
++ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
++ ut_a(block_mutex);
++ }
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ goto loop2;
+ }
+ }
+
++ mutex_enter(&buf_pool->zip_mutex);
++
+ if (UNIV_UNLIKELY
+ (bpage->buf_fix_count
+ || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
+
++ mutex_exit(&buf_pool->zip_mutex);
+ /* The block was buffer-fixed or I/O-fixed
+ while buf_pool->mutex was not held by this thread.
+ Free the block that was allocated and try again.
+ This should be extremely unlikely. */
+
+- buf_LRU_block_free_non_file_page(block);
+- mutex_exit(&block->mutex);
++ buf_LRU_block_free_non_file_page(block, TRUE);
++ //mutex_exit(&block->mutex);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ goto wait_until_unfixed;
+ }
+
+ /* Move the compressed page from bpage to block,
+ and uncompress it. */
+
+- mutex_enter(&buf_pool->zip_mutex);
+-
+ buf_relocate(bpage, &block->page);
++
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ buf_block_init_low(block);
+ block->lock_hash_val = lock_rec_hash(space, offset);
+
+@@ -2938,7 +3068,7 @@
+
+ if (buf_page_get_state(&block->page)
+ == BUF_BLOCK_ZIP_PAGE) {
+- UT_LIST_REMOVE(list, buf_pool->zip_clean,
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
+ &block->page);
+ ut_ad(!block->page.in_flush_list);
+ } else {
+@@ -2955,19 +3085,24 @@
+ /* Insert at the front of unzip_LRU list */
+ buf_unzip_LRU_add_block(block, FALSE);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ block->page.buf_fix_count = 1;
+ buf_block_set_io_fix(block, BUF_IO_READ);
+ rw_lock_x_lock_func(&block->lock, 0, file, line);
+
+ UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
+- mutex_exit(&block->mutex);
++ mutex_exit(block_mutex);
+ mutex_exit(&buf_pool->zip_mutex);
++
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_unzip++;
++ buf_pool_mutex_exit(buf_pool);
+
+- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
++ buf_buddy_free(buf_pool, bpage, sizeof *bpage, FALSE);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ /* Decompress the page and apply buffered operations
+ while not holding buf_pool->mutex or block->mutex. */
+@@ -2980,12 +3115,15 @@
+ }
+
+ /* Unfix and unlatch the block. */
+- buf_pool_mutex_enter(buf_pool);
+- mutex_enter(&block->mutex);
++ //buf_pool_mutex_enter(buf_pool);
++ block_mutex = &block->mutex;
++ mutex_enter(block_mutex);
+ block->page.buf_fix_count--;
+ buf_block_set_io_fix(block, BUF_IO_NONE);
+- mutex_exit(&block->mutex);
++
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_unzip--;
++ buf_pool_mutex_exit(buf_pool);
+ rw_lock_x_unlock(&block->lock);
+
+ break;
+@@ -3001,7 +3139,7 @@
+
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+
+- mutex_enter(&block->mutex);
++ //mutex_enter(&block->mutex);
+ #if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+ other systems, Valgrind could complain about uninitialized pad
+@@ -3014,7 +3152,7 @@
+ /* Try to evict the block from the buffer pool, to use the
+ insert buffer (change buffer) as much as possible. */
+
+- if (buf_LRU_free_block(&block->page, TRUE, NULL)
++ if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
+ == BUF_LRU_FREED) {
+ mutex_exit(&block->mutex);
+ if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
+@@ -3051,13 +3189,14 @@
+
+ buf_block_buf_fix_inc(block, file, line);
+
+- mutex_exit(&block->mutex);
++ //mutex_exit(&block->mutex);
+
+ /* Check if this is the first access to the page */
+
+ access_time = buf_page_is_accessed(&block->page);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+
+ buf_page_set_accessed_make_young(&block->page, access_time);
+
+@@ -3290,9 +3429,11 @@
+ buf_pool = buf_pool_from_block(block);
+
+ if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ buf_LRU_make_block_young(&block->page);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else if (!buf_page_is_accessed(&block->page)) {
+ /* Above, we do a dirty read on purpose, to avoid
+ mutex contention. The field buf_page_t::access_time
+@@ -3300,9 +3441,11 @@
+ field must be protected by mutex, however. */
+ ulint time_ms = ut_time_ms();
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&block->mutex);
+ buf_page_set_accessed(&block->page, time_ms);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&block->mutex);
+ }
+
+ ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
+@@ -3369,18 +3512,21 @@
+ ut_ad(mtr);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ block = buf_block_hash_get(buf_pool, space_id, page_no);
+
+ if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(NULL);
+ }
+
+ ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
+
+ mutex_enter(&block->mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+@@ -3469,7 +3615,10 @@
+ buf_page_t* hash_page;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(&(block->mutex)));
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+@@ -3498,11 +3647,14 @@
+ if (UNIV_LIKELY(!hash_page)) {
+ } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
+ /* Preserve the reference count. */
+- ulint buf_fix_count = hash_page->buf_fix_count;
++ ulint buf_fix_count;
+
++ mutex_enter(&buf_pool->zip_mutex);
++ buf_fix_count = hash_page->buf_fix_count;
+ ut_a(buf_fix_count > 0);
+ block->page.buf_fix_count += buf_fix_count;
+ buf_pool_watch_remove(buf_pool, fold, hash_page);
++ mutex_exit(&buf_pool->zip_mutex);
+ } else {
+ fprintf(stderr,
+ "InnoDB: Error: page %lu %lu already found"
+@@ -3512,7 +3664,8 @@
+ (const void*) hash_page, (const void*) block);
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -3596,7 +3749,9 @@
+
+ fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
+ if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
+@@ -3605,9 +3760,15 @@
+ err_exit:
+ if (block) {
+ mutex_enter(&block->mutex);
+- buf_LRU_block_free_non_file_page(block);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+ mutex_exit(&block->mutex);
+ }
++ else {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+
+ bpage = NULL;
+ goto func_exit;
+@@ -3630,6 +3791,8 @@
+
+ buf_page_init(space, offset, fold, block);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+
+@@ -3657,7 +3820,7 @@
+ been added to buf_pool->LRU and
+ buf_pool->page_hash. */
+ mutex_exit(&block->mutex);
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -3670,6 +3833,7 @@
+ buf_unzip_LRU_add_block(block, TRUE);
+ }
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ mutex_exit(&block->mutex);
+ } else {
+ /* Defer buf_buddy_alloc() until after the block has
+@@ -3681,8 +3845,8 @@
+ control block (bpage), in order to avoid the
+ invocation of buf_buddy_relocate_block() on
+ uninitialized data. */
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
+- bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
++ bpage = buf_buddy_alloc(buf_pool, sizeof *bpage, &lru, TRUE);
+
+ /* Initialize the buf_pool pointer. */
+ bpage->buf_pool_index = buf_pool_index(buf_pool);
+@@ -3701,8 +3865,11 @@
+
+ /* The block was added by some other thread. */
+ watch_page = NULL;
+- buf_buddy_free(buf_pool, bpage, sizeof *bpage);
+- buf_buddy_free(buf_pool, data, zip_size);
++ buf_buddy_free(buf_pool, bpage, sizeof *bpage, TRUE);
++ buf_buddy_free(buf_pool, data, zip_size, TRUE);
++
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ bpage = NULL;
+ goto func_exit;
+@@ -3746,18 +3913,24 @@
+ HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
+ bpage);
+
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++
+ /* The block must be put to the LRU list, to the old blocks */
+ buf_LRU_add_block(bpage, TRUE/* to old blocks */);
+ buf_LRU_insert_zip_clean(bpage);
+
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ buf_page_set_io_fix(bpage, BUF_IO_READ);
+
+ mutex_exit(&buf_pool->zip_mutex);
+ }
+
++ buf_pool_mutex_enter(buf_pool);
+ buf_pool->n_pend_reads++;
+-func_exit:
+ buf_pool_mutex_exit(buf_pool);
++func_exit:
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (mode == BUF_READ_IBUF_PAGES_ONLY) {
+
+@@ -3799,7 +3972,9 @@
+
+ fold = buf_page_address_fold(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ block = (buf_block_t*) buf_page_hash_get_low(
+ buf_pool, space, offset, fold);
+@@ -3815,7 +3990,9 @@
+ #endif /* UNIV_DEBUG_FILE_ACCESSES */
+
+ /* Page can be found in buf_pool */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ buf_block_free(free_block);
+
+@@ -3837,6 +4014,7 @@
+ mutex_enter(&block->mutex);
+
+ buf_page_init(space, offset, fold, block);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ /* The block must be put to the LRU list */
+ buf_LRU_add_block(&block->page, FALSE);
+@@ -3863,7 +4041,7 @@
+ the reacquisition of buf_pool->mutex. We also must
+ defer this operation until after the block descriptor
+ has been added to buf_pool->LRU and buf_pool->page_hash. */
+- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
++ data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
+ mutex_enter(&block->mutex);
+ block->page.zip.data = data;
+
+@@ -3881,7 +4059,8 @@
+
+ buf_page_set_accessed(&block->page, time_ms);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+
+@@ -3932,6 +4111,8 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ const ibool uncompressed = (buf_page_get_state(bpage)
+ == BUF_BLOCK_FILE_PAGE);
++ ibool have_LRU_mutex = FALSE;
++ mutex_t* block_mutex;
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -4065,8 +4246,26 @@
+ }
+ }
+
++ if (io_type == BUF_IO_WRITE
++ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
++ /* to keep consistency at buf_LRU_insert_zip_clean() */
++ have_LRU_mutex = TRUE; /* optimistic */
++ }
++retry_mutex:
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
++ ut_a(block_mutex);
++ if (io_type == BUF_IO_WRITE
++ && (buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
++ || buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
++ && !have_LRU_mutex) {
++ mutex_exit(block_mutex);
++ have_LRU_mutex = TRUE;
++ goto retry_mutex;
++ }
+ buf_pool_mutex_enter(buf_pool);
+- mutex_enter(buf_page_get_mutex(bpage));
+
+ #ifdef UNIV_IBUF_COUNT_DEBUG
+ if (io_type == BUF_IO_WRITE || uncompressed) {
+@@ -4089,6 +4288,7 @@
+ the x-latch to this OS thread: do not let this confuse you in
+ debugging! */
+
++ ut_a(!have_LRU_mutex);
+ ut_ad(buf_pool->n_pend_reads > 0);
+ buf_pool->n_pend_reads--;
+ buf_pool->stat.n_pages_read++;
+@@ -4106,6 +4306,9 @@
+
+ buf_flush_write_complete(bpage);
+
++ if (have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++
+ if (uncompressed) {
+ rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
+ BUF_IO_WRITE);
+@@ -4128,8 +4331,8 @@
+ }
+ #endif /* UNIV_DEBUG */
+
+- mutex_exit(buf_page_get_mutex(bpage));
+ buf_pool_mutex_exit(buf_pool);
++ mutex_exit(block_mutex);
+ }
+
+ /*********************************************************************//**
+@@ -4146,7 +4349,9 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ chunk = buf_pool->chunks;
+
+@@ -4163,7 +4368,9 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ return(TRUE);
+ }
+@@ -4211,7 +4418,8 @@
+ freed = buf_LRU_search_and_free_block(buf_pool, 100);
+ }
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
+ ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
+@@ -4224,7 +4432,8 @@
+ memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
+ buf_refresh_io_stats(buf_pool);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /*********************************************************************//**
+@@ -4266,7 +4475,10 @@
+
+ ut_ad(buf_pool);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ /* To keep the new latch order, this function cannot validate everything correctly... */
+
+ chunk = buf_pool->chunks;
+
+@@ -4361,7 +4573,7 @@
+ /* Check clean compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ switch (buf_page_get_io_fix(b)) {
+ case BUF_IO_NONE:
+@@ -4392,7 +4604,7 @@
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+ ut_a(b->oldest_modification);
+ n_flush++;
+@@ -4451,6 +4663,8 @@
+ }
+
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
++ /* Because of the latching order with block->mutex, the mutexes needed for the checks below cannot be acquired beforehand, so the checks are disabled. */
++/*
+ if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
+ fprintf(stderr, "Free list len %lu, free blocks %lu\n",
+ (ulong) UT_LIST_GET_LEN(buf_pool->free),
+@@ -4461,8 +4675,11 @@
+ ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
+ ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
++*/
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ ut_a(buf_LRU_validate());
+ ut_a(buf_flush_validate(buf_pool));
+@@ -4518,7 +4735,9 @@
+ index_ids = mem_alloc(size * sizeof *index_ids);
+ counts = mem_alloc(sizeof(ulint) * size);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+ buf_flush_list_mutex_enter(buf_pool);
+
+ fprintf(stderr,
+@@ -4587,7 +4806,9 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ for (i = 0; i < n_found; i++) {
+ index = dict_index_get_if_in_cache(index_ids[i]);
+@@ -4644,7 +4865,7 @@
+ buf_chunk_t* chunk;
+ ulint fixed_pages_number = 0;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ chunk = buf_pool->chunks;
+
+@@ -4678,7 +4899,7 @@
+ /* Traverse the lists of clean and dirty compressed-only blocks. */
+
+ for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(zip_list, b)) {
+ ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
+ ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
+
+@@ -4690,7 +4911,7 @@
+
+ buf_flush_list_mutex_enter(buf_pool);
+ for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
+- b = UT_LIST_GET_NEXT(list, b)) {
++ b = UT_LIST_GET_NEXT(flush_list, b)) {
+ ut_ad(b->in_flush_list);
+
+ switch (buf_page_get_state(b)) {
+@@ -4716,7 +4937,7 @@
+
+ buf_flush_list_mutex_exit(buf_pool);
+ mutex_exit(&buf_pool->zip_mutex);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ return(fixed_pages_number);
+ }
+@@ -4810,6 +5031,8 @@
+
+ ut_ad(buf_pool);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+ buf_pool_mutex_enter(buf_pool);
+ buf_flush_list_mutex_enter(buf_pool);
+
+@@ -4913,6 +5136,8 @@
+ buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
+
+ buf_refresh_io_stats(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+ buf_pool_mutex_exit(buf_pool);
+ }
+
+@@ -5032,11 +5257,13 @@
+ {
+ ulint len;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->free_list_mutex);
+
+ len = UT_LIST_GET_LEN(buf_pool->free);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ return(len);
+ }
+diff -ruN a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c
+--- a/storage/innobase/buf/buf0flu.c 2010-12-03 15:22:36.318955693 +0900
++++ b/storage/innobase/buf/buf0flu.c 2010-12-03 15:48:29.289024083 +0900
+@@ -279,7 +279,7 @@
+
+ ut_d(block->page.in_flush_list = TRUE);
+ block->page.oldest_modification = lsn;
+- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+
+ #ifdef UNIV_DEBUG_VALGRIND
+ {
+@@ -373,14 +373,14 @@
+ > block->page.oldest_modification) {
+ ut_ad(b->in_flush_list);
+ prev_b = b;
+- b = UT_LIST_GET_NEXT(list, b);
++ b = UT_LIST_GET_NEXT(flush_list, b);
+ }
+ }
+
+ if (prev_b == NULL) {
+- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
++ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
+ } else {
+- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
++ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
+ prev_b, &block->page);
+ }
+
+@@ -406,7 +406,7 @@
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
++ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ //ut_ad(bpage->in_LRU_list);
+
+ if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
+@@ -442,14 +442,14 @@
+ enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+- ut_a(buf_page_in_file(bpage));
++ //ut_a(buf_page_in_file(bpage));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
+
+- if (bpage->oldest_modification != 0
++ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+ ut_ad(bpage->in_flush_list);
+
+@@ -480,7 +480,7 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(bpage->in_flush_list);
+
+@@ -498,11 +498,11 @@
+ return;
+ case BUF_BLOCK_ZIP_DIRTY:
+ buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+ buf_LRU_insert_zip_clean(bpage);
+ break;
+ case BUF_BLOCK_FILE_PAGE:
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+ break;
+ }
+
+@@ -546,7 +546,7 @@
+ buf_page_t* prev_b = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ /* Must reside in the same buffer pool. */
+ ut_ad(buf_pool == buf_pool_from_bpage(dpage));
+
+@@ -575,18 +575,18 @@
+ because we assert on in_flush_list in comparison function. */
+ ut_d(bpage->in_flush_list = FALSE);
+
+- prev = UT_LIST_GET_PREV(list, bpage);
+- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
++ prev = UT_LIST_GET_PREV(flush_list, bpage);
++ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
+
+ if (prev) {
+ ut_ad(prev->in_flush_list);
+ UT_LIST_INSERT_AFTER(
+- list,
++ flush_list,
+ buf_pool->flush_list,
+ prev, dpage);
+ } else {
+ UT_LIST_ADD_FIRST(
+- list,
++ flush_list,
+ buf_pool->flush_list,
+ dpage);
+ }
+@@ -1055,7 +1055,7 @@
+
+ #ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(!buf_pool_mutex_own(buf_pool));
++ //ut_ad(!buf_pool_mutex_own(buf_pool));
+ #endif
+
+ #ifdef UNIV_LOG_DEBUG
+@@ -1069,7 +1069,8 @@
+ io_fixed and oldest_modification != 0. Thus, it cannot be
+ relocated in the buffer pool or removed from flush_list or
+ LRU_list. */
+- ut_ad(!buf_pool_mutex_own(buf_pool));
++ //ut_ad(!buf_pool_mutex_own(buf_pool));
++ ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
+ ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
+@@ -1232,12 +1233,18 @@
+ ibool is_uncompressed;
+
+ ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+ ut_ad(buf_page_in_file(bpage));
+
+ block_mutex = buf_page_get_mutex(bpage);
+ ut_ad(mutex_own(block_mutex));
+
++ buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
++
+ ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
+
+ buf_page_set_io_fix(bpage, BUF_IO_WRITE);
+@@ -1399,14 +1406,16 @@
+
+ buf_pool = buf_pool_get(space, i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ /* We only want to flush pages from this buffer pool. */
+ bpage = buf_page_hash_get(buf_pool, space, i);
+
+ if (!bpage) {
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ continue;
+ }
+
+@@ -1418,11 +1427,9 @@
+ if (flush_type != BUF_FLUSH_LRU
+ || i == offset
+ || buf_page_is_old(bpage)) {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-
+- mutex_enter(block_mutex);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+
+- if (buf_flush_ready_for_flush(bpage, flush_type)
++ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
+ && (i == offset || !bpage->buf_fix_count)) {
+ /* We only try to flush those
+ neighbors != offset where the buf fix
+@@ -1438,11 +1445,12 @@
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ count++;
+ continue;
+- } else {
++ } else if (block_mutex) {
+ mutex_exit(block_mutex);
+ }
+ }
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ }
+
+ return(count);
+@@ -1475,21 +1483,25 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(flush_type != BUF_FLUSH_LRU
++ || mutex_own(&buf_pool->LRU_list_mutex));
+
+- block_mutex = buf_page_get_mutex(bpage);
+- mutex_enter(block_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
+
+- ut_a(buf_page_in_file(bpage));
++ //ut_a(buf_page_in_file(bpage));
+
+- if (buf_flush_ready_for_flush(bpage, flush_type)) {
++ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)) {
+ ulint space;
+ ulint offset;
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_bpage(bpage);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ }
+
+ /* These fields are protected by both the
+ buffer pool mutex and block mutex. */
+@@ -1505,13 +1517,18 @@
+ *count,
+ n_to_flush);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ if (flush_type == BUF_FLUSH_LRU) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ }
+ flushed = TRUE;
+- } else {
++ } else if (block_mutex) {
+ mutex_exit(block_mutex);
+ }
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(flush_type != BUF_FLUSH_LRU
++ || mutex_own(&buf_pool->LRU_list_mutex));
+
+ return(flushed);
+ }
+@@ -1532,7 +1549,8 @@
+ buf_page_t* bpage;
+ ulint count = 0;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ do {
+ /* Start from the end of the list looking for a
+@@ -1554,7 +1572,8 @@
+ should be flushed, we factor in this value. */
+ buf_lru_flush_page_count += count;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ return(count);
+ }
+@@ -1582,9 +1601,10 @@
+ {
+ ulint len;
+ buf_page_t* bpage;
++ buf_page_t* prev_bpage = NULL;
+ ulint count = 0;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ /* If we have flushed enough, leave the loop */
+ do {
+@@ -1603,6 +1623,7 @@
+
+ if (bpage) {
+ ut_a(bpage->oldest_modification > 0);
++ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
+ }
+
+ if (!bpage || bpage->oldest_modification >= lsn_limit) {
+@@ -1644,9 +1665,17 @@
+ break;
+ }
+
+- bpage = UT_LIST_GET_PREV(list, bpage);
++ bpage = UT_LIST_GET_PREV(flush_list, bpage);
+
+- ut_ad(!bpage || bpage->in_flush_list);
++ //ut_ad(!bpage || bpage->in_flush_list);
++ if (bpage != prev_bpage) {
++ /* the search might warp.. retrying */
++ buf_flush_list_mutex_exit(buf_pool);
++ break;
++ }
++ if (bpage) {
++ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
++ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+@@ -1655,7 +1684,7 @@
+
+ } while (count < min_n && bpage != NULL && len > 0);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ return(count);
+ }
+@@ -1694,13 +1723,15 @@
+ || sync_thread_levels_empty_gen(TRUE));
+ #endif /* UNIV_SYNC_DEBUG */
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ /* Note: The buffer pool mutex is released and reacquired within
+ the flush functions. */
+ switch(flush_type) {
+ case BUF_FLUSH_LRU:
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ count = buf_flush_LRU_list_batch(buf_pool, min_n);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ break;
+ case BUF_FLUSH_LIST:
+ count = buf_flush_flush_list_batch(buf_pool, min_n, lsn_limit);
+@@ -1709,7 +1740,7 @@
+ ut_error;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ buf_flush_buffered_writes();
+
+@@ -1965,7 +1996,7 @@
+ retry:
+ //buf_pool_mutex_enter(buf_pool);
+ if (have_LRU_mutex)
+- buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
+
+@@ -1982,15 +2013,15 @@
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ continue;
+ }
+- block_mutex = buf_page_get_mutex(bpage);
+-
+- mutex_enter(block_mutex);
++ block_mutex = buf_page_get_mutex_enter(bpage);
+
+- if (buf_flush_ready_for_replace(bpage)) {
++ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
+ n_replaceable++;
+ }
+
+- mutex_exit(block_mutex);
++ if (block_mutex) {
++ mutex_exit(block_mutex);
++ }
+
+ distance++;
+
+@@ -1999,7 +2030,7 @@
+
+ //buf_pool_mutex_exit(buf_pool);
+ if (have_LRU_mutex)
+- buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN(buf_pool)) {
+
+@@ -2198,7 +2229,7 @@
+
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
++ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
+ ut_ad(ut_list_node_313->in_flush_list));
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+@@ -2238,7 +2269,7 @@
+ rnode = rbt_next(buf_pool->flush_rbt, rnode);
+ }
+
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+
+ ut_a(!bpage || om >= bpage->oldest_modification);
+ }
+diff -ruN a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
+--- a/storage/innobase/buf/buf0lru.c 2010-12-03 15:22:36.321987250 +0900
++++ b/storage/innobase/buf/buf0lru.c 2010-12-03 15:48:29.293023197 +0900
+@@ -143,8 +143,9 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block); /*!< in: block, must contain a file page and
++ buf_block_t* block, /*!< in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex);
+
+ /******************************************************************//**
+ Determines if the unzip_LRU list should be used for evicting a victim
+@@ -154,15 +155,20 @@
+ ibool
+ buf_LRU_evict_from_unzip_LRU(
+ /*=========================*/
+- buf_pool_t* buf_pool)
++ buf_pool_t* buf_pool,
++ ibool have_LRU_mutex)
+ {
+ ulint io_avg;
+ ulint unzip_avg;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
++ if (!have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ /* If the unzip_LRU list is empty, we can only use the LRU. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(FALSE);
+ }
+
+@@ -171,14 +177,20 @@
+ decompressed pages in the buffer pool. */
+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
+ <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(FALSE);
+ }
+
+ /* If eviction hasn't started yet, we assume by default
+ that a workload is disk bound. */
+ if (buf_pool->freed_page_clock == 0) {
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ return(TRUE);
+ }
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Calculate the average over past intervals, and add the values
+ of the current interval. */
+@@ -246,19 +258,23 @@
+ page_arr = ut_malloc(
+ sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ scan_again:
+ num_entries = 0;
+ bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+
+ while (bpage != NULL) {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+ buf_page_t* prev_bpage;
+
+- mutex_enter(block_mutex);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
+
++ if (!block_mutex) {
++ goto next_page;
++ }
++
+ ut_a(buf_page_in_file(bpage));
+
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
+@@ -287,14 +303,16 @@
+
+ /* Array full. We release the buf_pool->mutex to
+ obey the latching order. */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ buf_LRU_drop_page_hash_batch(
+ id, zip_size, page_arr, num_entries);
+
+ num_entries = 0;
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ } else {
+ mutex_exit(block_mutex);
+ }
+@@ -319,7 +337,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ /* Drop any remaining batch of search hashed pages. */
+ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
+@@ -341,7 +360,9 @@
+ ibool all_freed;
+
+ scan_again:
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
+
+ all_freed = TRUE;
+
+@@ -369,8 +390,16 @@
+
+ all_freed = FALSE;
+ } else {
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+- mutex_enter(block_mutex);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
++
++ if (!block_mutex) {
++ /* This should be an impossible case...
++ Something is wrong, so we will scan_again */
++
++ all_freed = FALSE;
++
++ goto next_page_no_mutex;
++ }
+
+ if (bpage->buf_fix_count > 0) {
+
+@@ -429,7 +458,9 @@
+ ulint page_no;
+ ulint zip_size;
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
+@@ -454,7 +485,7 @@
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+ buf_LRU_block_free_hashed_page((buf_block_t*)
+- bpage);
++ bpage, TRUE);
+ } else {
+ /* The block_mutex should have been
+ released by buf_LRU_block_remove_hashed_page()
+@@ -486,7 +517,9 @@
+ bpage = prev_bpage;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+
+ if (!all_freed) {
+ os_thread_sleep(20000);
+@@ -532,7 +565,9 @@
+ buf_page_t* b;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++ ut_ad(mutex_own(&buf_pool->flush_list_mutex));
+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
+
+ /* Find the first successor of bpage in the LRU list
+@@ -540,17 +575,17 @@
+ b = bpage;
+ do {
+ b = UT_LIST_GET_NEXT(LRU, b);
+- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
++ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
+
+ /* Insert bpage before b, i.e., after the predecessor of b. */
+ if (b) {
+- b = UT_LIST_GET_PREV(list, b);
++ b = UT_LIST_GET_PREV(zip_list, b);
+ }
+
+ if (b) {
+- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
++ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
+ } else {
+- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
++ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
+ }
+ }
+
+@@ -563,18 +598,19 @@
+ buf_LRU_free_from_unzip_LRU_list(
+ /*=============================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+- ulint n_iterations) /*!< in: how many times this has
++ ulint n_iterations, /*!< in: how many times this has
+ been called repeatedly without
+ result: a high value means that
+ we should search farther; we will
+ search n_iterations / 5 of the
+ unzip_LRU list, or nothing if
+ n_iterations >= 5 */
++ ibool have_LRU_mutex)
+ {
+ buf_block_t* block;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ /* Theoratically it should be much easier to find a victim
+ from unzip_LRU as we can choose even a dirty block (as we'll
+@@ -584,7 +620,7 @@
+ if we have done five iterations so far. */
+
+ if (UNIV_UNLIKELY(n_iterations >= 5)
+- || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
++ || !buf_LRU_evict_from_unzip_LRU(buf_pool, have_LRU_mutex)) {
+
+ return(FALSE);
+ }
+@@ -592,18 +628,25 @@
+ distance = 100 + (n_iterations
+ * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
+
++restart:
+ for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
+ UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
+ block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
+
+ enum buf_lru_free_block_status freed;
+
++ mutex_enter(&block->mutex);
++ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
++ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
++ mutex_exit(&block->mutex);
++ goto restart;
++ }
++
+ ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+ ut_ad(block->in_unzip_LRU_list);
+ ut_ad(block->page.in_LRU_list);
+
+- mutex_enter(&block->mutex);
+- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
++ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
+ mutex_exit(&block->mutex);
+
+ switch (freed) {
+@@ -637,21 +680,23 @@
+ buf_LRU_free_from_common_LRU_list(
+ /*==============================*/
+ buf_pool_t* buf_pool,
+- ulint n_iterations)
++ ulint n_iterations,
+ /*!< in: how many times this has been called
+ repeatedly without result: a high value means
+ that we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* bpage;
+ ulint distance;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+ distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
+
++restart:
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
+ bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
+@@ -659,14 +704,23 @@
+ enum buf_lru_free_block_status freed;
+ unsigned accessed;
+ mutex_t* block_mutex
+- = buf_page_get_mutex(bpage);
++ = buf_page_get_mutex_enter(bpage);
++
++ if (!block_mutex) {
++ goto restart;
++ }
++
++ if (!bpage->in_LRU_list
++ || !buf_page_in_file(bpage)) {
++ mutex_exit(block_mutex);
++ goto restart;
++ }
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
+
+- mutex_enter(block_mutex);
+ accessed = buf_page_is_accessed(bpage);
+- freed = buf_LRU_free_block(bpage, TRUE, NULL);
++ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
+ mutex_exit(block_mutex);
+
+ switch (freed) {
+@@ -718,16 +772,23 @@
+ n_iterations / 5 of the unzip_LRU list. */
+ {
+ ibool freed = FALSE;
++ ibool have_LRU_mutex = FALSE;
+
+- buf_pool_mutex_enter(buf_pool);
++ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
++ have_LRU_mutex = TRUE;
++
++ //buf_pool_mutex_enter(buf_pool);
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+- freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
++ freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
+
+ if (!freed) {
+ freed = buf_LRU_free_from_common_LRU_list(
+- buf_pool, n_iterations);
++ buf_pool, n_iterations, have_LRU_mutex);
+ }
+
++ buf_pool_mutex_enter(buf_pool);
+ if (!freed) {
+ buf_pool->LRU_flush_ended = 0;
+ } else if (buf_pool->LRU_flush_ended > 0) {
+@@ -735,6 +796,8 @@
+ }
+
+ buf_pool_mutex_exit(buf_pool);
++ if (have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ return(freed);
+ }
+@@ -795,7 +858,9 @@
+
+ buf_pool = buf_pool_from_array(i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
+
+ if (!recv_recovery_on
+ && UT_LIST_GET_LEN(buf_pool->free)
+@@ -805,7 +870,9 @@
+ ret = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_exit(&buf_pool->free_list_mutex);
+ }
+
+ return(ret);
+@@ -823,9 +890,10 @@
+ {
+ buf_block_t* block;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
++ mutex_enter(&buf_pool->free_list_mutex);
++ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
+
+ if (block) {
+
+@@ -834,7 +902,9 @@
+ ut_ad(!block->page.in_flush_list);
+ ut_ad(!block->page.in_LRU_list);
+ ut_a(!buf_page_in_file(&block->page));
+- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
++ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
++
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ mutex_enter(&block->mutex);
+
+@@ -844,6 +914,8 @@
+ ut_ad(buf_pool_from_block(block) == buf_pool);
+
+ mutex_exit(&block->mutex);
++ } else {
++ mutex_exit(&buf_pool->free_list_mutex);
+ }
+
+ return(block);
+@@ -868,7 +940,7 @@
+ ibool mon_value_was = FALSE;
+ ibool started_monitor = FALSE;
+ loop:
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
+ + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
+@@ -951,8 +1023,10 @@
+ ibool lru;
+ page_zip_set_size(&block->page.zip, zip_size);
+
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ block->page.zip.data = buf_buddy_alloc(
+- buf_pool, zip_size, &lru);
++ buf_pool, zip_size, &lru, FALSE);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+
+ UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
+ } else {
+@@ -960,7 +1034,7 @@
+ block->page.zip.data = NULL;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ if (started_monitor) {
+ srv_print_innodb_monitor = mon_value_was;
+@@ -972,7 +1046,7 @@
+ /* If no block was in the free list, search from the end of the LRU
+ list and try to free a block there */
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+
+ freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
+
+@@ -1058,7 +1132,8 @@
+ ulint new_len;
+
+ ut_a(buf_pool->LRU_old);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
+ ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
+ #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
+@@ -1124,7 +1199,8 @@
+ {
+ buf_page_t* bpage;
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+
+ /* We first initialize all blocks in the LRU list as old and then use
+@@ -1159,13 +1235,14 @@
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ if (buf_page_belongs_to_unzip_LRU(bpage)) {
+ buf_block_t* block = (buf_block_t*) bpage;
+
+ ut_ad(block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = FALSE);
++ block->in_unzip_LRU_list = FALSE;
+
+ UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
+ }
+@@ -1183,7 +1260,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1260,12 +1338,13 @@
+
+ ut_ad(buf_pool);
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+
+ ut_ad(!block->in_unzip_LRU_list);
+- ut_d(block->in_unzip_LRU_list = TRUE);
++ block->in_unzip_LRU_list = TRUE;
+
+ if (old) {
+ UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
+@@ -1286,7 +1365,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+
+@@ -1337,7 +1417,8 @@
+
+ ut_ad(buf_pool);
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ ut_a(buf_page_in_file(bpage));
+ ut_ad(!bpage->in_LRU_list);
+@@ -1416,7 +1497,8 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+
+ if (bpage->old) {
+ buf_pool->stat.n_pages_made_young++;
+@@ -1458,19 +1540,20 @@
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released)
++ ibool* buf_pool_mutex_released,
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool_mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex)
+ {
+ buf_page_t* b = NULL;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(block_mutex));
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(bpage->in_LRU_list);
+ ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
+ #if UNIV_WORD_SIZE == 4
+ /* On 32-bit systems, there is no padding in buf_page_t. On
+@@ -1479,7 +1562,7 @@
+ UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
+ #endif
+
+- if (!buf_page_can_relocate(bpage)) {
++ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
+
+ /* Do not free buffer-fixed or I/O-fixed blocks. */
+ return(BUF_LRU_NOT_FREED);
+@@ -1511,15 +1594,15 @@
+ If it cannot be allocated (without freeing a block
+ from the LRU list), refuse to free bpage. */
+ alloc:
+- buf_pool_mutex_exit_forbid(buf_pool);
+- b = buf_buddy_alloc(buf_pool, sizeof *b, NULL);
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
++ b = buf_buddy_alloc(buf_pool, sizeof *b, NULL, FALSE);
++ //buf_pool_mutex_exit_allow(buf_pool);
+
+ if (UNIV_UNLIKELY(!b)) {
+ return(BUF_LRU_CANNOT_RELOCATE);
+ }
+
+- memcpy(b, bpage, sizeof *b);
++ //memcpy(b, bpage, sizeof *b);
+ }
+
+ #ifdef UNIV_DEBUG
+@@ -1530,6 +1613,39 @@
+ }
+ #endif /* UNIV_DEBUG */
+
++ /* to avoid breaking the latch order, block_mutex must be released and re-entered */
++ mutex_exit(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ mutex_enter(block_mutex);
++
++ /* recheck states of block */
++ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
++ || !buf_page_can_relocate(bpage)) {
++not_freed:
++ if (b) {
++ buf_buddy_free(buf_pool, b, sizeof *b, TRUE);
++ }
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ return(BUF_LRU_NOT_FREED);
++ } else if (zip || !bpage->zip.data) {
++ if (bpage->oldest_modification)
++ goto not_freed;
++ } else if (bpage->oldest_modification) {
++ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
++ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
++ goto not_freed;
++ }
++ }
++
++ if (b) {
++ memcpy(b, bpage, sizeof *b);
++ }
++
+ if (buf_LRU_block_remove_hashed_page(bpage, zip)
+ != BUF_BLOCK_ZIP_FREE) {
+ ut_a(bpage->buf_fix_count == 0);
+@@ -1546,6 +1662,10 @@
+
+ ut_a(!hash_b);
+
++ while (prev_b && !prev_b->in_LRU_list) {
++ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
++ }
++
+ b->state = b->oldest_modification
+ ? BUF_BLOCK_ZIP_DIRTY
+ : BUF_BLOCK_ZIP_PAGE;
+@@ -1642,7 +1762,9 @@
+ *buf_pool_mutex_released = TRUE;
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ mutex_exit(block_mutex);
+
+ /* Remove possible adaptive hash index on the page.
+@@ -1674,7 +1796,9 @@
+ : BUF_NO_CHECKSUM_MAGIC);
+ }
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ if (have_LRU_mutex)
++ mutex_enter(&buf_pool->LRU_list_mutex);
+ mutex_enter(block_mutex);
+
+ if (b) {
+@@ -1684,13 +1808,17 @@
+ mutex_exit(&buf_pool->zip_mutex);
+ }
+
+- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
++ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
+ } else {
+ /* The block_mutex should have been released by
+ buf_LRU_block_remove_hashed_page() when it returns
+ BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ mutex_enter(block_mutex);
++
++ if (!have_LRU_mutex)
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+
+ return(BUF_LRU_FREED);
+@@ -1702,13 +1830,14 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block) /*!< in: block, must not contain a file page */
++ buf_block_t* block, /*!< in: block, must not contain a file page */
++ ibool have_page_hash_mutex)
+ {
+ void* data;
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
+
+ ut_ad(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(&block->mutex));
+
+ switch (buf_block_get_state(block)) {
+@@ -1742,18 +1871,21 @@
+ if (data) {
+ block->page.zip.data = NULL;
+ mutex_exit(&block->mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+- buf_pool, data, page_zip_get_size(&block->page.zip));
++ buf_pool, data, page_zip_get_size(&block->page.zip),
++ have_page_hash_mutex);
+
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_allow(buf_pool);
+ mutex_enter(&block->mutex);
+ page_zip_set_size(&block->page.zip, 0);
+ }
+
+- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
++ mutex_enter(&buf_pool->free_list_mutex);
++ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
+ ut_d(block->page.in_free_list = TRUE);
++ mutex_exit(&buf_pool->free_list_mutex);
+
+ UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
+ }
+@@ -1783,7 +1915,11 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+ ut_ad(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
++#endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+ ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
+@@ -1891,7 +2027,9 @@
+
+ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ mutex_exit(buf_page_get_mutex(bpage));
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ buf_print();
+ buf_LRU_print();
+ buf_validate();
+@@ -1912,17 +2050,17 @@
+ ut_a(bpage->zip.data);
+ ut_a(buf_page_get_zip_size(bpage));
+
+- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
++ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
+
+ mutex_exit(&buf_pool->zip_mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+ buf_pool, bpage->zip.data,
+- page_zip_get_size(&bpage->zip));
++ page_zip_get_size(&bpage->zip), TRUE);
+
+- buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
+- buf_pool_mutex_exit_allow(buf_pool);
++ buf_buddy_free(buf_pool, bpage, sizeof(*bpage), TRUE);
++ //buf_pool_mutex_exit_allow(buf_pool);
+
+ UNIV_MEM_UNDESC(bpage);
+ return(BUF_BLOCK_ZIP_FREE);
+@@ -1945,13 +2083,13 @@
+ ut_ad(!bpage->in_flush_list);
+ ut_ad(!bpage->in_LRU_list);
+ mutex_exit(&((buf_block_t*) bpage)->mutex);
+- buf_pool_mutex_exit_forbid(buf_pool);
++ //buf_pool_mutex_exit_forbid(buf_pool);
+
+ buf_buddy_free(
+ buf_pool, data,
+- page_zip_get_size(&bpage->zip));
++ page_zip_get_size(&bpage->zip), TRUE);
+
+- buf_pool_mutex_exit_allow(buf_pool);
++ //buf_pool_mutex_exit_allow(buf_pool);
+ mutex_enter(&((buf_block_t*) bpage)->mutex);
+ page_zip_set_size(&bpage->zip, 0);
+ }
+@@ -1977,18 +2115,19 @@
+ void
+ buf_LRU_block_free_hashed_page(
+ /*===========================*/
+- buf_block_t* block) /*!< in: block, must contain a file page and
++ buf_block_t* block, /*!< in: block, must contain a file page and
+ be in a state where it can be freed */
++ ibool have_page_hash_mutex)
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_block(block);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_block(block);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(&block->mutex));
+
+ buf_block_set_state(block, BUF_BLOCK_MEMORY);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
+ }
+
+ /**********************************************************************//**
+@@ -2015,7 +2154,8 @@
+ }
+
+ if (adjust) {
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ if (ratio != buf_pool->LRU_old_ratio) {
+ buf_pool->LRU_old_ratio = ratio;
+@@ -2027,7 +2167,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ } else {
+ buf_pool->LRU_old_ratio = ratio;
+ }
+@@ -2124,7 +2265,8 @@
+ ulint new_len;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
+
+@@ -2185,16 +2327,22 @@
+
+ ut_a(buf_pool->LRU_old_len == old_len);
+
+- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ mutex_enter(&buf_pool->free_list_mutex);
++
++ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
+ ut_ad(ut_list_node_313->in_free_list));
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
+ bpage != NULL;
+- bpage = UT_LIST_GET_NEXT(list, bpage)) {
++ bpage = UT_LIST_GET_NEXT(free, bpage)) {
+
+ ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
+ }
+
++ mutex_exit(&buf_pool->free_list_mutex);
++ mutex_enter(&buf_pool->LRU_list_mutex);
++
+ UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
+ ut_ad(ut_list_node_313->in_unzip_LRU_list
+ && ut_list_node_313->page.in_LRU_list));
+@@ -2208,7 +2356,8 @@
+ ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /**********************************************************************//**
+@@ -2244,7 +2393,8 @@
+ const buf_page_t* bpage;
+
+ ut_ad(buf_pool);
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->LRU_list_mutex);
+
+ bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
+
+@@ -2301,7 +2451,8 @@
+ bpage = UT_LIST_GET_NEXT(LRU, bpage);
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->LRU_list_mutex);
+ }
+
+ /**********************************************************************//**
+diff -ruN a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c
+--- a/storage/innobase/buf/buf0rea.c 2010-12-03 15:22:36.323977308 +0900
++++ b/storage/innobase/buf/buf0rea.c 2010-12-03 15:48:29.296024468 +0900
+@@ -311,6 +311,7 @@
+
+ return(0);
+ }
++ buf_pool_mutex_exit(buf_pool);
+
+ /* Check that almost all pages in the area have been accessed; if
+ offset == low, the accesses must be in a descending order, otherwise,
+@@ -329,6 +330,7 @@
+
+ fail_count = 0;
+
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ for (i = low; i < high; i++) {
+ bpage = buf_page_hash_get(buf_pool, space, i);
+
+@@ -356,7 +358,8 @@
+
+ if (fail_count > threshold) {
+ /* Too many failures: return */
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+ return(0);
+ }
+
+@@ -371,7 +374,8 @@
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+ if (bpage == NULL) {
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(0);
+ }
+@@ -397,7 +401,8 @@
+ pred_offset = fil_page_get_prev(frame);
+ succ_offset = fil_page_get_next(frame);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ if ((offset == low) && (succ_offset == offset + 1)) {
+
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:03.048955897 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:48:29.304024564 +0900
+@@ -245,6 +245,10 @@
+ # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+ {&buf_pool_mutex_key, "buf_pool_mutex", 0},
+ {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
++ {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
++ {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
++ {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
++ {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
+ {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
+ {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
+ {&dict_sys_mutex_key, "dict_sys_mutex", 0},
+@@ -295,6 +299,7 @@
+ {&archive_lock_key, "archive_lock", 0},
+ # endif /* UNIV_LOG_ARCHIVE */
+ {&btr_search_latch_key, "btr_search_latch", 0},
++ {&buf_pool_page_hash_key, "buf_pool_page_hash_latch", 0},
+ # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
+ {&buf_block_lock_key, "buf_block_lock", 0},
+ # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 15:37:45.517105700 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 15:48:29.331024462 +0900
+@@ -1566,7 +1566,8 @@
+
+ buf_pool = buf_pool_from_array(i);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ mutex_enter(&buf_pool->zip_free_mutex);
+
+ for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
+ buf_buddy_stat_t* buddy_stat;
+@@ -1596,7 +1597,8 @@
+ }
+ }
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ mutex_exit(&buf_pool->zip_free_mutex);
+
+ if (status) {
+ break;
+diff -ruN a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
+--- a/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:03.068954202 +0900
++++ b/storage/innobase/ibuf/ibuf0ibuf.c 2010-12-03 15:48:29.335988682 +0900
+@@ -3705,9 +3705,11 @@
+ ulint fold = buf_page_address_fold(space, page_no);
+ buf_pool_t* buf_pool = buf_pool_get(space, page_no);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+ bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ if (UNIV_LIKELY_NULL(bpage)) {
+ /* A buffer pool watch has been set or the
+diff -ruN a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
+--- a/storage/innobase/include/buf0buddy.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.h 2010-12-03 15:48:29.338023826 +0900
+@@ -51,10 +51,11 @@
+ buf_pool_t* buf_pool,
+ /*!< buffer pool in which the block resides */
+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
+- ibool* lru) /*!< in: pointer to a variable that will be assigned
++ ibool* lru, /*!< in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool->mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**********************************************************************//**
+@@ -67,7 +68,8 @@
+ /*!< buffer pool in which the block resides */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
++ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ #ifndef UNIV_NONINL
+diff -ruN a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
+--- a/storage/innobase/include/buf0buddy.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buddy.ic 2010-12-03 15:48:29.339040413 +0900
+@@ -46,10 +46,11 @@
+ /*!< in: buffer pool in which the page resides */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+- ibool* lru) /*!< in: pointer to a variable that will be assigned
++ ibool* lru, /*!< in: pointer to a variable that will be assigned
+ TRUE if storage was allocated from the LRU list
+ and buf_pool->mutex was temporarily released,
+ or NULL if the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ __attribute__((malloc));
+
+ /**********************************************************************//**
+@@ -61,8 +62,9 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint i) /*!< in: index of buf_pool->zip_free[],
++ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
++ ibool have_page_hash_mutex)
+ __attribute__((nonnull));
+
+ /**********************************************************************//**
+@@ -102,16 +104,17 @@
+ the page resides */
+ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
+- ibool* lru) /*!< in: pointer to a variable
++ ibool* lru, /*!< in: pointer to a variable
+ that will be assigned TRUE if
+ storage was allocated from the
+ LRU list and buf_pool->mutex was
+ temporarily released, or NULL if
+ the LRU list should not be used */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+
+- return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
++ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ }
+
+ /**********************************************************************//**
+@@ -123,12 +126,25 @@
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+- ulint size) /*!< in: block size, up to
++ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
++ ibool have_page_hash_mutex)
+ {
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++
++ if (!have_page_hash_mutex) {
++ mutex_enter(&buf_pool->LRU_list_mutex);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
+
+- buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
++ mutex_enter(&buf_pool->zip_free_mutex);
++ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
++ mutex_exit(&buf_pool->zip_free_mutex);
++
++ if (!have_page_hash_mutex) {
++ mutex_exit(&buf_pool->LRU_list_mutex);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
+ }
+
+ #ifdef UNIV_MATERIALIZE
+diff -ruN a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
+--- a/storage/innobase/include/buf0buf.h 2010-12-03 15:22:36.327954660 +0900
++++ b/storage/innobase/include/buf0buf.h 2010-12-03 15:48:29.343024683 +0900
+@@ -132,6 +132,20 @@
+ /*==========================*/
+
+ /********************************************************************//**
++X-locks the page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void);
++/*================================*/
++
++/********************************************************************//**
++Releases the x-lock on the page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void);
++/*==================================*/
++
++/********************************************************************//**
+ Creates the buffer pool.
+ @return own: buf_pool object, NULL if not enough memory or error */
+ UNIV_INTERN
+@@ -761,6 +775,15 @@
+ const buf_page_t* bpage) /*!< in: pointer to control block */
+ __attribute__((pure));
+
++/*************************************************************************
++Gets the mutex of a block and enters it, re-checking the choice once held. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++ const buf_page_t* bpage); /*!< in: pointer to control block;
++ not "pure": the call enters a mutex */
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return flush type */
+@@ -1242,7 +1265,7 @@
+ All these are protected by buf_pool->mutex. */
+ /* @{ */
+
+- UT_LIST_NODE_T(buf_page_t) list;
++ /* UT_LIST_NODE_T(buf_page_t) list; */
+ /*!< based on state, this is a
+ list node, protected either by
+ buf_pool->mutex or by
+@@ -1270,6 +1293,10 @@
+ BUF_BLOCK_REMOVE_HASH or
+ BUF_BLOCK_READY_IN_USE. */
+
++ /* resplit for optimistic use */
++ UT_LIST_NODE_T(buf_page_t) free;
++ UT_LIST_NODE_T(buf_page_t) flush_list;
++ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
+ #ifdef UNIV_DEBUG
+ ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
+ when buf_pool->flush_list_mutex is
+@@ -1362,11 +1389,11 @@
+ a block is in the unzip_LRU list
+ if page.state == BUF_BLOCK_FILE_PAGE
+ and page.zip.data != NULL */
+-#ifdef UNIV_DEBUG
++//#ifdef UNIV_DEBUG
+ ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
+ decompressed LRU list;
+ used in debugging */
+-#endif /* UNIV_DEBUG */
++//#endif /* UNIV_DEBUG */
+ mutex_t mutex; /*!< mutex protecting this block:
+ state (also protected by the buffer
+ pool mutex), io_fix, buf_fix_count,
+@@ -1532,6 +1559,11 @@
+ pool instance, protects compressed
+ only pages (of type buf_page_t, not
+ buf_block_t */
++ mutex_t LRU_list_mutex;
++ rw_lock_t page_hash_latch;
++ mutex_t free_list_mutex;
++ mutex_t zip_free_mutex;
++ mutex_t zip_hash_mutex;
+ ulint instance_no; /*!< Array index of this buffer
+ pool instance */
+ ulint old_pool_size; /*!< Old pool size in bytes */
+diff -ruN a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
+--- a/storage/innobase/include/buf0buf.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0buf.ic 2010-12-03 15:48:29.345024524 +0900
+@@ -274,7 +274,7 @@
+ case BUF_BLOCK_ZIP_FREE:
+ /* This is a free page in buf_pool->zip_free[].
+ Such pages should only be accessed by the buddy allocator. */
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ break;
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -317,9 +317,14 @@
+ {
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
++ if (buf_pool_watch_is_sentinel(buf_pool, bpage)) {
++ /* TODO: this code is the interim. should be confirmed later. */
++ return(&buf_pool->zip_mutex);
++ }
++
+ switch (buf_page_get_state(bpage)) {
+ case BUF_BLOCK_ZIP_FREE:
+- ut_error;
++ /* ut_error; */ /* optimistic */
+ return(NULL);
+ case BUF_BLOCK_ZIP_PAGE:
+ case BUF_BLOCK_ZIP_DIRTY:
+@@ -329,6 +334,28 @@
+ }
+ }
+
++/*************************************************************************
++Gets the mutex of a block and enters it, retrying until it is still the block's mutex. */
++UNIV_INLINE
++mutex_t*
++buf_page_get_mutex_enter(
++/*=========================*/
++ const buf_page_t* bpage) /*!< in: pointer to control block */
++{
++ mutex_t* block_mutex;
++
++ while(1) {
++ block_mutex = buf_page_get_mutex(bpage);
++ if (!block_mutex)
++ return block_mutex;
++
++ mutex_enter(block_mutex);
++ if (block_mutex == buf_page_get_mutex(bpage))
++ return block_mutex;
++ mutex_exit(block_mutex);
++ }
++}
++
+ /*********************************************************************//**
+ Get the flush type of a page.
+ @return flush type */
+@@ -425,8 +452,8 @@
+ enum buf_io_fix io_fix) /*!< in: io_fix state */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+@@ -456,14 +483,14 @@
+ const buf_page_t* bpage) /*!< control block being relocated */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_ad(buf_page_in_file(bpage));
+- ut_ad(bpage->in_LRU_list);
++ //ut_ad(bpage->in_LRU_list);
+
+- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
++ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
+ && bpage->buf_fix_count == 0);
+ }
+
+@@ -477,8 +504,8 @@
+ const buf_page_t* bpage) /*!< in: control block */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
+ ut_ad(buf_page_in_file(bpage));
+
+@@ -498,7 +525,8 @@
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ #endif /* UNIV_DEBUG */
+ ut_a(buf_page_in_file(bpage));
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
+ ut_ad(bpage->in_LRU_list);
+
+ #ifdef UNIV_LRU_DEBUG
+@@ -545,9 +573,10 @@
+ ulint time_ms) /*!< in: ut_time_ms() */
+ {
+ #ifdef UNIV_DEBUG
+- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
++ //ut_ad(buf_pool_mutex_own(buf_pool));
+ #endif
++ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_in_file(bpage));
+
+ if (!bpage->access_time) {
+@@ -761,19 +790,19 @@
+ /*===========*/
+ buf_block_t* block) /*!< in, own: block to be freed */
+ {
+- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
++ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
+
+ mutex_enter(&block->mutex);
+
+ ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
+
+- buf_LRU_block_free_non_file_page(block);
++ buf_LRU_block_free_non_file_page(block, FALSE);
+
+ mutex_exit(&block->mutex);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
+ }
+ #endif /* !UNIV_HOTBACKUP */
+
+@@ -821,17 +850,17 @@
+ page frame */
+ {
+ ib_uint64_t lsn;
+- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+-
+- mutex_enter(block_mutex);
++ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
+
+- if (buf_page_in_file(bpage)) {
++ if (block_mutex && buf_page_in_file(bpage)) {
+ lsn = bpage->newest_modification;
+ } else {
+ lsn = 0;
+ }
+
+- mutex_exit(block_mutex);
++ if (block_mutex) {
++ mutex_exit(block_mutex);
++ }
+
+ return(lsn);
+ }
+@@ -849,7 +878,7 @@
+ #ifdef UNIV_SYNC_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+- ut_ad((buf_pool_mutex_own(buf_pool)
++ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
+ && (block->page.buf_fix_count == 0))
+ || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+ #endif /* UNIV_SYNC_DEBUG */
+@@ -979,7 +1008,11 @@
+ buf_page_t* bpage;
+
+ ut_ad(buf_pool);
+- ut_ad(buf_pool_mutex_own(buf_pool));
++ //ut_ad(buf_pool_mutex_own(buf_pool));
++#ifdef UNIV_SYNC_DEBUG
++ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
++ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
++#endif
+ ut_ad(fold == buf_page_address_fold(space, offset));
+
+ /* Look for the page in the hash table */
+@@ -1064,11 +1097,13 @@
+ const buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+
+- buf_pool_mutex_enter(buf_pool);
++ //buf_pool_mutex_enter(buf_pool);
++ rw_lock_s_lock(&buf_pool->page_hash_latch);
+
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+- buf_pool_mutex_exit(buf_pool);
++ //buf_pool_mutex_exit(buf_pool);
++ rw_lock_s_unlock(&buf_pool->page_hash_latch);
+
+ return(bpage != NULL);
+ }
+@@ -1196,4 +1231,38 @@
+ buf_pool_mutex_exit(buf_pool);
+ }
+ }
++
++/********************************************************************//**
++X-locks the page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_lock_all(void)
++/*===============================*/
++{
++ ulint i;
++
++ for (i = 0; i < srv_buf_pool_instances; i++) {
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++ rw_lock_x_lock(&buf_pool->page_hash_latch);
++ }
++}
++
++/********************************************************************//**
++Releases the x-lock on the page_hash_latch of every buffer pool instance. */
++UNIV_INLINE
++void
++buf_pool_page_hash_x_unlock_all(void)
++/*=================================*/
++{
++ ulint i;
++
++ for (i = 0; i < srv_buf_pool_instances; i++) {
++ buf_pool_t* buf_pool;
++
++ buf_pool = buf_pool_from_array(i);
++ rw_lock_x_unlock(&buf_pool->page_hash_latch);
++ }
++}
+ #endif /* !UNIV_HOTBACKUP */
+diff -ruN a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
+--- a/storage/innobase/include/buf0lru.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/buf0lru.h 2010-12-03 15:48:29.349024701 +0900
+@@ -113,10 +113,11 @@
+ buf_page_t* bpage, /*!< in: block to be freed */
+ ibool zip, /*!< in: TRUE if should remove also the
+ compressed page of an uncompressed page */
+- ibool* buf_pool_mutex_released);
++ ibool* buf_pool_mutex_released,
+ /*!< in: pointer to a variable that will
+ be assigned TRUE if buf_pool->mutex
+ was temporarily released, or NULL */
++ ibool have_LRU_mutex);
+ /******************************************************************//**
+ Try to free a replaceable block.
+ @return TRUE if found and freed */
+@@ -163,7 +164,8 @@
+ void
+ buf_LRU_block_free_non_file_page(
+ /*=============================*/
+- buf_block_t* block); /*!< in: block, must not contain a file page */
++ buf_block_t* block, /*!< in: block, must not contain a file page */
++ ibool have_page_hash_mutex);
+ /******************************************************************//**
+ Adds a block to the LRU list. */
+ UNIV_INTERN
+diff -ruN a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
+--- a/storage/innobase/include/sync0rw.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0rw.h 2010-12-03 15:48:29.349942993 +0900
+@@ -112,6 +112,7 @@
+ extern mysql_pfs_key_t archive_lock_key;
+ # endif /* UNIV_LOG_ARCHIVE */
+ extern mysql_pfs_key_t btr_search_latch_key;
++extern mysql_pfs_key_t buf_pool_page_hash_key;
+ extern mysql_pfs_key_t buf_block_lock_key;
+ # ifdef UNIV_SYNC_DEBUG
+ extern mysql_pfs_key_t buf_block_debug_latch_key;
+diff -ruN a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
+--- a/storage/innobase/include/sync0sync.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/sync0sync.h 2010-12-03 15:48:29.352024614 +0900
+@@ -75,6 +75,10 @@
+ extern mysql_pfs_key_t buffer_block_mutex_key;
+ extern mysql_pfs_key_t buf_pool_mutex_key;
+ extern mysql_pfs_key_t buf_pool_zip_mutex_key;
++extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
++extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
++extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
++extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+ extern mysql_pfs_key_t cache_last_read_mutex_key;
+ extern mysql_pfs_key_t dict_foreign_err_mutex_key;
+ extern mysql_pfs_key_t dict_sys_mutex_key;
+@@ -660,7 +664,7 @@
+ #define SYNC_TRX_LOCK_HEAP 298
+ #define SYNC_TRX_SYS_HEADER 290
+ #define SYNC_LOG 170
+-#define SYNC_LOG_FLUSH_ORDER 147
++#define SYNC_LOG_FLUSH_ORDER 156
+ #define SYNC_RECV 168
+ #define SYNC_WORK_QUEUE 162
+ #define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
+@@ -670,8 +674,13 @@
+ SYNC_SEARCH_SYS, as memory allocation
+ can call routines there! Otherwise
+ the level is SYNC_MEM_HASH. */
++#define SYNC_BUF_LRU_LIST 158
++#define SYNC_BUF_PAGE_HASH 157
++#define SYNC_BUF_BLOCK 155 /* Block mutex */
++#define SYNC_BUF_FREE_LIST 153
++#define SYNC_BUF_ZIP_FREE 152
++#define SYNC_BUF_ZIP_HASH 151
+ #define SYNC_BUF_POOL 150 /* Buffer pool mutex */
+-#define SYNC_BUF_BLOCK 146 /* Block mutex */
+ #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
+ #define SYNC_DOUBLEWRITE 140
+ #define SYNC_ANY_LATCH 135
+@@ -703,7 +712,7 @@
+ os_fast_mutex; /*!< We use this OS mutex in place of lock_word
+ when atomic operations are not enabled */
+ #endif
+- ulint waiters; /*!< This ulint is set to 1 if there are (or
++ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
+ may be) threads waiting in the global wait
+ array for this mutex to be released.
+ Otherwise, this is 0. */
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:03.080956216 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:48:29.355023766 +0900
+@@ -3060,7 +3060,7 @@
+ level += log_sys->max_checkpoint_age
+ - (lsn - oldest_modification);
+ }
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+ n_blocks++;
+ }
+
+@@ -3145,7 +3145,7 @@
+ found = TRUE;
+ break;
+ }
+- bpage = UT_LIST_GET_NEXT(list, bpage);
++ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
+ new_blocks_num++;
+ }
+ if (!found) {
+diff -ruN a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
+--- a/storage/innobase/sync/sync0sync.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/sync/sync0sync.c 2010-12-03 15:48:29.358023890 +0900
+@@ -265,7 +265,7 @@
+ mutex->lock_word = 0;
+ #endif
+ mutex->event = os_event_create(NULL);
+- mutex_set_waiters(mutex, 0);
++ mutex->waiters = 0;
+ #ifdef UNIV_DEBUG
+ mutex->magic_n = MUTEX_MAGIC_N;
+ #endif /* UNIV_DEBUG */
+@@ -444,6 +444,15 @@
+ mutex_t* mutex, /*!< in: mutex */
+ ulint n) /*!< in: value to set */
+ {
++#ifdef INNODB_RW_LOCKS_USE_ATOMICS
++ ut_ad(mutex);
++
++ if (n) {
++ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
++ } else {
++ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
++ }
++#else
+ volatile ulint* ptr; /* declared volatile to ensure that
+ the value is stored to memory */
+ ut_ad(mutex);
+@@ -452,6 +461,7 @@
+
+ *ptr = n; /* Here we assume that the write of a single
+ word in memory is atomic */
++#endif
+ }
+
+ /******************************************************************//**
+@@ -1193,7 +1203,12 @@
+ ut_error;
+ }
+ break;
++ case SYNC_BUF_LRU_LIST:
+ case SYNC_BUF_FLUSH_LIST:
++ case SYNC_BUF_PAGE_HASH:
++ case SYNC_BUF_FREE_LIST:
++ case SYNC_BUF_ZIP_FREE:
++ case SYNC_BUF_ZIP_HASH:
+ case SYNC_BUF_POOL:
+ /* We can have multiple mutexes of this type therefore we
+ can only check whether the greater than condition holds. */
+@@ -1211,7 +1226,8 @@
+ buffer block (block->mutex or buf_pool->zip_mutex). */
+ if (!sync_thread_levels_g(array, level, FALSE)) {
+ ut_a(sync_thread_levels_g(array, level - 1, TRUE));
+- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
++ /* the exact rule is not fixed yet, for now */
++ //ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
+ }
+ break;
+ case SYNC_REC_LOCK:
--- /dev/null
+# name : innodb_stats.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
+--- a/storage/innobase/btr/btr0cur.c 2010-12-03 15:49:59.165212710 +0900
++++ b/storage/innobase/btr/btr0cur.c 2010-12-03 17:19:24.834126874 +0900
+@@ -1010,6 +1010,107 @@
+ }
+ }
+
++/**********************************************************************//**
++Positions a cursor at a randomly chosen position within a B-tree
++after the given path
++@return TRUE if the position is on the first page (first_rec_path), in which
++ case the cursor points to the first record, for use by the caller */
++UNIV_INTERN
++ibool
++btr_cur_open_at_rnd_pos_after_path(
++/*====================*/
++ dict_index_t* index, /*!< in: index */
++ ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
++ btr_path_t* first_rec_path,
++ btr_cur_t* cursor, /*!< in/out: B-tree cursor */
++ mtr_t* mtr) /*!< in: mtr */
++{
++ page_cur_t* page_cursor;
++ btr_path_t* slot;
++ ibool is_first_rec = TRUE;
++ ulint page_no;
++ ulint space;
++ ulint zip_size;
++ ulint height;
++ rec_t* node_ptr;
++ mem_heap_t* heap = NULL;
++ ulint offsets_[REC_OFFS_NORMAL_SIZE];
++ ulint* offsets = offsets_;
++ rec_offs_init(offsets_);
++
++ if (latch_mode == BTR_MODIFY_TREE) {
++ mtr_x_lock(dict_index_get_lock(index), mtr);
++ } else {
++ mtr_s_lock(dict_index_get_lock(index), mtr);
++ }
++
++ page_cursor = btr_cur_get_page_cur(cursor);
++ cursor->index = index;
++
++ space = dict_index_get_space(index);
++ zip_size = dict_table_zip_size(index->table);
++ page_no = dict_index_get_page(index);
++
++ height = ULINT_UNDEFINED;
++ slot = first_rec_path;
++
++ for (;;) {
++ buf_block_t* block;
++ page_t* page;
++
++ block = buf_page_get_gen(space, zip_size, page_no,
++ RW_NO_LATCH, NULL, BUF_GET,
++ __FILE__, __LINE__, mtr);
++ page = buf_block_get_frame(block);
++ ut_ad(index->id == btr_page_get_index_id(page));
++
++ if (height == ULINT_UNDEFINED) {
++ /* We are in the root node */
++
++ height = btr_page_get_level(page, mtr);
++ }
++
++ if (height == 0) {
++ btr_cur_latch_leaves(page, space, zip_size, page_no,
++ latch_mode, cursor, mtr);
++ }
++
++ if (is_first_rec && slot->nth_rec != ULINT_UNDEFINED) {
++ if (height == 0) {
++ /* must open the first rec */
++ page_cur_open_on_nth_user_rec(block, page_cursor, slot->nth_rec);
++ } else {
++ is_first_rec = page_cur_open_on_rnd_user_rec_after_nth(block,
++ page_cursor, slot->nth_rec);
++ }
++ } else {
++ is_first_rec = FALSE;
++ page_cur_open_on_rnd_user_rec(block, page_cursor);
++ }
++
++ if (height == 0) {
++ break;
++ }
++
++ ut_ad(height > 0);
++
++ height--;
++ slot++;
++
++ node_ptr = page_cur_get_rec(page_cursor);
++ offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
++ ULINT_UNDEFINED, &heap);
++ /* Go to the child node */
++ page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
++ }
++
++ if (UNIV_LIKELY_NULL(heap)) {
++ mem_heap_free(heap);
++ }
++
++ return (is_first_rec);
++}
++
+ /*==================== B-TREE INSERT =========================*/
+
+ /*************************************************************//**
+@@ -3488,6 +3589,154 @@
+ }
+
+ /*******************************************************************//**
++Estimates the number of leaf pages whose records have a not-NULL value in the first n_cols key columns.
++@return estimated number of pages */
++UNIV_INTERN
++ulint
++btr_estimate_n_pages_not_null(
++/*=========================*/
++ dict_index_t* index, /*!< in: index */
++ ulint n_cols, /*!< in: The cols should be not null */
++ btr_path_t* path1) /*!< in/out: path1[BTR_PATH_ARRAY_N_SLOTS]; used as the cursor's path_arr during the search for the all-NULL tuple */
++{
++ dtuple_t* tuple1;
++ btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
++ btr_cur_t cursor;
++ btr_path_t* slot1;
++ btr_path_t* slot2;
++ ibool diverged;
++ ibool diverged_lot;
++ ulint divergence_level;
++ ulint n_pages;
++ ulint i;
++ mtr_t mtr;
++ mem_heap_t* heap;
++
++ heap = mem_heap_create(n_cols * sizeof(dfield_t)
++ + sizeof(dtuple_t));
++
++ /* make tuple1 (NULL,NULL,,,) from n_cols */
++ tuple1 = dtuple_create(heap, n_cols);
++ dict_index_copy_types(tuple1, index, n_cols);
++
++ for (i = 0; i < n_cols; i++) {
++ dfield_set_null(dtuple_get_nth_field(tuple1, i));
++ }
++
++ mtr_start(&mtr);
++
++ cursor.path_arr = path1;
++
++ btr_cur_search_to_nth_level(index, 0, tuple1, PAGE_CUR_G,
++ BTR_SEARCH_LEAF | BTR_ESTIMATE,
++ &cursor, 0, __FILE__, __LINE__, &mtr);
++
++ mtr_commit(&mtr);
++
++ /* second search, in its own mini-transaction: path2 is the cursor's path_arr while the cursor is opened at the index side selected by the FALSE argument */
++
++ mtr_start(&mtr);
++
++ cursor.path_arr = path2;
++
++ btr_cur_open_at_index_side(FALSE, index,
++ BTR_SEARCH_LEAF | BTR_ESTIMATE,
++ &cursor, &mtr);
++
++ mtr_commit(&mtr);
++
++ mem_heap_free(heap);
++
++ /* We have the path information for the range in path1 and path2 */
++
++ n_pages = 1;
++ diverged = FALSE; /* This becomes true when the path is not
++ the same any more */
++ diverged_lot = FALSE; /* This becomes true when the paths are
++ not the same or adjacent any more */
++ divergence_level = 1000000; /* This is the level where paths diverged
++ a lot */
++ for (i = 0; ; i++) {
++ ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
++
++ slot1 = path1 + i;
++ slot2 = path2 + i;
++
++ if ((slot1 + 1)->nth_rec == ULINT_UNDEFINED
++ || (slot2 + 1)->nth_rec == ULINT_UNDEFINED) {
++
++ if (i > divergence_level + 1) {
++ /* In trees whose height is > 1 our algorithm
++ tends to underestimate: multiply the estimate
++ by 2: */
++
++ n_pages = n_pages * 2;
++ }
++
++ /* Do not let the estimate for the range
++ exceed 1 / 2 of the leaf pages of the whole
++ index (index->stat_n_leaf_pages) */
++
++ if (n_pages > index->stat_n_leaf_pages / 2) {
++ n_pages = index->stat_n_leaf_pages / 2;
++
++ /* If that half rounds down to 0 (0 or 1 leaf pages),
++ then we estimate all leaf pages are in the range */
++
++ if (n_pages == 0) {
++ n_pages = index->stat_n_leaf_pages;
++ }
++ }
++
++ return(n_pages);
++ }
++
++ if (!diverged && slot1->nth_rec != slot2->nth_rec) {
++
++ diverged = TRUE;
++
++ if (slot1->nth_rec < slot2->nth_rec) {
++ n_pages = slot2->nth_rec - slot1->nth_rec;
++
++ if (n_pages > 1) {
++ diverged_lot = TRUE;
++ divergence_level = i;
++ }
++ } else {
++ /* Maybe the tree has changed between
++ searches */
++
++ return(10);
++ }
++
++ } else if (diverged && !diverged_lot) {
++
++ if (slot1->nth_rec < slot1->n_recs
++ || slot2->nth_rec > 1) {
++
++ diverged_lot = TRUE;
++ divergence_level = i;
++
++ n_pages = 0;
++
++ if (slot1->nth_rec < slot1->n_recs) {
++ n_pages += slot1->n_recs
++ - slot1->nth_rec;
++ }
++
++ if (slot2->nth_rec > 1) {
++ n_pages += slot2->nth_rec - 1;
++ }
++ }
++ } else if (diverged_lot) {
++
++ n_pages = (n_pages * (slot1->n_recs + slot2->n_recs))
++ / 2;
++ }
++ }
++}
++
++/*******************************************************************//**
+ Estimates the number of different key values in a given index, for
+ each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+ The estimates are stored in the array index->stat_n_diff_key_vals. */
+@@ -3516,18 +3765,38 @@
+ ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets_rec = offsets_rec_;
+ ulint* offsets_next_rec= offsets_next_rec_;
++ ulint stats_method = srv_stats_method;
++ btr_path_t first_rec_path[BTR_PATH_ARRAY_N_SLOTS];
++ ulint effective_pages; /* effective leaf pages */
+ rec_offs_init(offsets_rec_);
+ rec_offs_init(offsets_next_rec_);
+
+ n_cols = dict_index_get_n_unique(index);
+
++ if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++ /* estimate effective pages and path for the first effective record */
++ /* TODO: make it work also for n_cols > 1. */
++ effective_pages = btr_estimate_n_pages_not_null(index, 1 /*k*/, first_rec_path);
++
++ if (!effective_pages) {
++ for (j = 0; j <= n_cols; j++) {
++ index->stat_n_diff_key_vals[j] = (ib_int64_t)index->stat_n_leaf_pages;
++ }
++ return;
++ } else if (effective_pages > index->stat_n_leaf_pages) {
++ effective_pages = index->stat_n_leaf_pages;
++ }
++ } else {
++ effective_pages = index->stat_n_leaf_pages;
++ }
++
+ n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
+
+ /* It makes no sense to test more pages than are contained
+ in the index, thus we lower the number if it is too high */
+- if (srv_stats_sample_pages > index->stat_index_size) {
+- if (index->stat_index_size > 0) {
+- n_sample_pages = index->stat_index_size;
++ if (srv_stats_sample_pages > effective_pages) {
++ if (effective_pages > 0) {
++ n_sample_pages = effective_pages;
+ } else {
+ n_sample_pages = 1;
+ }
+@@ -3539,9 +3808,15 @@
+
+ for (i = 0; i < n_sample_pages; i++) {
+ rec_t* supremum;
++ ibool is_first_page = TRUE;
+ mtr_start(&mtr);
+
++ if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++ is_first_page = btr_cur_open_at_rnd_pos_after_path(index, BTR_SEARCH_LEAF,
++ first_rec_path, &cursor, &mtr);
++ } else {
+ btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
++ }
+
+ /* Count the number of different key values for each prefix of
+ the key on this index page. If the prefix does not determine
+@@ -3552,7 +3827,13 @@
+ page = btr_cur_get_page(&cursor);
+
+ supremum = page_get_supremum_rec(page);
++ if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS && is_first_page) {
++ /* the cursor should be the first record of the page. */
++ /* Counting should be started from here. */
++ rec = btr_cur_get_rec(&cursor);
++ } else {
+ rec = page_rec_get_next(page_get_infimum_rec(page));
++ }
+
+ if (rec != supremum) {
+ not_empty_flag = 1;
+@@ -3561,7 +3842,8 @@
+ }
+
+ while (rec != supremum) {
+- rec_t* next_rec = page_rec_get_next(rec);
++ rec_t* next_rec;
++ next_rec = page_rec_get_next(rec);
+ if (next_rec == supremum) {
+ break;
+ }
+@@ -3575,7 +3857,10 @@
+ cmp_rec_rec_with_match(rec, next_rec,
+ offsets_rec, offsets_next_rec,
+ index, &matched_fields,
+- &matched_bytes);
++ &matched_bytes,
++ (stats_method==SRV_STATS_METHOD_NULLS_NOT_EQUAL) ?
++ SRV_STATS_METHOD_NULLS_NOT_EQUAL :
++ SRV_STATS_METHOD_NULLS_EQUAL);
+
+ for (j = matched_fields + 1; j <= n_cols; j++) {
+ /* We add one if this index record has
+@@ -3636,7 +3921,7 @@
+ for (j = 0; j <= n_cols; j++) {
+ index->stat_n_diff_key_vals[j]
+ = ((n_diff[j]
+- * (ib_int64_t)index->stat_n_leaf_pages
++ * (ib_int64_t)effective_pages
+ + n_sample_pages - 1
+ + total_external_size
+ + not_empty_flag)
+@@ -3651,7 +3936,7 @@
+ different key values, or even more. Let us try to approximate
+ that: */
+
+- add_on = index->stat_n_leaf_pages
++ add_on = effective_pages
+ / (10 * (n_sample_pages
+ + total_external_size));
+
+@@ -3660,6 +3945,15 @@
+ }
+
+ index->stat_n_diff_key_vals[j] += add_on;
++
++ if (stats_method == SRV_STATS_METHOD_IGNORE_NULLS) {
++ /* index->stat_n_diff_key_vals[k] is used for calc rec_per_key,
++ as "stats.records / index->stat_n_diff_key_vals[x]".
++ So it should be adjusted to the value which is based on whole of the index. */
++ index->stat_n_diff_key_vals[j] =
++ index->stat_n_diff_key_vals[j] * (ib_int64_t)index->stat_n_leaf_pages
++ / (ib_int64_t)effective_pages;
++ }
+ }
+
+ mem_free(n_diff);
+diff -ruN a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c
+--- a/storage/innobase/dict/dict0boot.c 2010-12-03 15:48:03.034036843 +0900
++++ b/storage/innobase/dict/dict0boot.c 2010-12-03 17:19:24.835112632 +0900
+@@ -266,6 +266,29 @@
+ /* Get the dictionary header */
+ dict_hdr = dict_hdr_get(&mtr);
+
++ if (mach_read_from_8(dict_hdr + DICT_HDR_XTRADB_MARK)
++ != DICT_HDR_XTRADB_FLAG) {
++ /* not extended yet by XtraDB, need to be extended */
++ ulint root_page_no;
++
++ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
++ DICT_HDR_SPACE, 0, DICT_STATS_ID,
++ dict_ind_redundant, &mtr);
++ if (root_page_no == FIL_NULL) {
++ fprintf(stderr, "InnoDB: Warning: failed to create SYS_STATS btr.\n");
++ srv_use_sys_stats_table = FALSE;
++ } else {
++ mlog_write_ulint(dict_hdr + DICT_HDR_STATS, root_page_no,
++ MLOG_4BYTES, &mtr);
++ mlog_write_ull(dict_hdr + DICT_HDR_XTRADB_MARK,
++ DICT_HDR_XTRADB_FLAG, &mtr);
++ }
++ mtr_commit(&mtr);
++ /* restart mtr */
++ mtr_start(&mtr);
++ dict_hdr = dict_hdr_get(&mtr);
++ }
++
+ /* Because we only write new row ids to disk-based data structure
+ (dictionary header) when it is divisible by
+ DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
+@@ -425,7 +448,7 @@
+ table->id = DICT_FIELDS_ID;
+ dict_table_add_to_cache(table, heap);
+ dict_sys->sys_fields = table;
+- mem_heap_free(heap);
++ mem_heap_empty(heap);
+
+ index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
+ DICT_HDR_SPACE,
+@@ -442,6 +465,41 @@
+ FALSE);
+ ut_a(error == DB_SUCCESS);
+
++ /*-------------------------*/
++ table = dict_mem_table_create("SYS_STATS", DICT_HDR_SPACE, 3, 0);
++ table->n_mysql_handles_opened = 1; /* for pin */
++
++ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
++ dict_mem_table_add_col(table, heap, "KEY_COLS", DATA_INT, 0, 4);
++ dict_mem_table_add_col(table, heap, "DIFF_VALS", DATA_BINARY, 0, 0);
++
++ /* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
++#if DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2
++#error "DICT_SYS_STATS_DIFF_VALS_FIELD != 2 + 2"
++#endif
++
++ table->id = DICT_STATS_ID;
++ dict_table_add_to_cache(table, heap);
++ dict_sys->sys_stats = table;
++ mem_heap_empty(heap);
++
++ index = dict_mem_index_create("SYS_STATS", "CLUST_IND",
++ DICT_HDR_SPACE,
++ DICT_UNIQUE | DICT_CLUSTERED, 2);
++
++ dict_mem_index_add_field(index, "INDEX_ID", 0);
++ dict_mem_index_add_field(index, "KEY_COLS", 0);
++
++ index->id = DICT_STATS_ID;
++ error = dict_index_add_to_cache(table, index,
++ mtr_read_ulint(dict_hdr
++ + DICT_HDR_STATS,
++ MLOG_4BYTES, &mtr),
++ FALSE);
++ ut_a(error == DB_SUCCESS);
++
++ mem_heap_free(heap);
++
+ mtr_commit(&mtr);
+ /*-------------------------*/
+
+@@ -455,6 +513,7 @@
+ dict_load_sys_table(dict_sys->sys_columns);
+ dict_load_sys_table(dict_sys->sys_indexes);
+ dict_load_sys_table(dict_sys->sys_fields);
++ dict_load_sys_table(dict_sys->sys_stats);
+
+ mutex_exit(&(dict_sys->mutex));
+ }
+diff -ruN a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
+--- a/storage/innobase/dict/dict0crea.c 2010-12-03 15:48:03.036081059 +0900
++++ b/storage/innobase/dict/dict0crea.c 2010-12-03 17:19:24.836964976 +0900
+@@ -508,6 +508,51 @@
+ }
+
+ /*****************************************************************//**
++Based on an index object, this function builds the entry to be inserted
++in the SYS_STATS system table.
++@return the tuple which should be inserted */
++static
++dtuple_t*
++dict_create_sys_stats_tuple(
++/*========================*/
++ const dict_index_t* index, /*!< in: index whose id is stored in INDEX_ID */
++ ulint i, /*!< in: value stored in KEY_COLS */
++ mem_heap_t* heap) /*!< in: heap from which the tuple and its field buffers are allocated */
++{
++ dict_table_t* sys_stats;
++ dtuple_t* entry;
++ dfield_t* dfield;
++ byte* ptr;
++
++ ut_ad(index);
++ ut_ad(heap);
++
++ sys_stats = dict_sys->sys_stats;
++
++ entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
++
++ dict_table_copy_types(entry, sys_stats);
++
++ /* 0: INDEX_ID -----------------------*/
++ dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/);
++ ptr = mem_heap_alloc(heap, 8);
++ mach_write_to_8(ptr, index->id);
++ dfield_set_data(dfield, ptr, 8);
++ /* 1: KEY_COLS -----------------------*/
++ dfield = dtuple_get_nth_field(entry, 1/*KEY_COLS*/);
++ ptr = mem_heap_alloc(heap, 4);
++ mach_write_to_4(ptr, i);
++ dfield_set_data(dfield, ptr, 4);
++ /* 4: DIFF_VALS (field 2 of this tuple; read back as record field 4, cf. DICT_SYS_STATS_DIFF_VALS_FIELD) */
++ dfield = dtuple_get_nth_field(entry, 2/*DIFF_VALS*/);
++ ptr = mem_heap_alloc(heap, 8);
++ mach_write_to_8(ptr, 0); /* initial value is 0 */
++ dfield_set_data(dfield, ptr, 8);
++
++ return(entry);
++}
++
++/*****************************************************************//**
+ Creates the tuple with which the index entry is searched for writing the index
+ tree root page number, if such a tree is created.
+ @return the tuple for search */
+@@ -617,6 +662,27 @@
+ }
+
+ /***************************************************************//**
++Builds the SYS_STATS row for key prefix node->stats_no of node->index and sets it as the new row of node->stats_def.
++@return DB_SUCCESS */
++static
++ulint
++dict_build_stats_def_step(
++/*======================*/
++ ind_node_t* node) /*!< in: node providing index, stats_no, heap and the stats_def insert node */
++{
++ dict_index_t* index;
++ dtuple_t* row;
++
++ index = node->index;
++
++ row = dict_create_sys_stats_tuple(index, node->stats_no, node->heap); /* tuple is allocated from node->heap */
++
++ ins_node_set_new_row(node->stats_def, row);
++
++ return(DB_SUCCESS);
++}
++
++/***************************************************************//**
+ Creates an index tree for the index if it is not a member of a cluster.
+ @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+ static
+@@ -937,6 +1003,49 @@
+ dict_sys->sys_fields, heap);
+ node->field_def->common.parent = node;
+
++ if (srv_use_sys_stats_table) {
++ node->stats_def = ins_node_create(INS_DIRECT,
++ dict_sys->sys_stats, heap);
++ node->stats_def->common.parent = node;
++ } else {
++ node->stats_def = NULL;
++ }
++
++ node->commit_node = commit_node_create(heap);
++ node->commit_node->common.parent = node;
++
++ return(node);
++}
++
++/*********************************************************************//**
++*/
++UNIV_INTERN
++ind_node_t*
++ind_insert_stats_graph_create(
++/*==========================*/
++ dict_index_t* index,
++ mem_heap_t* heap)
++{
++ ind_node_t* node;
++
++ node = mem_heap_alloc(heap, sizeof(ind_node_t));
++
++ node->common.type = QUE_NODE_INSERT_STATS;
++
++ node->index = index;
++
++ node->state = INDEX_BUILD_STATS_COLS;
++ node->page_no = FIL_NULL;
++ node->heap = mem_heap_create(256);
++
++ node->ind_def = NULL;
++ node->field_def = NULL;
++
++ node->stats_def = ins_node_create(INS_DIRECT,
++ dict_sys->sys_stats, heap);
++ node->stats_def->common.parent = node;
++ node->stats_no = 0;
++
+ node->commit_node = commit_node_create(heap);
+ node->commit_node->common.parent = node;
+
+@@ -1087,6 +1196,7 @@
+
+ node->state = INDEX_BUILD_FIELD_DEF;
+ node->field_no = 0;
++ node->stats_no = 0;
+
+ thr->run_node = node->ind_def;
+
+@@ -1132,7 +1242,31 @@
+ goto function_exit;
+ }
+
+- node->state = INDEX_CREATE_INDEX_TREE;
++ if (srv_use_sys_stats_table
++ && !((node->table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
++ node->state = INDEX_BUILD_STATS_COLS;
++ } else {
++ node->state = INDEX_CREATE_INDEX_TREE;
++ }
++ }
++ if (node->state == INDEX_BUILD_STATS_COLS) {
++ if (node->stats_no <= dict_index_get_n_unique(node->index)) {
++
++ err = dict_build_stats_def_step(node);
++
++ if (err != DB_SUCCESS) {
++
++ goto function_exit;
++ }
++
++ node->stats_no++;
++
++ thr->run_node = node->stats_def;
++
++ return(thr);
++ } else {
++ node->state = INDEX_CREATE_INDEX_TREE;
++ }
+ }
+
+ if (node->state == INDEX_CREATE_INDEX_TREE) {
+@@ -1178,6 +1312,66 @@
+ return(NULL);
+ }
+
++ thr->run_node = que_node_get_parent(node);
++
++ return(thr);
++}
++
++/****************************************************************//**
++*/
++UNIV_INTERN
++que_thr_t*
++dict_insert_stats_step(
++/*===================*/
++ que_thr_t* thr) /*!< in: query thread */
++{
++ ind_node_t* node;
++ ulint err = DB_ERROR;
++ trx_t* trx;
++
++ ut_ad(thr);
++
++ trx = thr_get_trx(thr);
++
++ node = thr->run_node;
++
++ if (thr->prev_node == que_node_get_parent(node)) {
++ node->state = INDEX_BUILD_STATS_COLS;
++ }
++
++ if (node->state == INDEX_BUILD_STATS_COLS) {
++ if (node->stats_no <= dict_index_get_n_unique(node->index)) {
++
++ err = dict_build_stats_def_step(node);
++
++ if (err != DB_SUCCESS) {
++
++ goto function_exit;
++ }
++
++ node->stats_no++;
++
++ thr->run_node = node->stats_def;
++
++ return(thr);
++ } else {
++ node->state = INDEX_COMMIT_WORK;
++ }
++ }
++
++ if (node->state == INDEX_COMMIT_WORK) {
++
++ /* do not commit transaction here for now */
++ }
++
++function_exit:
++ trx->error_state = err;
++
++ if (err == DB_SUCCESS) {
++ } else {
++ return(NULL);
++ }
++
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+diff -ruN a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
+--- a/storage/innobase/dict/dict0dict.c 2010-12-03 15:48:03.040222428 +0900
++++ b/storage/innobase/dict/dict0dict.c 2010-12-03 17:19:24.841947690 +0900
+@@ -754,7 +754,7 @@
+ print an error message and return without doing
+ anything. */
+ dict_update_statistics(table, TRUE /* only update stats
+- if they have not been initialized */);
++ if they have not been initialized */, FALSE);
+ }
+
+ return(table);
+@@ -4291,6 +4291,240 @@
+ }
+
+ /*********************************************************************//**
++Reloads a table's index statistics from SYS_STATS instead of sampling. @return TRUE on success, FALSE if a row is missing */
++static
++ibool
++dict_reload_statistics(
++/*===================*/
++	dict_table_t*	table,			/*!< in/out: table whose index statistics are refreshed */
++	ulint*		sum_of_index_sizes)	/*!< in/out: incremented by the total size of every index visited */
++{
++	dict_index_t*	index;
++	ulint		size;
++	mem_heap_t*	heap;
++
++	index = dict_table_get_first_index(table);
++
++	if (index == NULL) {
++		/* Table definition is corrupt */
++
++		return(FALSE);
++	}
++
++	heap = mem_heap_create(1000);
++
++	while (index) {
++		size = btr_get_size(index, BTR_TOTAL_SIZE);
++
++		index->stat_index_size = size;
++
++		*sum_of_index_sizes += size;
++
++		size = btr_get_size(index, BTR_N_LEAF_PAGES);
++
++		if (size == 0) {
++			/* The root node of the tree is a leaf */
++			size = 1;
++		}
++
++		index->stat_n_leaf_pages = size;
++
++/*===========================================*/
++{
++	dict_table_t*	sys_stats;
++	dict_index_t*	sys_index;
++	btr_pcur_t	pcur;
++	dtuple_t*	tuple;
++	dfield_t*	dfield;
++	ulint		key_cols;
++	ulint		n_cols;
++	const rec_t*	rec;
++	const byte*	field;
++	ulint		len;
++	ib_int64_t*	stat_n_diff_key_vals_tmp;
++	byte*		buf;
++	ulint		i;
++	mtr_t		mtr;
++
++	n_cols = dict_index_get_n_unique(index);
++	stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
++
++	sys_stats = dict_sys->sys_stats;
++	sys_index = UT_LIST_GET_FIRST(sys_stats->indexes);
++	ut_a(!dict_table_is_comp(sys_stats));
++
++	tuple = dtuple_create(heap, 1);
++	dfield = dtuple_get_nth_field(tuple, 0);
++
++	buf = mem_heap_alloc(heap, 8);
++	mach_write_to_8(buf, index->id);
++
++	dfield_set_data(dfield, buf, 8);
++	dict_index_copy_types(tuple, sys_index, 1);
++
++	mtr_start(&mtr);
++
++	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
++				  BTR_SEARCH_LEAF, &pcur, &mtr);
++	for (i = 0; i <= n_cols; ) {
++		rec = btr_pcur_get_rec(&pcur);
++
++		if (!btr_pcur_is_on_user_rec(&pcur)
++		    || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len))
++			!= index->id) {
++			/* not found: even one missing row is not allowed */
++			fprintf(stderr, "InnoDB: Warning: stats for %s/%s (%lu/%lu)"
++					" not fonund in SYS_STATS\n",
++					index->table_name, index->name, (unsigned long) i, (unsigned long) n_cols);
++			btr_pcur_close(&pcur);
++			mtr_commit(&mtr);
++			mem_heap_free(heap);
++			return(FALSE);
++		}
++		/* a delete-marked row must not use up slot i: skip it without advancing i */
++		if (rec_get_deleted_flag(rec, 0)) {
++			goto next_rec;
++		}
++
++		field = rec_get_nth_field_old(rec, 1, &len);
++		ut_a(len == 4);
++
++		key_cols = mach_read_from_4(field);
++
++		ut_a(i == key_cols);
++
++		field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len);
++		ut_a(len == 8);
++		/* slot i is filled from a live row; only now expect the next KEY_COLS value */
++		stat_n_diff_key_vals_tmp[i++] = mach_read_from_8(field);
++next_rec:
++		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
++	}
++
++	btr_pcur_close(&pcur);
++	mtr_commit(&mtr);
++
++	for (i = 0; i <= n_cols; i++) {
++		index->stat_n_diff_key_vals[i] = stat_n_diff_key_vals_tmp[i];
++	}
++}
++/*===========================================*/
++
++		index = dict_table_get_next_index(index);
++	}
++
++	mem_heap_free(heap);
++	return(TRUE);
++}
++
++static
++void
++dict_store_statistics(
++/*==================*/
++	dict_table_t*	table)	/*!< in: table whose index statistics are written to SYS_STATS */
++{
++	dict_index_t*	index;
++	mem_heap_t*	heap;
++
++	index = dict_table_get_first_index(table);
++
++	ut_a(index);
++
++	heap = mem_heap_create(1000);
++	/* index by index, overwrite in place the DIFF_VALS of the existing SYS_STATS rows */
++	while (index) {
++/*===========================================*/
++{
++	dict_table_t*	sys_stats;
++	dict_index_t*	sys_index;
++	btr_pcur_t	pcur;
++	dtuple_t*	tuple;
++	dfield_t*	dfield;
++	ulint		key_cols;
++	ulint		n_cols;
++	ulint		rests;
++	const rec_t*	rec;
++	const byte*	field;
++	ulint		len;
++	ib_int64_t*	stat_n_diff_key_vals_tmp;
++	byte*		buf;
++	ulint		i;
++	mtr_t		mtr;
++
++	n_cols = dict_index_get_n_unique(index);
++	stat_n_diff_key_vals_tmp = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
++
++	for (i = 0; i <= n_cols; i++) {
++		stat_n_diff_key_vals_tmp[i] = index->stat_n_diff_key_vals[i];
++	}
++
++	sys_stats = dict_sys->sys_stats;
++	sys_index = UT_LIST_GET_FIRST(sys_stats->indexes);
++	ut_a(!dict_table_is_comp(sys_stats));
++
++	tuple = dtuple_create(heap, 1);
++	dfield = dtuple_get_nth_field(tuple, 0);
++
++	buf = mem_heap_alloc(heap, 8);
++	mach_write_to_8(buf, index->id);
++
++	dfield_set_data(dfield, buf, 8);
++	dict_index_copy_types(tuple, sys_index, 1);
++
++	mtr_start(&mtr);
++
++	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
++				  BTR_MODIFY_LEAF, &pcur, &mtr);
++	rests = n_cols + 1;	/* live rows still to be written */
++	while (rests > 0) {
++		rec = btr_pcur_get_rec(&pcur);
++
++		if (!btr_pcur_is_on_user_rec(&pcur)
++		    || mach_read_from_8(rec_get_nth_field_old(rec, 0, &len))
++			!= index->id) {
++			/* not found: just leave the loop; the cursor is
++			closed and the mini-transaction committed exactly
++			once, by the common code right after the loop */
++			break;
++		}
++
++		if (rec_get_deleted_flag(rec, 0)) {
++			goto next_rec;	/* skipped rows do not count as stored */
++		}
++
++		field = rec_get_nth_field_old(rec, 1, &len);
++		ut_a(len == 4);
++
++		key_cols = mach_read_from_4(field);
++
++		field = rec_get_nth_field_old(rec, DICT_SYS_STATS_DIFF_VALS_FIELD, &len);
++		ut_a(len == 8);
++
++		mlog_write_ull((byte*)field, stat_n_diff_key_vals_tmp[key_cols], &mtr);
++
++		rests--;
++
++next_rec:
++		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
++	}
++	btr_pcur_close(&pcur);
++	mtr_commit(&mtr);
++
++	if (rests) {
++		fprintf(stderr, "InnoDB: Warning: failed to store %lu stats entries"
++				" of %s/%s to SYS_STATS system table.\n",
++				(unsigned long) rests, index->table_name, index->name);
++	}
++}
++/*===========================================*/
++
++		index = dict_table_get_next_index(index);
++	}
++
++	mem_heap_free(heap);
++}
++
++/*********************************************************************//**
+ Calculates new estimates for table and index statistics. The statistics
+ are used in query optimization. */
+ UNIV_INTERN
+@@ -4298,10 +4532,11 @@
+ dict_update_statistics(
+ /*===================*/
+ dict_table_t* table, /*!< in/out: table */
+- ibool only_calc_if_missing_stats)/*!< in: only
++ ibool only_calc_if_missing_stats,/*!< in: only
+ update/recalc the stats if they have
+ not been initialized yet, otherwise
+ do nothing */
++ ibool sync) /*!< in: TRUE if must update SYS_STATS */
+ {
+ dict_index_t* index;
+ ulint sum_of_index_sizes = 0;
+@@ -4318,6 +4553,27 @@
+ return;
+ }
+
++ if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) && !sync) {
++ dict_table_stats_lock(table, RW_X_LATCH);
++
++ /* reload statistics from SYS_STATS table */
++ if (dict_reload_statistics(table, &sum_of_index_sizes)) {
++ /* success */
++#ifdef UNIV_DEBUG
++ fprintf(stderr, "InnoDB: DEBUG: reload_statistics is scceeded for %s.\n",
++ table->name);
++#endif
++ goto end;
++ }
++
++ dict_table_stats_unlock(table, RW_X_LATCH);
++ }
++#ifdef UNIV_DEBUG
++ fprintf(stderr, "InnoDB: DEBUG: update_statistics for %s.\n",
++ table->name);
++#endif
++ sum_of_index_sizes = 0;
++
+ /* Find out the sizes of the indexes and how many different values
+ for the key they approximately have */
+
+@@ -4378,6 +4634,11 @@
+ index = dict_table_get_next_index(index);
+ } while (index);
+
++ if (srv_use_sys_stats_table && !((table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)) {
++ /* store statistics to SYS_STATS table */
++ dict_store_statistics(table);
++ }
++end:
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+@@ -4472,7 +4733,8 @@
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+- dict_update_statistics(table, FALSE /* update even if initialized */);
++ if (srv_stats_auto_update)
++ dict_update_statistics(table, FALSE /* update even if initialized */, FALSE);
+
+ dict_table_stats_lock(table, RW_S_LATCH);
+
+diff -ruN a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c
+--- a/storage/innobase/dict/dict0load.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/dict/dict0load.c 2010-12-03 17:19:24.845947460 +0900
+@@ -49,7 +49,8 @@
+ "SYS_COLUMNS",
+ "SYS_FIELDS",
+ "SYS_FOREIGN",
+- "SYS_FOREIGN_COLS"
++ "SYS_FOREIGN_COLS",
++ "SYS_STATS"
+ };
+ /****************************************************************//**
+ Compare the name of an index column.
+@@ -342,12 +343,13 @@
+ }
+
+ if ((status & DICT_TABLE_UPDATE_STATS)
++ && srv_stats_auto_update
+ && dict_table_get_first_index(*table)) {
+
+ /* Update statistics if DICT_TABLE_UPDATE_STATS
+ is set */
+ dict_update_statistics(*table, FALSE /* update even if
+- initialized */);
++ initialized */, FALSE);
+ }
+
+ return(NULL);
+@@ -565,6 +567,61 @@
+
+ return(NULL);
+ }
++/********************************************************************//**
++This function parses a SYS_STATS record, extracts the necessary
++information from the record and returns it to the caller.
++@return error message, or NULL on success */
++UNIV_INTERN
++const char*
++dict_process_sys_stats_rec(
++/*=============================*/
++ mem_heap_t* heap, /*!< in/out: heap memory (not referenced in this function body) */
++ const rec_t* rec, /*!< in: current SYS_STATS rec */
++ index_id_t* index_id, /*!< out: INDEX_ID */
++ ulint* key_cols, /*!< out: KEY_COLS */
++ ib_uint64_t* diff_vals) /*!< out: DIFF_VALS */
++{
++ ulint len;
++ const byte* field;
++
++ if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
++ return("delete-marked record in SYS_STATS");
++ }
++
++ if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) != 5)) { /* fields 0..4 are read below */
++ return("wrong number of columns in SYS_STATS record");
++ }
++
++ field = rec_get_nth_field_old(rec, 0/*INDEX_ID*/, &len);
++ if (UNIV_UNLIKELY(len != 8)) {
++err_len: /* shared exit for every length check in this function (entered via goto) */
++ return("incorrect column length in SYS_STATS");
++ }
++ *index_id = mach_read_from_8(field);
++
++ field = rec_get_nth_field_old(rec, 1/*KEY_COLS*/, &len);
++ if (UNIV_UNLIKELY(len != 4)) {
++ goto err_len;
++ }
++ *key_cols = mach_read_from_4(field);
++
++ rec_get_nth_field_offs_old(rec, 2/*DB_TRX_ID*/, &len); /* only the length is checked */
++ if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
++ goto err_len;
++ }
++ rec_get_nth_field_offs_old(rec, 3/*DB_ROLL_PTR*/, &len); /* only the length is checked */
++ if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
++ goto err_len;
++ }
++
++ field = rec_get_nth_field_old(rec, 4/*DIFF_VALS*/, &len);
++ if (UNIV_UNLIKELY(len != 8)) {
++ goto err_len;
++ }
++ *diff_vals = mach_read_from_8(field);
++
++ return(NULL);
++}
+ /********************************************************************//**
+ Determine the flags of a table described in SYS_TABLES.
+ @return compressed page size in kilobytes; or 0 if the tablespace is
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:17:03.665960357 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 17:22:21.586939783 +0900
+@@ -187,6 +187,7 @@
+ static my_bool innobase_rollback_on_timeout = FALSE;
+ static my_bool innobase_create_status_file = FALSE;
+ static my_bool innobase_stats_on_metadata = TRUE;
++static my_bool innobase_use_sys_stats_table = FALSE;
+
+
+ static char* internal_innobase_data_file_path = NULL;
+@@ -2387,6 +2388,8 @@
+ goto error;
+ }
+
++ srv_use_sys_stats_table = (ibool) innobase_use_sys_stats_table;
++
+ /* -------------- Log files ---------------------------*/
+
+ /* The default dir for log files is the datadir of MySQL */
+@@ -5190,6 +5193,10 @@
+
+ error = row_insert_for_mysql((byte*) record, prebuilt);
+
++#ifdef EXTENDED_FOR_USERSTAT
++ if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+ /* Handle duplicate key errors */
+ if (auto_inc_used) {
+ ulint err;
+@@ -5526,6 +5533,10 @@
+ }
+ }
+
++#ifdef EXTENDED_FOR_USERSTAT
++ if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+ innodb_srv_conc_exit_innodb(trx);
+
+ error = convert_error_code_to_mysql(error,
+@@ -5579,6 +5590,10 @@
+
+ error = row_update_for_mysql((byte*) record, prebuilt);
+
++#ifdef EXTENDED_FOR_USERSTAT
++ if (error == DB_SUCCESS) rows_changed++;
++#endif
++
+ innodb_srv_conc_exit_innodb(trx);
+
+ error = convert_error_code_to_mysql(
+@@ -6106,6 +6121,11 @@
+ case DB_SUCCESS:
+ error = 0;
+ table->status = 0;
++#ifdef EXTENDED_FOR_USERSTAT
++ rows_read++;
++ if (active_index >= 0 && active_index < MAX_KEY)
++ index_rows_read[active_index]++;
++#endif
+ break;
+ case DB_RECORD_NOT_FOUND:
+ error = HA_ERR_END_OF_FILE;
+@@ -8000,11 +8020,31 @@
+ /* In sql_show we call with this flag: update
+ then statistics so that they are up-to-date */
+
++ if (srv_use_sys_stats_table && !((ib_table->flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY)
++ && called_from_analyze) {
++ /* If the indexes on the table don't have enough rows in SYS_STATS system table, */
++ /* they need to be created. */
++ dict_index_t* index;
++
++ prebuilt->trx->op_info = "confirming rows of SYS_STATS to store statistics";
++
++ ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED);
++
++ for (index = dict_table_get_first_index(ib_table);
++ index != NULL;
++ index = dict_table_get_next_index(index)) {
++ row_insert_stats_for_mysql(index, prebuilt->trx);
++ innobase_commit_low(prebuilt->trx);
++ }
++
++ ut_a(prebuilt->trx->conc_state == TRX_NOT_STARTED);
++ }
++
+ prebuilt->trx->op_info = "updating table statistics";
+
+ dict_update_statistics(ib_table,
+ FALSE /* update even if stats
+- are initialized */);
++ are initialized */, called_from_analyze);
+
+ prebuilt->trx->op_info = "returning various info to MySQL";
+ }
+@@ -8082,7 +8122,7 @@
+ are asked by MySQL to avoid locking. Another reason to
+ avoid the call is that it uses quite a lot of CPU.
+ See Bug#38185. */
+- if (flag & HA_STATUS_NO_LOCK) {
++ if (flag & HA_STATUS_NO_LOCK || !srv_stats_update_need_lock) {
+ /* We do not update delete_length if no
+ locking is requested so the "old" value can
+ remain. delete_length is initialized to 0 in
+@@ -11283,6 +11323,45 @@
+ "The number of index pages to sample when calculating statistics (default 8)",
+ NULL, NULL, 8, 1, ~0ULL, 0);
+
++const char *innobase_stats_method_names[]=
++{
++ "nulls_equal",
++ "nulls_unequal",
++ "nulls_ignored",
++ NullS
++};
++TYPELIB innobase_stats_method_typelib=
++{
++ array_elements(innobase_stats_method_names) - 1, "innobase_stats_method_typelib",
++ innobase_stats_method_names, NULL
++};
++static MYSQL_SYSVAR_ENUM(stats_method, srv_stats_method,
++ PLUGIN_VAR_RQCMDARG,
++ "Specifies how InnoDB index statistics collection code should threat NULLs. "
++ "Possible values of name are same to for 'myisam_stats_method'. "
++ "This is startup parameter.",
++ NULL, NULL, 0, &innobase_stats_method_typelib);
++
++static MYSQL_SYSVAR_ULONG(stats_auto_update, srv_stats_auto_update,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable InnoDB's auto update statistics of indexes. "
++ "(except for ANALYZE TABLE command) 0:disable 1:enable",
++ NULL, NULL, 1, 0, 1, 0);
++
++static MYSQL_SYSVAR_ULONG(stats_update_need_lock, srv_stats_update_need_lock,
++ PLUGIN_VAR_RQCMDARG,
++ "Enable/Disable InnoDB's update statistics which needs to lock dictionary. "
++ "e.g. Data_free.",
++ NULL, NULL, 1, 0, 1, 0);
++
++static MYSQL_SYSVAR_BOOL(use_sys_stats_table, innobase_use_sys_stats_table,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Enable to use SYS_STATS system table to store statistics statically, "
++ "And avoids to calculate statistics at every first open of the tables. "
++ "This option may make the opportunities of update statistics less. "
++ "So you should use ANALYZE TABLE command intentionally.",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable InnoDB adaptive hash index (enabled by default). "
+@@ -11611,6 +11690,10 @@
+ MYSQL_SYSVAR(overwrite_relay_log_info),
+ MYSQL_SYSVAR(rollback_on_timeout),
+ MYSQL_SYSVAR(stats_on_metadata),
++ MYSQL_SYSVAR(stats_method),
++ MYSQL_SYSVAR(stats_auto_update),
++ MYSQL_SYSVAR(stats_update_need_lock),
++ MYSQL_SYSVAR(use_sys_stats_table),
+ MYSQL_SYSVAR(stats_sample_pages),
+ MYSQL_SYSVAR(adaptive_hash_index),
+ MYSQL_SYSVAR(replication_delay),
+@@ -11680,7 +11763,10 @@
+ i_s_innodb_sys_columns,
+ i_s_innodb_sys_fields,
+ i_s_innodb_sys_foreign,
+-i_s_innodb_sys_foreign_cols
++i_s_innodb_sys_foreign_cols,
++i_s_innodb_sys_stats,
++i_s_innodb_table_stats,
++i_s_innodb_index_stats
+ mysql_declare_plugin_end;
+
+ /** @brief Initialize the default value of innodb_commit_concurrency.
+diff -ruN a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
+--- a/storage/innobase/handler/i_s.cc 2010-12-03 17:17:03.666956117 +0900
++++ b/storage/innobase/handler/i_s.cc 2010-12-03 17:19:24.880964526 +0900
+@@ -49,6 +49,7 @@
+ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
+ #include "trx0rseg.h" /* for trx_rseg_struct */
+ #include "trx0sys.h" /* for trx_sys */
++#include "dict0dict.h" /* for dict_sys */
+ }
+
+ static const char plugin_author[] = "Innobase Oy";
+@@ -3458,6 +3459,203 @@
+ STRUCT_FLD(__reserved1, NULL)
+ };
+
++/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_stats */
++static ST_FIELD_INFO innodb_sys_stats_fields_info[] =
++{
++#define SYS_STATS_INDEX_ID 0
++ {STRUCT_FLD(field_name, "INDEX_ID"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_STATS_KEY_COLS 1
++ {STRUCT_FLD(field_name, "KEY_COLS"),
++ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++#define SYS_STATS_DIFF_VALS 2
++ {STRUCT_FLD(field_name, "DIFF_VALS"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++/**********************************************************************//**
++Stores one (INDEX_ID, KEY_COLS, DIFF_VALS) row into information_schema.innodb_sys_stats
++@return 0 on success */
++static
++int
++i_s_dict_fill_sys_stats(
++/*====================*/
++ THD* thd, /*!< in: thread */
++ index_id_t index_id, /*!< in: INDEX_ID */
++ ulint key_cols, /*!< in: KEY_COLS */
++ ib_uint64_t diff_vals, /*!< in: DIFF_VALS */
++ TABLE* table_to_fill) /*!< in/out: fill this table */
++{
++ Field** fields;
++
++ DBUG_ENTER("i_s_dict_fill_sys_stats");
++
++ fields = table_to_fill->field;
++ /* NOTE(review): OK() is defined outside this hunk; presumably it returns early on a non-zero result - confirm. */
++ OK(fields[SYS_STATS_INDEX_ID]->store(longlong(index_id), TRUE));
++
++ OK(fields[SYS_STATS_KEY_COLS]->store(key_cols));
++
++ OK(fields[SYS_STATS_DIFF_VALS]->store(longlong(diff_vals), TRUE));
++ /* All three columns are set; hand the row over to the I_S table. */
++ OK(schema_table_store_record(thd, table_to_fill));
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Function to populate INFORMATION_SCHEMA.innodb_sys_stats table.
++@return 0 always (access denial and unparsable records are not reported as errors) */
++static
++int
++i_s_sys_stats_fill_table(
++/*=====================*/
++ THD* thd, /*!< in: thread */
++ TABLE_LIST* tables, /*!< in/out: tables to fill */
++ COND* cond) /*!< in: condition (not used) */
++{
++ btr_pcur_t pcur;
++ const rec_t* rec;
++ mem_heap_t* heap;
++ mtr_t mtr;
++
++ DBUG_ENTER("i_s_sys_stats_fill_table");
++
++ /* deny access to users for whom the PROCESS_ACL check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++ DBUG_RETURN(0);
++ }
++
++ heap = mem_heap_create(1000);
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++
++ rec = dict_startscan_system(&pcur, &mtr, SYS_STATS);
++
++ while (rec) {
++ const char* err_msg;
++ index_id_t index_id;
++ ulint key_cols;
++ ib_uint64_t diff_vals;
++
++ /* Extract necessary information from a SYS_STATS row */
++ err_msg = dict_process_sys_stats_rec(
++ heap, rec, &index_id, &key_cols, &diff_vals);
++ /* Release the mtr and dict_sys->mutex before the row is stored. */
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++
++ if (!err_msg) {
++ i_s_dict_fill_sys_stats(
++ thd, index_id, key_cols, diff_vals,
++ tables->table);
++ } else { /* pass err_msg as an argument, never as the format string */
++ push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
++ ER_CANT_FIND_SYSTEM_REC, "%s",
++ err_msg);
++ }
++
++ mem_heap_empty(heap);
++
++ /* Get the next record */
++ mutex_enter(&dict_sys->mutex);
++ mtr_start(&mtr);
++ rec = dict_getnext_system(&pcur, &mtr);
++ }
++
++ mtr_commit(&mtr);
++ mutex_exit(&dict_sys->mutex);
++ mem_heap_free(heap);
++
++ DBUG_RETURN(0);
++}
++/*******************************************************************//**
++Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_stats
++@return 0 on success */
++static
++int
++innodb_sys_stats_init(
++/*========================*/
++ void* p) /*!< in/out: table schema object */
++{
++ ST_SCHEMA_TABLE* schema;
++
++ DBUG_ENTER("innodb_sys_stats_init");
++
++ schema = (ST_SCHEMA_TABLE*) p;
++ /* Register the column layout and the function that fills the table. */
++ schema->fields_info = innodb_sys_stats_fields_info;
++ schema->fill_table = i_s_sys_stats_fill_table;
++
++ DBUG_RETURN(0);
++}
++
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_stats =
++{
++ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
++ /* int */
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++
++ /* pointer to type-specific plugin descriptor */
++ /* void* */
++ STRUCT_FLD(info, &i_s_info),
++
++ /* plugin name */
++ /* const char* */
++ STRUCT_FLD(name, "INNODB_SYS_STATS"),
++
++ /* plugin author (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(author, plugin_author),
++
++ /* general descriptive text (for SHOW PLUGINS) */
++ /* const char* */
++ STRUCT_FLD(descr, "XtraDB SYS_STATS table"),
++
++ /* the plugin license (PLUGIN_LICENSE_XXX) */
++ /* int */
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++
++ /* the function to invoke when plugin is loaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(init, innodb_sys_stats_init),
++
++ /* the function to invoke when plugin is unloaded */
++ /* int (*)(void*); */
++ STRUCT_FLD(deinit, i_s_common_deinit),
++
++ /* plugin version (for SHOW PLUGINS) */
++ /* unsigned int */
++ STRUCT_FLD(version, INNODB_VERSION_SHORT),
++
++ /* struct st_mysql_show_var* */
++ STRUCT_FLD(status_vars, NULL),
++
++ /* struct st_mysql_sys_var** */
++ STRUCT_FLD(system_vars, NULL),
++
++ /* reserved for dependency checking */
++ /* void* */
++ STRUCT_FLD(__reserved1, NULL)
++};
++
+ /***********************************************************************
+ */
+ static ST_FIELD_INFO i_s_innodb_rseg_fields_info[] =
+@@ -3620,3 +3818,347 @@
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL)
+ };
++
++/***********************************************************************
++Column definitions of INFORMATION_SCHEMA.INNODB_TABLE_STATS. */
++static ST_FIELD_INFO i_s_innodb_table_stats_info[] =
++{
++ {STRUCT_FLD(field_name, "table_schema"),
++ STRUCT_FLD(field_length, NAME_LEN),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "table_name"),
++ STRUCT_FLD(field_length, NAME_LEN),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "rows"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "clust_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "other_size"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "modified"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++/* Column definitions of INFORMATION_SCHEMA.INNODB_INDEX_STATS. */
++static ST_FIELD_INFO i_s_innodb_index_stats_info[] =
++{
++ {STRUCT_FLD(field_name, "table_schema"),
++ STRUCT_FLD(field_length, NAME_LEN),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "table_name"),
++ STRUCT_FLD(field_length, NAME_LEN),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "index_name"),
++ STRUCT_FLD(field_length, NAME_LEN),
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "fields"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "rows_per_key"),
++ STRUCT_FLD(field_length, 256), /* text list built by i_s_innodb_index_stats_fill */
++ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, 0),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "index_total_pages"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ {STRUCT_FLD(field_name, "index_leaf_pages"),
++ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
++ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
++ STRUCT_FLD(value, 0),
++ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
++ STRUCT_FLD(old_name, ""),
++ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
++
++ END_OF_ST_FIELD_INFO
++};
++/* Fills INFORMATION_SCHEMA.INNODB_TABLE_STATS from the tables on dict_sys->table_LRU; returns 0, or 1 if storing a row fails. */
++static
++int
++i_s_innodb_table_stats_fill(
++/*========================*/
++ THD* thd, /* in: thread, used for the access check and for storing rows */
++ TABLE_LIST* tables, /* in/out: tables->table receives the rows */
++ COND* cond) /* in: condition (not referenced in this function) */
++{
++ TABLE* i_s_table = (TABLE *) tables->table;
++ int status = 0;
++ dict_table_t* table;
++
++ DBUG_ENTER("i_s_innodb_table_stats_fill");
++
++ /* return an empty result when the PROCESS_ACL check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++ DBUG_RETURN(0);
++ }
++
++ mutex_enter(&(dict_sys->mutex));
++ /* dict_sys->mutex stays held for the whole walk, including the row stores. */
++ table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++ while (table) {
++ char buf[NAME_LEN * 2 + 2];
++ char* ptr;
++ /* Tables whose clustered index size statistic is zero are skipped. */
++ if (table->stat_clustered_index_size == 0) {
++ table = UT_LIST_GET_NEXT(table_LRU, table);
++ continue;
++ }
++ /* Split table->name at the first '/'; without one, both columns get the whole name. */
++ buf[NAME_LEN * 2 + 1] = 0;
++ strncpy(buf, table->name, NAME_LEN * 2 + 1);
++ ptr = strchr(buf, '/');
++ if (ptr) {
++ *ptr = '\0';
++ ++ptr;
++ } else {
++ ptr = buf;
++ }
++
++ field_store_string(i_s_table->field[0], buf);
++ field_store_string(i_s_table->field[1], ptr);
++ i_s_table->field[2]->store(table->stat_n_rows);
++ i_s_table->field[3]->store(table->stat_clustered_index_size);
++ i_s_table->field[4]->store(table->stat_sum_of_other_index_sizes);
++ i_s_table->field[5]->store(table->stat_modified_counter);
++
++ if (schema_table_store_record(thd, i_s_table)) {
++ status = 1;
++ break;
++ }
++
++ table = UT_LIST_GET_NEXT(table_LRU, table);
++ }
++
++ mutex_exit(&(dict_sys->mutex));
++
++ DBUG_RETURN(status);
++}
++/* Fills INFORMATION_SCHEMA.INNODB_INDEX_STATS, one row per index of each table on dict_sys->table_LRU; returns 0, or 1 if storing a row fails. */
++static
++int
++i_s_innodb_index_stats_fill(
++/*========================*/
++ THD* thd, /* in: thread, used for the access check and for storing rows */
++ TABLE_LIST* tables, /* in/out: tables->table receives the rows */
++ COND* cond) /* in: condition (not referenced in this function) */
++{
++ TABLE* i_s_table = (TABLE *) tables->table;
++ int status = 0;
++ dict_table_t* table;
++ dict_index_t* index;
++
++ DBUG_ENTER("i_s_innodb_index_stats_fill");
++
++ /* return an empty result when the PROCESS_ACL check fails */
++ if (check_global_access(thd, PROCESS_ACL)) {
++ DBUG_RETURN(0);
++ }
++
++ mutex_enter(&(dict_sys->mutex));
++ /* dict_sys->mutex stays held for the whole walk, including the row stores. */
++ table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
++
++ while (table) {
++ if (table->stat_clustered_index_size == 0) {
++ table = UT_LIST_GET_NEXT(table_LRU, table);
++ continue;
++ }
++
++ ib_int64_t n_rows = table->stat_n_rows;
++ /* A negative row estimate is clamped to zero before it is used as a dividend. */
++ if (n_rows < 0) {
++ n_rows = 0;
++ }
++
++ index = dict_table_get_first_index(table);
++
++ while (index) {
++ char buff[256+1];
++ char row_per_keys[256+1];
++ char buf[NAME_LEN * 2 + 2];
++ char* ptr;
++ ulint i;
++ /* Split table->name at the first '/'; without one, both columns get the whole name. */
++ buf[NAME_LEN * 2 + 1] = 0;
++ strncpy(buf, table->name, NAME_LEN * 2 + 1);
++ ptr = strchr(buf, '/');
++ if (ptr) {
++ *ptr = '\0';
++ ++ptr;
++ } else {
++ ptr = buf;
++ }
++
++ field_store_string(i_s_table->field[0], buf);
++ field_store_string(i_s_table->field[1], ptr);
++ field_store_string(i_s_table->field[2], index->name);
++ i_s_table->field[3]->store(index->n_uniq);
++ /* Build "a, b, c": one rows-per-key estimate for each i in 1..n_uniq. */
++ row_per_keys[0] = '\0';
++
++ /* For now the statistics are read optimistically: the stat mutex calls below are commented out. */
++ //dict_index_stat_mutex_enter(index);
++ if (index->stat_n_diff_key_vals) {
++ for (i = 1; i <= index->n_uniq; i++) {
++ ib_int64_t rec_per_key;
++ if (index->stat_n_diff_key_vals[i]) {
++ rec_per_key = n_rows / index->stat_n_diff_key_vals[i];
++ } else {
++ rec_per_key = n_rows;
++ }
++ ut_snprintf(buff, 256, (i == index->n_uniq)?"%llu":"%llu, ",
++ rec_per_key);
++ strncat(row_per_keys, buff, 256 - strlen(row_per_keys));
++ }
++ }
++ //dict_index_stat_mutex_exit(index);
++
++ field_store_string(i_s_table->field[4], row_per_keys);
++
++ i_s_table->field[5]->store(index->stat_index_size);
++ i_s_table->field[6]->store(index->stat_n_leaf_pages);
++
++ if (schema_table_store_record(thd, i_s_table)) {
++ status = 1;
++ break;
++ }
++
++ index = dict_table_get_next_index(index);
++ }
++ /* A failed store inside the index loop also ends the table loop. */
++ if (status == 1) {
++ break;
++ }
++
++ table = UT_LIST_GET_NEXT(table_LRU, table);
++ }
++
++ mutex_exit(&(dict_sys->mutex));
++
++ DBUG_RETURN(status);
++}
++/* Binds the column list and the fill function of INFORMATION_SCHEMA.INNODB_TABLE_STATS; returns 0. */
++static
++int
++i_s_innodb_table_stats_init(
++/*========================*/
++ void* p) /* in/out: table schema object, cast to ST_SCHEMA_TABLE* */
++{
++ DBUG_ENTER("i_s_innodb_table_stats_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_table_stats_info;
++ schema->fill_table = i_s_innodb_table_stats_fill;
++
++ DBUG_RETURN(0);
++}
++/* Binds the column list and the fill function of INFORMATION_SCHEMA.INNODB_INDEX_STATS; returns 0. */
++static
++int
++i_s_innodb_index_stats_init(
++/*========================*/
++ void* p) /* in/out: table schema object, cast to ST_SCHEMA_TABLE* */
++{
++ DBUG_ENTER("i_s_innodb_index_stats_init");
++ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
++
++ schema->fields_info = i_s_innodb_index_stats_info;
++ schema->fill_table = i_s_innodb_index_stats_fill;
++
++ DBUG_RETURN(0);
++}
++/* Plugin descriptor of INFORMATION_SCHEMA.INNODB_TABLE_STATS (listed in the plugin declaration of ha_innodb.cc). */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_table_stats =
++{
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++ STRUCT_FLD(info, &i_s_info),
++ STRUCT_FLD(name, "INNODB_TABLE_STATS"),
++ STRUCT_FLD(author, plugin_author),
++ STRUCT_FLD(descr, "InnoDB table statistics in memory"),
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++ STRUCT_FLD(init, i_s_innodb_table_stats_init),
++ STRUCT_FLD(deinit, i_s_common_deinit),
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++ STRUCT_FLD(status_vars, NULL),
++ STRUCT_FLD(system_vars, NULL),
++ STRUCT_FLD(__reserved1, NULL)
++};
++/* Plugin descriptor of INFORMATION_SCHEMA.INNODB_INDEX_STATS (listed in the plugin declaration of ha_innodb.cc). */
++UNIV_INTERN struct st_mysql_plugin i_s_innodb_index_stats =
++{
++ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
++ STRUCT_FLD(info, &i_s_info),
++ STRUCT_FLD(name, "INNODB_INDEX_STATS"),
++ STRUCT_FLD(author, plugin_author),
++ STRUCT_FLD(descr, "InnoDB index statistics in memory"),
++ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
++ STRUCT_FLD(init, i_s_innodb_index_stats_init),
++ STRUCT_FLD(deinit, i_s_common_deinit),
++ STRUCT_FLD(version, 0x0100 /* 1.0 */),
++ STRUCT_FLD(status_vars, NULL),
++ STRUCT_FLD(system_vars, NULL),
++ STRUCT_FLD(__reserved1, NULL)
++};
+diff -ruN a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
+--- a/storage/innobase/handler/i_s.h 2010-12-03 17:17:03.668953884 +0900
++++ b/storage/innobase/handler/i_s.h 2010-12-03 17:19:24.882947826 +0900
+@@ -41,5 +41,8 @@
+ extern struct st_mysql_plugin i_s_innodb_sys_foreign;
+ extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols;
+ extern struct st_mysql_plugin i_s_innodb_rseg;
++extern struct st_mysql_plugin i_s_innodb_sys_stats;
++extern struct st_mysql_plugin i_s_innodb_table_stats;
++extern struct st_mysql_plugin i_s_innodb_index_stats;
+
+ #endif /* i_s_h */
+diff -ruN a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
+--- a/storage/innobase/include/dict0boot.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0boot.h 2010-12-03 17:19:24.885947372 +0900
+@@ -104,6 +104,7 @@
+ #define DICT_COLUMNS_ID 2
+ #define DICT_INDEXES_ID 3
+ #define DICT_FIELDS_ID 4
++#define DICT_STATS_ID 6
+ /* The following is a secondary index on SYS_TABLES */
+ #define DICT_TABLE_IDS_ID 5
+
+@@ -131,10 +132,13 @@
+ #define DICT_HDR_INDEXES 44 /* Root of the index index tree */
+ #define DICT_HDR_FIELDS 48 /* Root of the index field
+ index tree */
++#define DICT_HDR_STATS 52 /* Root of the stats tree */
+
+ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
+ segment into which the dictionary
+ header is created */
++
++#define DICT_HDR_XTRADB_MARK 256 /* Flag to distinguish expansion of XtraDB */
+ /*-------------------------------------------------------------*/
+
+ /* The field number of the page number field in the sys_indexes table
+@@ -144,11 +148,15 @@
+ #define DICT_SYS_INDEXES_TYPE_FIELD 6
+ #define DICT_SYS_INDEXES_NAME_FIELD 4
+
++#define DICT_SYS_STATS_DIFF_VALS_FIELD 4
++
+ /* When a row id which is zero modulo this number (which must be a power of
+ two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+ updated */
+ #define DICT_HDR_ROW_ID_WRITE_MARGIN 256
+
++#define DICT_HDR_XTRADB_FLAG 0x5854524144425F31ULL /* "XTRADB_1" */
++
+ #ifndef UNIV_NONINL
+ #include "dict0boot.ic"
+ #endif
+diff -ruN a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
+--- a/storage/innobase/include/dict0crea.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0crea.h 2010-12-03 17:19:24.886949643 +0900
+@@ -53,6 +53,14 @@
+ dict_index_t* index, /*!< in: index to create, built as a memory data
+ structure */
+ mem_heap_t* heap); /*!< in: heap where created */
++/*********************************************************************//**
++Creates the graph node that row_insert_stats_for_mysql() completes and runs for an index. */
++UNIV_INTERN
++ind_node_t*
++ind_insert_stats_graph_create(
++/*==========================*/
++ dict_index_t* index,
++ mem_heap_t* heap);
+ /***********************************************************//**
+ Creates a table. This is a high-level function used in SQL execution graphs.
+ @return query thread to run next or NULL */
+@@ -62,6 +70,13 @@
+ /*===================*/
+ que_thr_t* thr); /*!< in: query thread */
+ /***********************************************************//**
++Step function that the query graph runner calls for a QUE_NODE_INSERT_STATS node. */
++UNIV_INTERN
++que_thr_t*
++dict_insert_stats_step(
++/*===================*/
++ que_thr_t* thr);
++/***********************************************************//**
+ Creates an index. This is a high-level function used in SQL execution
+ graphs.
+ @return query thread to run next or NULL */
+@@ -170,6 +185,7 @@
+ ins_node_t* field_def; /* child node which does the inserts of
+ the field definitions; the row to be inserted
+ is built by the parent node */
++ ins_node_t* stats_def;
+ commit_node_t* commit_node;
+ /* child node which performs a commit after
+ a successful index creation */
+@@ -180,6 +196,7 @@
+ dict_table_t* table; /*!< table which owns the index */
+ dtuple_t* ind_row;/* index definition row built */
+ ulint field_no;/* next field definition to insert */
++ ulint stats_no;
+ mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
+ };
+
+@@ -189,6 +206,7 @@
+ #define INDEX_CREATE_INDEX_TREE 3
+ #define INDEX_COMMIT_WORK 4
+ #define INDEX_ADD_TO_CACHE 5
++#define INDEX_BUILD_STATS_COLS 6
+
+ #ifndef UNIV_NONINL
+ #include "dict0crea.ic"
+diff -ruN a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
+--- a/storage/innobase/include/dict0dict.h 2010-12-03 15:48:03.073024387 +0900
++++ b/storage/innobase/include/dict0dict.h 2010-12-03 17:19:24.888965622 +0900
+@@ -1084,10 +1084,11 @@
+ dict_update_statistics(
+ /*===================*/
+ dict_table_t* table, /*!< in/out: table */
+- ibool only_calc_if_missing_stats);/*!< in: only
++ ibool only_calc_if_missing_stats, /*!< in: only
+ update/recalc the stats if they have
+ not been initialized yet, otherwise
+ do nothing */
++ ibool sync);
+ /********************************************************************//**
+ Reserves the dictionary system mutex for MySQL. */
+ UNIV_INTERN
+@@ -1202,6 +1203,7 @@
+ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
+ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
+ dict_table_t* sys_fields; /*!< SYS_FIELDS table */
++ dict_table_t* sys_stats; /*!< SYS_STATS table */
+ };
+ #endif /* !UNIV_HOTBACKUP */
+
+diff -ruN a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
+--- a/storage/innobase/include/dict0load.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/dict0load.h 2010-12-03 17:19:24.889947481 +0900
+@@ -41,6 +41,7 @@
+ SYS_FIELDS,
+ SYS_FOREIGN,
+ SYS_FOREIGN_COLS,
++ SYS_STATS,
+
+ /* This must be last item. Defines the number of system tables. */
+ SYS_NUM_SYSTEM_TABLES
+@@ -319,6 +320,19 @@
+ const char** ref_col_name, /*!< out: referenced column name
+ in referenced table */
+ ulint* pos); /*!< out: column position */
++/********************************************************************//**
++This function parses a SYS_STATS record, extracts the necessary
++information from it and returns that information to the caller.
++@return error message, or NULL on success */
++UNIV_INTERN
++const char*
++dict_process_sys_stats_rec(
++/*=============================*/
++ mem_heap_t* heap, /*!< in/out: heap memory */
++ const rec_t* rec, /*!< in: current SYS_STATS rec */
++ index_id_t* index_id, /*!< out: INDEX_ID */
++ ulint* key_cols, /*!< out: KEY_COLS */
++ ib_uint64_t* diff_vals); /*!< out: DIFF_VALS */
+ #ifndef UNIV_NONINL
+ #include "dict0load.ic"
+ #endif
+diff -ruN a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
+--- a/storage/innobase/include/page0cur.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/page0cur.h 2010-12-03 17:19:24.891954511 +0900
+@@ -293,6 +293,22 @@
+ /*==========================*/
+ buf_block_t* block, /*!< in: page */
+ page_cur_t* cursor);/*!< out: page cursor */
++/* Positions a page cursor on the nth user record of a page; defined in page0cur.c. */
++UNIV_INTERN
++void
++page_cur_open_on_nth_user_rec(
++/*==========================*/
++ buf_block_t* block, /*!< in: page */
++ page_cur_t* cursor, /*!< out: page cursor */
++ ulint nth); /*!< in: 1-based position of the wanted user record */
++/* Positions a page cursor on a random user record at or after the nth one; TRUE if the nth itself was chosen. */
++UNIV_INTERN
++ibool
++page_cur_open_on_rnd_user_rec_after_nth(
++/*==========================*/
++ buf_block_t* block, /*!< in: page */
++ page_cur_t* cursor, /*!< out: page cursor */
++ ulint nth); /*!< in: 1-based position of the first eligible user record */
+ #endif /* !UNIV_HOTBACKUP */
+ /***********************************************************//**
+ Parses a log record of a record insert on a page.
+diff -ruN a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
+--- a/storage/innobase/include/que0que.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/que0que.h 2010-12-03 17:19:24.892947946 +0900
+@@ -492,6 +492,8 @@
+ #define QUE_NODE_CALL 31
+ #define QUE_NODE_EXIT 32
+
++#define QUE_NODE_INSERT_STATS 34
++
+ /* Query thread states */
+ #define QUE_THR_RUNNING 1
+ #define QUE_THR_PROCEDURE_WAIT 2
+diff -ruN a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
+--- a/storage/innobase/include/rem0cmp.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/rem0cmp.h 2010-12-03 17:19:24.893953395 +0900
+@@ -169,10 +169,11 @@
+ matched fields; when the function returns,
+ contains the value the for current
+ comparison */
+- ulint* matched_bytes);/*!< in/out: number of already matched
++ ulint* matched_bytes, /*!< in/out: number of already matched
+ bytes within the first field not completely
+ matched; when the function returns, contains
+ the value for the current comparison */
++ ulint stats_method);
+ /*************************************************************//**
+ This function is used to compare two physical records. Only the common
+ first fields are compared.
+diff -ruN a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic
+--- a/storage/innobase/include/rem0cmp.ic 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/rem0cmp.ic 2010-12-03 17:19:24.902983425 +0900
+@@ -87,5 +87,5 @@
+ ulint match_b = 0;
+
+ return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
+- &match_f, &match_b));
++ &match_f, &match_b, 0));
+ }
+diff -ruN a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
+--- a/storage/innobase/include/row0mysql.h 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/include/row0mysql.h 2010-12-03 17:19:24.904973020 +0900
+@@ -387,6 +387,14 @@
+ then checked for not being too
+ large. */
+ /*********************************************************************//**
++Builds and runs the query graph that inserts an index's rows into SYS_STATS. @return error code */
++UNIV_INTERN
++int
++row_insert_stats_for_mysql(
++/*=======================*/
++ dict_index_t* index,
++ trx_t* trx);
++/*********************************************************************//**
+ Scans a table create SQL string and adds to the data dictionary
+ the foreign key constraints declared in the string. This function
+ should be called after the indexes for a table have been created.
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:53:54.622036720 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 17:19:24.906953188 +0900
+@@ -209,6 +209,13 @@
+ extern ibool srv_innodb_status;
+
+ extern unsigned long long srv_stats_sample_pages;
++extern ulint srv_stats_method;
++#define SRV_STATS_METHOD_NULLS_EQUAL 0
++#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1
++#define SRV_STATS_METHOD_IGNORE_NULLS 2
++extern ulint srv_stats_auto_update;
++extern ulint srv_stats_update_need_lock;
++extern ibool srv_use_sys_stats_table;
+
+ extern ibool srv_use_doublewrite_buf;
+ extern ibool srv_use_checksums;
+diff -ruN a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
+--- a/storage/innobase/page/page0cur.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/page/page0cur.c 2010-12-03 17:19:24.908973357 +0900
+@@ -564,6 +564,74 @@
+ } while (rnd--);
+ }
+
++UNIV_INTERN
++void
++page_cur_open_on_nth_user_rec(
++/*==========================*/
++ buf_block_t* block, /*!< in: page */
++ page_cur_t* cursor, /*!< out: page cursor */
++ ulint nth) /*!< in: 1-based position of the wanted user record */
++{
++ ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
++ /* Leaves the cursor on the nth user record, or on the last one when nth exceeds n_recs. */
++ page_cur_set_before_first(block, cursor);
++
++ if (UNIV_UNLIKELY(n_recs == 0)) {
++ /* Empty page: the cursor stays before the first record. */
++ return;
++ }
++ /* NOTE(review): nth == 0 presumably wraps here (ulint looks unsigned) and is then clamped to the last record - confirm. */
++ nth--;
++
++ if (nth >= n_recs) {
++ nth = n_recs - 1;
++ }
++ /* The do-while advances nth + 1 times from the before-first position. */
++ do {
++ page_cur_move_to_next(cursor);
++ } while (nth--);
++}
++/* Positions the cursor on a random user record at or after the nth one; returns TRUE when the nth itself was picked. */
++UNIV_INTERN
++ibool
++page_cur_open_on_rnd_user_rec_after_nth(
++/*==========================*/
++ buf_block_t* block, /*!< in: page */
++ page_cur_t* cursor, /*!< out: page cursor */
++ ulint nth) /*!< in: 1-based position of the first eligible user record */
++{
++ ulint rnd;
++ ulint n_recs = page_get_n_recs(buf_block_get_frame(block));
++ ibool ret;
++
++ page_cur_set_before_first(block, cursor);
++
++ if (UNIV_UNLIKELY(n_recs == 0)) {
++ /* Empty page: the cursor stays before the first record. */
++ return (FALSE);
++ }
++ /* NOTE(review): nth == 0 presumably wraps here (ulint looks unsigned) and is then clamped to the last record - confirm. */
++ nth--;
++
++ if (nth >= n_recs) {
++ nth = n_recs - 1;
++ }
++ /* After the clamp nth < n_recs, so the modulus is non-zero and rnd lies in [nth, n_recs - 1]. */
++ rnd = (ulint) (nth + page_cur_lcg_prng() % (n_recs - nth));
++
++ if (rnd == nth) {
++ ret = TRUE;
++ } else {
++ ret = FALSE;
++ }
++ /* The do-while advances rnd + 1 times from the before-first position. */
++ do {
++ page_cur_move_to_next(cursor);
++ } while (rnd--);
++
++ return (ret);
++}
++
+ /***********************************************************//**
+ Writes the log record of a record insert on a page. */
+ static
+diff -ruN a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c
+--- a/storage/innobase/que/que0que.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/que/que0que.c 2010-12-03 17:19:24.910953422 +0900
+@@ -621,11 +621,21 @@
+
+ que_graph_free_recursive(cre_ind->ind_def);
+ que_graph_free_recursive(cre_ind->field_def);
++ if (srv_use_sys_stats_table)
++ que_graph_free_recursive(cre_ind->stats_def);
+ que_graph_free_recursive(cre_ind->commit_node);
+
+ mem_heap_free(cre_ind->heap);
+
+ break;
++ case QUE_NODE_INSERT_STATS:
++ cre_ind = node;
++
++ que_graph_free_recursive(cre_ind->stats_def);
++ que_graph_free_recursive(cre_ind->commit_node);
++
++ mem_heap_free(cre_ind->heap);
++ break;
+ case QUE_NODE_PROC:
+ que_graph_free_stat_list(((proc_node_t*)node)->stat_list);
+
+@@ -1138,6 +1148,8 @@
+ str = "CREATE TABLE";
+ } else if (type == QUE_NODE_CREATE_INDEX) {
+ str = "CREATE INDEX";
++ } else if (type == QUE_NODE_INSERT_STATS) {
++ str = "INSERT TO SYS_STATS";
+ } else if (type == QUE_NODE_FOR) {
+ str = "FOR LOOP";
+ } else if (type == QUE_NODE_RETURN) {
+@@ -1255,6 +1267,8 @@
+ thr = dict_create_table_step(thr);
+ } else if (type == QUE_NODE_CREATE_INDEX) {
+ thr = dict_create_index_step(thr);
++ } else if (type == QUE_NODE_INSERT_STATS) {
++ thr = dict_insert_stats_step(thr);
+ } else if (type == QUE_NODE_ROW_PRINTF) {
+ thr = row_printf_step(thr);
+ } else {
+diff -ruN a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c
+--- a/storage/innobase/rem/rem0cmp.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/rem/rem0cmp.c 2010-12-03 17:19:24.911953579 +0900
+@@ -866,10 +866,11 @@
+ matched fields; when the function returns,
+ contains the value the for current
+ comparison */
+- ulint* matched_bytes) /*!< in/out: number of already matched
++ ulint* matched_bytes, /*!< in/out: number of already matched
+ bytes within the first field not completely
+ matched; when the function returns, contains
+ the value for the current comparison */
++ ulint stats_method)
+ {
+ ulint rec1_n_fields; /* the number of fields in rec */
+ ulint rec1_f_len; /* length of current field in rec */
+@@ -962,7 +963,11 @@
+
+ if (rec1_f_len == rec2_f_len) {
+
+- goto next_field;
++ if (stats_method == SRV_STATS_METHOD_NULLS_EQUAL) {
++ goto next_field;
++ } else {
++ ret = -1;
++ }
+
+ } else if (rec2_f_len == UNIV_SQL_NULL) {
+
+diff -ruN a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c
+--- a/storage/innobase/row/row0merge.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0merge.c 2010-12-03 17:19:24.914955391 +0900
+@@ -2020,6 +2020,8 @@
+ "UPDATE SYS_INDEXES SET NAME=CONCAT('"
+ TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n"
+ "COMMIT WORK;\n"
++ /* Drop the statistics of the index. */
++ "DELETE FROM SYS_STATS WHERE INDEX_ID = :indexid;\n"
+ /* Drop the field definitions of the index. */
+ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
+ /* Drop the index definition and the B-tree. */
+diff -ruN a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
+--- a/storage/innobase/row/row0mysql.c 2010-11-03 07:01:13.000000000 +0900
++++ b/storage/innobase/row/row0mysql.c 2010-12-03 17:19:24.918953476 +0900
+@@ -921,6 +921,9 @@
+
+ table->stat_modified_counter = counter + 1;
+
++ if (!srv_stats_auto_update)
++ return;
++
+ /* Calculate new statistics if 1 / 16 of table has been modified
+ since the last time a statistics batch was run, or if
+ stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
+@@ -931,7 +934,7 @@
+ || ((ib_int64_t)counter > 16 + table->stat_n_rows / 16)) {
+
+ dict_update_statistics(table, FALSE /* update even if stats
+- are initialized */);
++ are initialized */, TRUE);
+ }
+ }
+
+@@ -2105,6 +2108,45 @@
+ }
+
+ /*********************************************************************//**
++Tries to insert rows for the given index to SYS_STATS. @return error code (trx->error_state) */
++UNIV_INTERN
++int
++row_insert_stats_for_mysql(
++/*=======================*/
++ dict_index_t* index,
++ trx_t* trx)
++{
++ ind_node_t* node;
++ mem_heap_t* heap;
++ que_thr_t* thr;
++ ulint err;
++
++ ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
++
++ trx->op_info = "try to insert rows to SYS_STATS";
++
++ trx_start_if_not_started(trx);
++ trx->error_state = DB_SUCCESS;
++
++ heap = mem_heap_create(512);
++
++ node = ind_insert_stats_graph_create(index, heap);
++
++ thr = pars_complete_graph_for_exec(node, trx, heap);
++
++ ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
++ que_run_threads(thr);
++
++ err = trx->error_state;
++
++ que_graph_free((que_t*) que_node_get_parent(thr));
++
++ trx->op_info = "";
++
++ return((int) err);
++}
++
++/*********************************************************************//**
+ Scans a table create SQL string and adds to the data dictionary
+ the foreign key constraints declared in the string. This function
+ should be called after the indexes for a table have been created.
+@@ -3024,7 +3066,7 @@
+ dict_table_autoinc_initialize(table, 1);
+ dict_table_autoinc_unlock(table);
+ dict_update_statistics(table, FALSE /* update even if stats are
+- initialized */);
++ initialized */, TRUE);
+
+ trx_commit_for_mysql(trx);
+
+@@ -3326,6 +3368,8 @@
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
++ " DELETE FROM SYS_STATS\n"
++ " WHERE INDEX_ID = index_id;\n"
+ " DELETE FROM SYS_FIELDS\n"
+ " WHERE INDEX_ID = index_id;\n"
+ " DELETE FROM SYS_INDEXES\n"
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:53:54.625288512 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 17:19:24.922953561 +0900
+@@ -395,6 +395,10 @@
+ /* When estimating number of different key values in an index, sample
+ this many index pages */
+ UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;
++UNIV_INTERN ulint srv_stats_method = 0;
++UNIV_INTERN ulint srv_stats_auto_update = 1;
++UNIV_INTERN ulint srv_stats_update_need_lock = 1;
++UNIV_INTERN ibool srv_use_sys_stats_table = FALSE;
+
+ UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+ UNIV_INTERN ibool srv_use_checksums = TRUE;
--- /dev/null
+# name : innodb_thread_concurrency_timer_based.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
+--- a/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:41:52.045404706 +0900
++++ b/storage/innobase/handler/ha_innodb.cc 2010-12-03 15:42:11.568959457 +0900
+@@ -148,6 +148,7 @@
+ static ulong innobase_write_io_threads;
+ static long innobase_buffer_pool_instances = 1;
+
++static my_bool innobase_thread_concurrency_timer_based;
+ static long long innobase_buffer_pool_size, innobase_log_file_size;
+
+ /** Percentage of the buffer pool to reserve for 'old' blocks.
+@@ -2496,6 +2497,9 @@
+ srv_n_log_files = (ulint) innobase_log_files_in_group;
+ srv_log_file_size = (ulint) innobase_log_file_size;
+
++ srv_thread_concurrency_timer_based =
++ (ibool) innobase_thread_concurrency_timer_based;
++
+ #ifdef UNIV_LOG_ARCHIVE
+ srv_log_archive_on = (ulint) innobase_log_archive;
+ #endif /* UNIV_LOG_ARCHIVE */
+@@ -11373,6 +11377,12 @@
+ "Maximum delay between polling for a spin lock (6 by default)",
+ NULL, NULL, 6L, 0L, ~0L, 0);
+
++static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
++ innobase_thread_concurrency_timer_based,
++ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
++ "Use InnoDB timer based concurrency throttling. ",
++ NULL, NULL, FALSE);
++
+ static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
+ PLUGIN_VAR_RQCMDARG,
+ "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
+@@ -11586,6 +11596,7 @@
+ MYSQL_SYSVAR(spin_wait_delay),
+ MYSQL_SYSVAR(table_locks),
+ MYSQL_SYSVAR(thread_concurrency),
++ MYSQL_SYSVAR(thread_concurrency_timer_based),
+ MYSQL_SYSVAR(thread_sleep_delay),
+ MYSQL_SYSVAR(autoinc_lock_mode),
+ MYSQL_SYSVAR(show_verbose_locks),
+diff -ruN a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
+--- a/storage/innobase/include/srv0srv.h 2010-12-03 15:37:45.543027751 +0900
++++ b/storage/innobase/include/srv0srv.h 2010-12-03 15:42:11.571024631 +0900
+@@ -164,6 +164,8 @@
+ extern ulint srv_mem_pool_size;
+ extern ulint srv_lock_table_size;
+
++extern ibool srv_thread_concurrency_timer_based;
++
+ extern ulint srv_n_file_io_threads;
+ extern ulong srv_read_ahead_threshold;
+ extern ulint srv_n_read_io_threads;
+diff -ruN a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
+--- a/storage/innobase/srv/srv0srv.c 2010-12-03 15:37:45.546023493 +0900
++++ b/storage/innobase/srv/srv0srv.c 2010-12-03 15:42:11.574955879 +0900
+@@ -344,6 +344,7 @@
+ computer. Bigger computers need bigger values. Value 0 will disable the
+ concurrency check. */
+
++UNIV_INTERN ibool srv_thread_concurrency_timer_based = FALSE;
+ UNIV_INTERN ulong srv_thread_concurrency = 0;
+
+ /* this mutex protects srv_conc data structures */
+@@ -1130,6 +1131,75 @@
+ /*********************************************************************//**
+ Puts an OS thread to wait if there are too many concurrent threads
+ (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
++
++#ifdef HAVE_ATOMIC_BUILTINS
++static void
++enter_innodb_with_tickets(trx_t* trx)
++{
++ trx->declared_to_be_inside_innodb = TRUE;
++ trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
++ return;
++}
++
++static void
++srv_conc_enter_innodb_timer_based(trx_t* trx)
++{
++ lint conc_n_threads;
++ ibool has_yielded = FALSE;
++ ulint has_slept = 0;
++
++ if (trx->declared_to_be_inside_innodb) {
++ ut_print_timestamp(stderr);
++ fputs(
++" InnoDB: Error: trying to declare trx to enter InnoDB, but\n"
++"InnoDB: it already is declared.\n", stderr);
++ trx_print(stderr, trx, 0);
++ putc('\n', stderr);
++ }
++retry:
++ if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
++ conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);
++ if (conc_n_threads <= (lint) srv_thread_concurrency) {
++ enter_innodb_with_tickets(trx);
++ return;
++ }
++ os_atomic_increment_lint(&srv_conc_n_threads, -1);
++ }
++ if (!has_yielded)
++ {
++ has_yielded = TRUE;
++ os_thread_yield();
++ goto retry;
++ }
++ if (trx->has_search_latch
++ || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
++
++ conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);
++ enter_innodb_with_tickets(trx);
++ return;
++ }
++ if (has_slept < 2)
++ {
++ trx->op_info = "sleeping before entering InnoDB";
++ os_thread_sleep(10000);
++ trx->op_info = "";
++ has_slept++;
++ }
++ conc_n_threads = os_atomic_increment_lint(&srv_conc_n_threads, 1);
++ enter_innodb_with_tickets(trx);
++ return;
++}
++
++static void
++srv_conc_exit_innodb_timer_based(trx_t* trx)
++{
++ os_atomic_increment_lint(&srv_conc_n_threads, -1);
++ trx->declared_to_be_inside_innodb = FALSE;
++ trx->n_tickets_to_enter_innodb = 0;
++ return;
++}
++#endif
++
+ UNIV_INTERN
+ void
+ srv_conc_enter_innodb(
+@@ -1160,6 +1230,13 @@
+ return;
+ }
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (srv_thread_concurrency_timer_based) {
++ srv_conc_enter_innodb_timer_based(trx);
++ return;
++ }
++#endif
++
+ os_fast_mutex_lock(&srv_conc_mutex);
+ retry:
+ if (trx->declared_to_be_inside_innodb) {
+@@ -1305,6 +1382,14 @@
+ }
+
+ ut_ad(srv_conc_n_threads >= 0);
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (srv_thread_concurrency_timer_based) {
++ os_atomic_increment_lint(&srv_conc_n_threads, 1);
++ trx->declared_to_be_inside_innodb = TRUE;
++ trx->n_tickets_to_enter_innodb = 1;
++ return;
++ }
++#endif
+
+ os_fast_mutex_lock(&srv_conc_mutex);
+
+@@ -1338,6 +1423,13 @@
+ return;
+ }
+
++#ifdef HAVE_ATOMIC_BUILTINS
++ if (srv_thread_concurrency_timer_based) {
++ srv_conc_exit_innodb_timer_based(trx);
++ return;
++ }
++#endif
++
+ os_fast_mutex_lock(&srv_conc_mutex);
+
+ ut_ad(srv_conc_n_threads > 0);
--- /dev/null
+# name : log_connection_error.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/log_connection_error.patch b/patch_info/log_connection_error.patch
+--- a/patch_info/log_connection_error.patch 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/log_connection_error.patch 2010-07-28 16:47:47.634070367 +0400
+@@ -0,0 +1,6 @@
++File=log_connection_error.patch
++Name=logging abandoned connections
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-07-28 16:47:47.105319218 +0400
++++ b/sql/mysqld.cc 2010-07-28 16:47:47.644101813 +0400
+@@ -4995,6 +4995,10 @@
+
+ DBUG_PRINT("error",("Too many connections"));
+ close_connection(thd, ER_CON_COUNT_ERROR, 1);
++ if (global_system_variables.log_warnings)
++ {
++ sql_print_warning("%s", ER_DEFAULT(ER_CON_COUNT_ERROR));
++ }
+ delete thd;
+ DBUG_VOID_RETURN;
+ }
+@@ -5375,6 +5379,10 @@
+ if (!(thd->net.vio= vio_new_win32pipe(hConnectedPipe)) ||
+ my_net_init(&thd->net, thd->net.vio))
+ {
++ if (global_system_variables.log_warnings)
++ {
++ sql_print_warning("%s", ER_DEFAULT(ER_OUT_OF_RESOURCES));
++ }
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+ delete thd;
+ continue;
+@@ -5570,6 +5578,10 @@
+ event_conn_closed)) ||
+ my_net_init(&thd->net, thd->net.vio))
+ {
++ if (global_system_variables.log_warnings)
++ {
++ sql_print_warning("%s", ER_DEFAULT(ER_OUT_OF_RESOURCES));
++ }
+ close_connection(thd, ER_OUT_OF_RESOURCES, 1);
+ errmsg= 0;
+ goto errorconn;
--- /dev/null
+# name : log_warnings_silence.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/log_warnings_silence.patch b/patch_info/log_warnings_silence.patch
+--- a/patch_info/log_warnings_silence.patch 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/log_warnings_silence.patch 2011-01-05 20:35:46.000000000 +0300
+@@ -0,0 +1,8 @@
++File=log_warnings_silence.patch
++Name=Disable log warnings for enumerated warnings (old name:suppress_log_warning_1592.patch)
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2011-01-05 rename patch suppress_log_warning_1592.patch to log_warnings_silence.patch. Also remove boolean system variable "suppress_log_warning_1592" and add set variable "log_warnings_silence" (possible values: 1592)
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2011-01-05 20:31:33.000000000 +0300
++++ b/sql/mysqld.cc 2011-01-05 20:32:34.000000000 +0300
+@@ -617,6 +617,8 @@
+ SHOW_COMP_OPTION have_crypt, have_compress;
+ SHOW_COMP_OPTION have_profiling;
+
++ulonglong opt_log_warnings_silence= 0;
++
+ /* Thread specific variables */
+
+ pthread_key(MEM_ROOT**,THR_MALLOC);
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2011-01-05 20:31:33.000000000 +0300
++++ b/sql/mysqld.h 2011-01-05 20:32:35.000000000 +0300
+@@ -224,6 +224,8 @@
+ extern TYPELIB thread_handling_typelib;
+ extern my_decimal decimal_zero;
+
++extern ulonglong opt_log_warnings_silence;
++
+ extern pthread_key(MEM_ROOT**,THR_MALLOC);
+
+ #ifdef HAVE_PSI_INTERFACE
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2011-01-05 20:31:32.000000000 +0300
++++ b/sql/sql_class.cc 2011-01-05 20:32:34.000000000 +0300
+@@ -4544,7 +4544,7 @@
+ ER_BINLOG_UNSAFE_STATEMENT,
+ ER(ER_BINLOG_UNSAFE_STATEMENT),
+ ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
+- if (global_system_variables.log_warnings)
++ if (global_system_variables.log_warnings && ((opt_log_warnings_silence & (ULL(1) << log_warnings_silence_1592)) == 0))
+ {
+ char buf[MYSQL_ERRMSG_SIZE * 2];
+ sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h 2011-01-05 20:31:33.000000000 +0300
++++ b/sql/sql_class.h 2011-01-05 20:32:37.000000000 +0300
+@@ -83,6 +83,7 @@
+ SLOG_F_TMP_TABLE, SLOG_F_TMP_DISK, SLOG_F_FILESORT,
+ SLOG_F_FILESORT_DISK
+ };
++enum enum_log_warnings_silence { log_warnings_silence_1592 };
+ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
+ SLAVE_EXEC_MODE_IDEMPOTENT,
+ SLAVE_EXEC_MODE_LAST_BIT};
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2011-01-05 20:31:33.000000000 +0300
++++ b/sql/sys_vars.cc 2011-01-05 20:32:38.000000000 +0300
+@@ -1408,6 +1408,15 @@
+ READ_ONLY GLOBAL_VAR(mysqld_port), CMD_LINE(REQUIRED_ARG, 'P'),
+ VALID_RANGE(0, UINT_MAX32), DEFAULT(0), BLOCK_SIZE(1));
+
++const char *log_warnings_silence_name[]= { "1592" };
++static Sys_var_set Sys_log_warnings_silence(
++ "log_warnings_silence",
++ "disable logging of enumerated warnings: "
++ "1592: unsafe statements for binary logging; "
++ "possible values : [1592]",
++ GLOBAL_VAR(opt_log_warnings_silence), CMD_LINE(REQUIRED_ARG),
++ log_warnings_silence_name, DEFAULT(0));
++
+ static Sys_var_ulong Sys_preload_buff_size(
+ "preload_buffer_size",
+ "The size of the buffer that is allocated when preloading indexes",
#!!! notice !!!
# Any small change to this file in the main branch
# should be done or reviewed by the maintainer!
-diff -ruN a/patch_info/microsec_process.info b/patch_info/microsec_process.info
+diff -ruN /dev/null b/patch_info/microsec_process.info
--- /dev/null 1970-01-01 09:00:00.000000000 +0900
-+++ b/patch_info/microsec_process.info 2010-08-27 14:45:52.941058849 +0900
++++ b/patch_info/microsec_process.info 2010-12-02 20:41:41.616069579 +0900
@@ -0,0 +1,8 @@
+File=microsec_process.patch
+Name=Adds INFOMATION_SCHEMA.PROCESSLIST with TIME_MS column
+2010-01
+Ported to 5.1.42
diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
---- a/sql/sql_show.cc 2010-08-27 14:38:08.781057666 +0900
-+++ b/sql/sql_show.cc 2010-08-27 14:45:52.946058726 +0900
-@@ -1919,7 +1919,8 @@
+--- a/sql/sql_show.cc 2010-12-02 19:22:40.054024541 +0900
++++ b/sql/sql_show.cc 2010-12-02 20:41:41.622941425 +0900
+@@ -1882,7 +1882,8 @@
TABLE *table= tables->table;
CHARSET_INFO *cs= system_charset_info;
char *user;
DBUG_ENTER("fill_process_list");
user= thd->security_ctx->master_access & PROCESS_ACL ?
-@@ -2024,6 +2025,10 @@
+@@ -1966,6 +1967,10 @@
}
- pthread_mutex_unlock(&tmp->LOCK_thd_data);
+ mysql_mutex_unlock(&tmp->LOCK_thd_data);
+ /* TIME_MS */
+ table->field[8]->store(((tmp->start_utime ?
+
if (schema_table_store_record(thd, table))
{
- VOID(pthread_mutex_unlock(&LOCK_thread_count));
-@@ -6695,6 +6700,8 @@
+ mysql_mutex_unlock(&LOCK_thread_count);
+@@ -7220,6 +7225,8 @@
{"STATE", 64, MYSQL_TYPE_STRING, 0, 1, "State", SKIP_OPEN_TABLE},
{"INFO", PROCESS_LIST_INFO_WIDTH, MYSQL_TYPE_STRING, 0, 1, "Info",
SKIP_OPEN_TABLE},
+++ /dev/null
-## fix-dummy-thread-race-condition.dpatch by <mkoegler@auto.tuwien.ac.at>
-##
-## Avoid dummy thread for pthread_exit workaround
-
---- mysql-5.1.40/mysys/my_thr_init.c~ 2009-10-06 20:49:02.000000000 +0300
-+++ mysql-5.1.40/mysys/my_thr_init.c 2009-10-27 15:01:59.807275693 +0200
-@@ -47,24 +47,6 @@
- pthread_mutexattr_t my_errorcheck_mutexattr;
- #endif
-
--#ifdef TARGET_OS_LINUX
--
--/*
-- Dummy thread spawned in my_thread_global_init() below to avoid
-- race conditions in NPTL pthread_exit code.
--*/
--
--static pthread_handler_t
--nptl_pthread_exit_hack_handler(void *arg __attribute((unused)))
--{
-- /* Do nothing! */
-- pthread_exit(0);
-- return 0;
--}
--
--#endif /* TARGET_OS_LINUX */
--
--
- static uint get_thread_lib(void);
-
- /*
-@@ -89,33 +71,6 @@
- return 1;
- }
-
--#ifdef TARGET_OS_LINUX
-- /*
-- BUG#24507: Race conditions inside current NPTL pthread_exit()
-- implementation.
--
-- To avoid a possible segmentation fault during concurrent
-- executions of pthread_exit(), a dummy thread is spawned which
-- initializes internal variables of pthread lib. See bug description
-- for a full explanation.
--
-- TODO: Remove this code when fixed versions of glibc6 are in common
-- use.
-- */
-- if (thd_lib_detected == THD_LIB_NPTL)
-- {
-- pthread_t dummy_thread;
-- pthread_attr_t dummy_thread_attr;
--
-- pthread_attr_init(&dummy_thread_attr);
-- pthread_attr_setdetachstate(&dummy_thread_attr, PTHREAD_CREATE_JOINABLE);
--
-- if (pthread_create(&dummy_thread,&dummy_thread_attr,
-- nptl_pthread_exit_hack_handler, NULL) == 0)
-- (void)pthread_join(dummy_thread, NULL);
-- }
--#endif /* TARGET_OS_LINUX */
--
- #ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
- /*
- Set mutex type to "fast" a.k.a "adaptive"
+++ /dev/null
-# name : innodb_split_buf_pool_mutex.patch
-# introduced : 11 or before
-# maintainer : Yasufumi
-#
-#!!! notice !!!
-# Any small change to this file in the main branch
-# should be done or reviewed by the maintainer!
-diff -ruN a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c
---- a/storage/innodb_plugin/btr/btr0cur.c 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/btr/btr0cur.c 2010-08-27 16:11:40.593021205 +0900
-@@ -3764,7 +3764,8 @@
-
- mtr_commit(mtr);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
- mutex_enter(&block->mutex);
-
- /* Only free the block if it is still allocated to
-@@ -3775,17 +3776,22 @@
- && buf_block_get_space(block) == space
- && buf_block_get_page_no(block) == page_no) {
-
-- if (buf_LRU_free_block(&block->page, all, NULL)
-+ if (buf_LRU_free_block(&block->page, all, NULL, TRUE)
- != BUF_LRU_FREED
-- && all && block->page.zip.data) {
-+ && all && block->page.zip.data
-+ /* Now, buf_LRU_free_block() may release mutex temporarily */
-+ && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
-+ && buf_block_get_space(block) == space
-+ && buf_block_get_page_no(block) == page_no) {
- /* Attempt to deallocate the uncompressed page
- if the whole block cannot be deallocted. */
-
-- buf_LRU_free_block(&block->page, FALSE, NULL);
-+ buf_LRU_free_block(&block->page, FALSE, NULL, TRUE);
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- mutex_exit(&block->mutex);
- }
-
-diff -ruN a/storage/innodb_plugin/btr/btr0sea.c b/storage/innodb_plugin/btr/btr0sea.c
---- a/storage/innodb_plugin/btr/btr0sea.c 2010-08-27 16:11:12.151975789 +0900
-+++ b/storage/innodb_plugin/btr/btr0sea.c 2010-08-27 16:11:40.593021205 +0900
-@@ -1199,7 +1199,7 @@
- ulint* offsets;
-
- rw_lock_x_lock(&btr_search_latch);
-- buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- table = btr_search_sys->hash_index;
-
-@@ -1285,7 +1285,7 @@
- bpage = UT_LIST_GET_PREV(LRU, bpage);
- }
-
-- buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- rw_lock_x_unlock(&btr_search_latch);
-
- if (UNIV_LIKELY_NULL(heap)) {
-@@ -1878,7 +1878,8 @@
- rec_offs_init(offsets_);
-
- rw_lock_x_lock(&btr_search_latch);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_x_lock(&page_hash_latch);
-
- cell_count = hash_get_n_cells(btr_search_sys->hash_index);
-
-@@ -1886,11 +1887,13 @@
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if ((i != 0) && ((i % chunk_size) == 0)) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_x_unlock(&page_hash_latch);
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_x_lock(&page_hash_latch);
- }
-
- node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
-@@ -1997,11 +2000,13 @@
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if (i != 0) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_x_unlock(&page_hash_latch);
- rw_lock_x_unlock(&btr_search_latch);
- os_thread_yield();
- rw_lock_x_lock(&btr_search_latch);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_x_lock(&page_hash_latch);
- }
-
- if (!ha_validate(btr_search_sys->hash_index, i, end_index)) {
-@@ -2009,7 +2014,8 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_x_unlock(&page_hash_latch);
- rw_lock_x_unlock(&btr_search_latch);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
-diff -ruN a/storage/innodb_plugin/buf/buf0buddy.c b/storage/innodb_plugin/buf/buf0buddy.c
---- a/storage/innodb_plugin/buf/buf0buddy.c 2010-08-27 15:54:59.015990108 +0900
-+++ b/storage/innodb_plugin/buf/buf0buddy.c 2010-08-27 16:11:40.596022762 +0900
-@@ -82,10 +82,11 @@
- if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
- #endif /* UNIV_DEBUG_VALGRIND */
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&zip_free_mutex));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
- ut_ad(buf_pool->zip_free[i].start != bpage);
-- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
-+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_free[i], bpage);
-
- #ifdef UNIV_DEBUG_VALGRIND
- if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
-@@ -103,8 +104,8 @@
- ulint i) /*!< in: index of buf_pool->zip_free[] */
- {
- #ifdef UNIV_DEBUG_VALGRIND
-- buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
-- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
-+ buf_page_t* prev = UT_LIST_GET_PREV(zip_list, bpage);
-+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
-
- if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
- if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
-@@ -113,9 +114,10 @@
- ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
- #endif /* UNIV_DEBUG_VALGRIND */
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&zip_free_mutex));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
-- UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
-+ UT_LIST_REMOVE(zip_list, buf_pool->zip_free[i], bpage);
-
- #ifdef UNIV_DEBUG_VALGRIND
- if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
-@@ -134,12 +136,13 @@
- {
- buf_page_t* bpage;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&zip_free_mutex));
- ut_a(i < BUF_BUDDY_SIZES);
-
- #ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
-- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
- #endif /* !UNIV_DEBUG_VALGRIND */
-@@ -182,16 +185,19 @@
- void
- buf_buddy_block_free(
- /*=================*/
-- void* buf) /*!< in: buffer frame to deallocate */
-+ void* buf, /*!< in: buffer frame to deallocate */
-+ ibool have_page_hash_mutex)
- {
- const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
- buf_page_t* bpage;
- buf_block_t* block;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(!mutex_own(&buf_pool_zip_mutex));
- ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
-
-+ mutex_enter(&zip_hash_mutex);
-+
- HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
- && bpage->in_zip_hash && !bpage->in_page_hash),
-@@ -203,12 +209,14 @@
- ut_d(bpage->in_zip_hash = FALSE);
- HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
-
-+ mutex_exit(&zip_hash_mutex);
-+
- ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
- UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
-
- block = (buf_block_t*) bpage;
- mutex_enter(&block->mutex);
-- buf_LRU_block_free_non_file_page(block);
-+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
- mutex_exit(&block->mutex);
-
- ut_ad(buf_buddy_n_frames > 0);
-@@ -224,7 +232,7 @@
- buf_block_t* block) /*!< in: buffer frame to allocate */
- {
- const ulint fold = BUF_POOL_ZIP_FOLD(block);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(!mutex_own(&buf_pool_zip_mutex));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
-
-@@ -236,7 +244,10 @@
- ut_ad(!block->page.in_page_hash);
- ut_ad(!block->page.in_zip_hash);
- ut_d(block->page.in_zip_hash = TRUE);
-+
-+ mutex_enter(&zip_hash_mutex);
- HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
-+ mutex_exit(&zip_hash_mutex);
-
- ut_d(buf_buddy_n_frames++);
- }
-@@ -270,7 +281,7 @@
- bpage->state = BUF_BLOCK_ZIP_FREE;
- #ifndef UNIV_DEBUG_VALGRIND
- /* Valgrind would complain about accessing free memory. */
-- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(
- ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
-@@ -292,24 +303,28 @@
- /*================*/
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
-- ibool* lru) /*!< in: pointer to a variable that will be assigned
-+ ibool* lru, /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
- or NULL if the LRU list should not be used */
-+ ibool have_page_hash_mutex)
- {
- buf_block_t* block;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(!mutex_own(&buf_pool_zip_mutex));
-
- if (i < BUF_BUDDY_SIZES) {
- /* Try to allocate from the buddy system. */
-+ mutex_enter(&zip_free_mutex);
- block = buf_buddy_alloc_zip(i);
-
- if (block) {
-
- goto func_exit;
- }
-+
-+ mutex_exit(&zip_free_mutex);
- }
-
- /* Try allocating from the buf_pool->free list. */
-@@ -326,18 +341,29 @@
- }
-
- /* Try replacing an uncompressed page in the buffer pool. */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ if (have_page_hash_mutex) {
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- block = buf_LRU_get_free_block(0);
- *lru = TRUE;
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ if (have_page_hash_mutex) {
-+ rw_lock_x_lock(&page_hash_latch);
-+ }
-
- alloc_big:
- buf_buddy_block_register(block);
-
-+ mutex_enter(&zip_free_mutex);
- block = buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES);
-
- func_exit:
- buf_buddy_stat[i].used++;
-+ mutex_exit(&zip_free_mutex);
-+
- return(block);
- }
-
-@@ -353,7 +379,10 @@
- {
- buf_page_t* b;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
-@@ -362,7 +391,7 @@
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
-- ut_error;
-+ /* ut_error; */ /* optimistic */
- case BUF_BLOCK_ZIP_DIRTY:
- /* Cannot relocate dirty pages. */
- return(FALSE);
-@@ -372,9 +401,17 @@
- }
-
- mutex_enter(&buf_pool_zip_mutex);
-+ mutex_enter(&zip_free_mutex);
-
- if (!buf_page_can_relocate(bpage)) {
- mutex_exit(&buf_pool_zip_mutex);
-+ mutex_exit(&zip_free_mutex);
-+ return(FALSE);
-+ }
-+
-+ if (bpage != buf_page_hash_get(bpage->space, bpage->offset)) {
-+ mutex_exit(&buf_pool_zip_mutex);
-+ mutex_exit(&zip_free_mutex);
- return(FALSE);
- }
-
-@@ -382,18 +419,21 @@
- ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
-
- /* relocate buf_pool->zip_clean */
-- b = UT_LIST_GET_PREV(list, dpage);
-- UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
-+ mutex_enter(&flush_list_mutex);
-+ b = UT_LIST_GET_PREV(zip_list, dpage);
-+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, dpage);
-
- if (b) {
-- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
-+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, dpage);
- } else {
-- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
-+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, dpage);
- }
-+ mutex_exit(&flush_list_mutex);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
-
- mutex_exit(&buf_pool_zip_mutex);
-+ mutex_exit(&zip_free_mutex);
- return(TRUE);
- }
-
-@@ -406,13 +446,15 @@
- /*===============*/
- void* src, /*!< in: block to relocate */
- void* dst, /*!< in: free block to relocate to */
-- ulint i) /*!< in: index of buf_pool->zip_free[] */
-+ ulint i, /*!< in: index of buf_pool->zip_free[] */
-+ ibool have_page_hash_mutex)
- {
- buf_page_t* bpage;
- const ulint size = BUF_BUDDY_LOW << i;
- ullint usec = ut_time_us(NULL);
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&zip_free_mutex));
- ut_ad(!mutex_own(&buf_pool_zip_mutex));
- ut_ad(!ut_align_offset(src, size));
- ut_ad(!ut_align_offset(dst, size));
-@@ -434,6 +476,12 @@
- /* This is a compressed page. */
- mutex_t* mutex;
-
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&zip_free_mutex);
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ }
-+
- /* The src block may be split into smaller blocks,
- some of which may be free. Thus, the
- mach_read_from_4() calls below may attempt to read
-@@ -458,6 +506,11 @@
- added to buf_pool->page_hash yet. Obviously,
- it cannot be relocated. */
-
-+ if (!have_page_hash_mutex) {
-+ mutex_enter(&zip_free_mutex);
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- return(FALSE);
- }
-
-@@ -467,18 +520,27 @@
- For the sake of simplicity, give up. */
- ut_ad(page_zip_get_size(&bpage->zip) < size);
-
-+ if (!have_page_hash_mutex) {
-+ mutex_enter(&zip_free_mutex);
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- return(FALSE);
- }
-
-+ /* To keep latch order */
-+ if (have_page_hash_mutex)
-+ mutex_exit(&zip_free_mutex);
-+
- /* The block must have been allocated, but it may
- contain uninitialized data. */
- UNIV_MEM_ASSERT_W(src, size);
-
-- mutex = buf_page_get_mutex(bpage);
-+ mutex = buf_page_get_mutex_enter(bpage);
-
-- mutex_enter(mutex);
-+ mutex_enter(&zip_free_mutex);
-
-- if (buf_page_can_relocate(bpage)) {
-+ if (mutex && buf_page_can_relocate(bpage)) {
- /* Relocate the compressed page. */
- ut_a(bpage->zip.data == src);
- memcpy(dst, src, size);
-@@ -493,10 +555,22 @@
- buddy_stat->relocated_usec
- += ut_time_us(NULL) - usec;
- }
-+
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- return(TRUE);
- }
-
-- mutex_exit(mutex);
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
-+
-+ if (mutex) {
-+ mutex_exit(mutex);
-+ }
- } else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
- /* This must be a buf_page_t object. */
- #if UNIV_WORD_SIZE == 4
-@@ -505,10 +579,31 @@
- about uninitialized pad bytes. */
- UNIV_MEM_ASSERT_RW(src, size);
- #endif
-+
-+ mutex_exit(&zip_free_mutex);
-+
-+ if (!have_page_hash_mutex) {
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ }
-+
- if (buf_buddy_relocate_block(src, dst)) {
-+ mutex_enter(&zip_free_mutex);
-+
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
-
- goto success;
- }
-+
-+ mutex_enter(&zip_free_mutex);
-+
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- }
-
- return(FALSE);
-@@ -522,13 +617,15 @@
- /*===============*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
-- ulint i) /*!< in: index of buf_pool->zip_free[],
-+ ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
-+ ibool have_page_hash_mutex)
- {
- buf_page_t* bpage;
- buf_page_t* buddy;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&zip_free_mutex));
- ut_ad(!mutex_own(&buf_pool_zip_mutex));
- ut_ad(i <= BUF_BUDDY_SIZES);
- ut_ad(buf_buddy_stat[i].used > 0);
-@@ -539,7 +636,9 @@
- ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
-
- if (i == BUF_BUDDY_SIZES) {
-- buf_buddy_block_free(buf);
-+ mutex_exit(&zip_free_mutex);
-+ buf_buddy_block_free(buf, have_page_hash_mutex);
-+ mutex_enter(&zip_free_mutex);
- return;
- }
-
-@@ -584,7 +683,7 @@
- ut_a(bpage != buf);
-
- {
-- buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
-+ buf_page_t* next = UT_LIST_GET_NEXT(zip_list, bpage);
- UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
- bpage = next;
- }
-@@ -593,13 +692,13 @@
- #ifndef UNIV_DEBUG_VALGRIND
- buddy_nonfree:
- /* Valgrind would complain about accessing free memory. */
-- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE)));
- #endif /* UNIV_DEBUG_VALGRIND */
-
- /* The buddy is not free. Is there a free block of this size? */
-- bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-+ bpage = UT_LIST_GET_LAST(buf_pool->zip_free[i]);
-
- if (bpage) {
- /* Remove the block from the free list, because a successful
-@@ -609,7 +708,7 @@
- buf_buddy_remove_from_free(bpage, i);
-
- /* Try to relocate the buddy of buf to the free block. */
-- if (buf_buddy_relocate(buddy, bpage, i)) {
-+ if (buf_buddy_relocate(buddy, bpage, i, have_page_hash_mutex)) {
-
- ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
- goto buddy_free2;
-@@ -629,14 +728,14 @@
-
- (Parts of the buddy can be free in
- buf_pool->zip_free[j] with j < i.) */
-- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
-+ ut_d(UT_LIST_VALIDATE(zip_list, buf_page_t, buf_pool->zip_free[i],
- ut_ad(buf_page_get_state(
- ut_list_node_313)
- == BUF_BLOCK_ZIP_FREE
- && ut_list_node_313 != buddy)));
- #endif /* !UNIV_DEBUG_VALGRIND */
-
-- if (buf_buddy_relocate(buddy, buf, i)) {
-+ if (buf_buddy_relocate(buddy, buf, i, have_page_hash_mutex)) {
-
- buf = bpage;
- UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
-diff -ruN a/storage/innodb_plugin/buf/buf0buf.c b/storage/innodb_plugin/buf/buf0buf.c
---- a/storage/innodb_plugin/buf/buf0buf.c 2010-08-27 15:55:39.385322978 +0900
-+++ b/storage/innodb_plugin/buf/buf0buf.c 2010-08-27 16:11:40.603021006 +0900
-@@ -251,6 +251,12 @@
- /** mutex protecting the buffer pool struct and control blocks, except the
- read-write lock in them */
- UNIV_INTERN mutex_t buf_pool_mutex;
-+UNIV_INTERN mutex_t LRU_list_mutex;
-+UNIV_INTERN mutex_t flush_list_mutex;
-+UNIV_INTERN rw_lock_t page_hash_latch;
-+UNIV_INTERN mutex_t free_list_mutex;
-+UNIV_INTERN mutex_t zip_free_mutex;
-+UNIV_INTERN mutex_t zip_hash_mutex;
- /** mutex protecting the control blocks of compressed-only pages
- (of type buf_page_t, not buf_block_t) */
- UNIV_INTERN mutex_t buf_pool_zip_mutex;
-@@ -661,9 +667,9 @@
- block->page.in_zip_hash = FALSE;
- block->page.in_flush_list = FALSE;
- block->page.in_free_list = FALSE;
-- block->in_unzip_LRU_list = FALSE;
- #endif /* UNIV_DEBUG */
- block->page.in_LRU_list = FALSE;
-+ block->in_unzip_LRU_list = FALSE;
- #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- block->n_pointers = 0;
- #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-@@ -748,8 +754,10 @@
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
- #endif
- /* Add the block to the free list */
-- UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
-+ mutex_enter(&free_list_mutex);
-+ UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
-+ mutex_exit(&free_list_mutex);
-
- block++;
- frame += UNIV_PAGE_SIZE;
-@@ -774,7 +782,7 @@
- ulint i;
-
- ut_ad(buf_pool);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
- block = chunk->blocks;
-
-@@ -826,7 +834,7 @@
- ulint i;
-
- ut_ad(buf_pool);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
-
- block = chunk->blocks;
-
-@@ -878,7 +886,7 @@
- ulint i;
-
- ut_ad(buf_pool);
-- ut_ad(buf_pool_mutex_own());
-+ ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-
- block = chunk->blocks;
-
-@@ -904,7 +912,7 @@
- buf_block_t* block;
- const buf_block_t* block_end;
-
-- ut_ad(buf_pool_mutex_own());
-+ ut_ad(buf_pool_mutex_own()); /* but we need all mutex here */
-
- block_end = chunk->blocks + chunk->size;
-
-@@ -916,8 +924,10 @@
- ut_ad(!block->in_unzip_LRU_list);
- ut_ad(!block->page.in_flush_list);
- /* Remove the block from the free list. */
-+ mutex_enter(&free_list_mutex);
- ut_ad(block->page.in_free_list);
-- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
-+ mutex_exit(&free_list_mutex);
-
- /* Free the latches. */
- mutex_free(&block->mutex);
-@@ -947,8 +957,17 @@
- /* 1. Initialize general fields
- ------------------------------- */
- mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
-+ mutex_create(&LRU_list_mutex, SYNC_BUF_LRU_LIST);
-+ mutex_create(&flush_list_mutex, SYNC_BUF_FLUSH_LIST);
-+ rw_lock_create(&page_hash_latch, SYNC_BUF_PAGE_HASH);
-+ mutex_create(&free_list_mutex, SYNC_BUF_FREE_LIST);
-+ mutex_create(&zip_free_mutex, SYNC_BUF_ZIP_FREE);
-+ mutex_create(&zip_hash_mutex, SYNC_BUF_ZIP_HASH);
-+
- mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
-
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
- buf_pool_mutex_enter();
-
- buf_pool->n_chunks = 1;
-@@ -983,6 +1002,8 @@
- --------------------------- */
- /* All fields are initialized by mem_zalloc(). */
-
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
- buf_pool_mutex_exit();
-
- btr_search_sys_create(buf_pool->curr_size
-@@ -1120,7 +1141,11 @@
- buf_page_t* b;
- ulint fold;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
-@@ -1204,7 +1229,8 @@
-
- try_again:
- btr_search_disable(); /* Empty the adaptive hash index again */
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- shrink_again:
- if (buf_pool->n_chunks <= 1) {
-@@ -1275,7 +1301,7 @@
-
- buf_LRU_make_block_old(&block->page);
- dirty++;
-- } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
-+ } else if (buf_LRU_free_block(&block->page, TRUE, NULL, FALSE)
- != BUF_LRU_FREED) {
- nonfree++;
- }
-@@ -1283,7 +1309,8 @@
- mutex_exit(&block->mutex);
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- /* Request for a flush of the chunk if it helps.
- Do not flush if there are non-free blocks, since
-@@ -1332,7 +1359,8 @@
- func_done:
- srv_buf_pool_old_size = srv_buf_pool_size;
- func_exit:
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- btr_search_enable();
- }
-
-@@ -1350,7 +1378,11 @@
- hash_table_t* zip_hash;
- buf_page_t* b;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ mutex_enter(&flush_list_mutex);
-+
-
- /* Free, create, and populate the hash table. */
- hash_table_free(buf_pool->page_hash);
-@@ -1392,7 +1424,7 @@
- in buf_pool->flush_list. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(zip_list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- ut_ad(!b->in_flush_list);
- ut_ad(b->in_LRU_list);
-@@ -1404,7 +1436,7 @@
- }
-
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(flush_list, b)) {
- ut_ad(b->in_flush_list);
- ut_ad(b->in_LRU_list);
- ut_ad(b->in_page_hash);
-@@ -1430,7 +1462,10 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ mutex_exit(&flush_list_mutex);
- }
-
- /********************************************************************//**
-@@ -1440,17 +1475,20 @@
- buf_pool_resize(void)
- /*=================*/
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- if (srv_buf_pool_old_size == srv_buf_pool_size) {
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- return;
- }
-
- if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- /* Disable adaptive hash indexes and empty the index
- in order to free up memory in the buffer pool chunks. */
-@@ -1484,7 +1522,8 @@
- }
-
- srv_buf_pool_old_size = srv_buf_pool_size;
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- }
-
- buf_pool_page_hash_rebuild();
-@@ -1500,13 +1539,15 @@
- /*================*/
- buf_page_t* bpage) /*!< in: buffer block of a file page */
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- ut_a(buf_page_in_file(bpage));
-
- buf_LRU_make_block_young(bpage);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- }
-
- /********************************************************************//**
-@@ -1528,14 +1569,20 @@
- ut_a(buf_page_in_file(bpage));
-
- if (buf_page_peek_if_too_old(bpage)) {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
- buf_LRU_make_block_young(bpage);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- } else if (!access_time) {
- ulint time_ms = ut_time_ms();
-- buf_pool_mutex_enter();
-+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
-+ //buf_pool_mutex_enter();
-+ if (block_mutex) {
- buf_page_set_accessed(bpage, time_ms);
-- buf_pool_mutex_exit();
-+ mutex_exit(block_mutex);
-+ }
-+ //buf_pool_mutex_exit();
- }
- }
-
-@@ -1551,7 +1598,8 @@
- {
- buf_block_t* block;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
-@@ -1559,7 +1607,8 @@
- block->check_index_page_at_flush = FALSE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
- }
-
- /********************************************************************//**
-@@ -1577,7 +1626,8 @@
- buf_block_t* block;
- ibool is_hashed;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
-@@ -1587,7 +1637,8 @@
- is_hashed = block->is_hashed;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(is_hashed);
- }
-@@ -1608,7 +1659,8 @@
- {
- buf_page_t* bpage;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- bpage = buf_page_hash_get(space, offset);
-
-@@ -1616,7 +1668,8 @@
- bpage->file_page_was_freed = TRUE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(bpage);
- }
-@@ -1636,7 +1689,8 @@
- {
- buf_page_t* bpage;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- bpage = buf_page_hash_get(space, offset);
-
-@@ -1644,7 +1698,8 @@
- bpage->file_page_was_freed = FALSE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(bpage);
- }
-@@ -1678,8 +1733,9 @@
- buf_pool->stat.n_page_gets++;
-
- for (;;) {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
- lookup:
-+ rw_lock_s_lock(&page_hash_latch);
- bpage = buf_page_hash_get(space, offset);
- if (bpage) {
- break;
-@@ -1687,7 +1743,8 @@
-
- /* Page not in buf_pool: needs to be read from file */
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- buf_read_page(space, zip_size, offset);
-
-@@ -1699,29 +1756,34 @@
- if (UNIV_UNLIKELY(!bpage->zip.data)) {
- /* There is no compressed page. */
- err_exit:
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
- return(NULL);
- }
-
-+ block_mutex = buf_page_get_mutex_enter(bpage);
-+
-+ rw_lock_s_unlock(&page_hash_latch);
-+
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- case BUF_BLOCK_ZIP_FREE:
-+ if (block_mutex)
-+ mutex_exit(block_mutex);
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
-- block_mutex = &buf_pool_zip_mutex;
-- mutex_enter(block_mutex);
-+ ut_a(block_mutex == &buf_pool_zip_mutex);
- bpage->buf_fix_count++;
- goto got_block;
- case BUF_BLOCK_FILE_PAGE:
-- block_mutex = &((buf_block_t*) bpage)->mutex;
-- mutex_enter(block_mutex);
-+ ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
-
- /* Discard the uncompressed page frame if possible. */
-- if (buf_LRU_free_block(bpage, FALSE, NULL)
-+ if (buf_LRU_free_block(bpage, FALSE, NULL, FALSE)
- == BUF_LRU_FREED) {
-
- mutex_exit(block_mutex);
-@@ -1740,7 +1802,7 @@
- must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- access_time = buf_page_is_accessed(bpage);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- mutex_exit(block_mutex);
-
-@@ -1995,7 +2057,7 @@
- const buf_block_t* block) /*!< in: pointer to block,
- not dereferenced */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
- if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
- /* The pointer should be aligned. */
-@@ -2029,6 +2091,7 @@
- ulint fix_type;
- ibool must_read;
- ulint retries = 0;
-+ mutex_t* block_mutex;
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
-@@ -2046,9 +2109,11 @@
- buf_pool->stat.n_page_gets++;
- loop:
- block = guess;
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-
- if (block) {
-+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+
- /* If the guess is a compressed page descriptor that
- has been allocated by buf_buddy_alloc(), it may have
- been invalidated by buf_buddy_relocate(). In that
-@@ -2057,11 +2122,15 @@
- the guess may be pointing to a buffer pool chunk that
- has been released when resizing the buffer pool. */
-
-- if (!buf_block_is_uncompressed(block)
-+ if (!block_mutex) {
-+ block = guess = NULL;
-+ } else if (!buf_block_is_uncompressed(block)
- || offset != block->page.offset
- || space != block->page.space
- || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-
-+ mutex_exit(block_mutex);
-+
- block = guess = NULL;
- } else {
- ut_ad(!block->page.in_zip_hash);
-@@ -2070,14 +2139,20 @@
- }
-
- if (block == NULL) {
-+ rw_lock_s_lock(&page_hash_latch);
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-+ if (block) {
-+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+ ut_a(block_mutex);
-+ }
-+ rw_lock_s_unlock(&page_hash_latch);
- }
-
- loop2:
- if (block == NULL) {
- /* Page not in buf_pool: needs to be read from file */
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- if (mode == BUF_GET_IF_IN_POOL) {
-
-@@ -2120,7 +2195,8 @@
-
- if (must_read && mode == BUF_GET_IF_IN_POOL) {
- /* The page is only being read to buffer */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(block_mutex);
-
- return(NULL);
- }
-@@ -2130,38 +2206,50 @@
- ibool success;
-
- case BUF_BLOCK_FILE_PAGE:
-+ if (block_mutex == &buf_pool_zip_mutex) {
-+ /* it is wrong mutex... */
-+ mutex_exit(block_mutex);
-+ goto loop;
-+ }
- break;
-
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
-+ ut_ad(block_mutex == &buf_pool_zip_mutex);
- bpage = &block->page;
- /* Protect bpage->buf_fix_count. */
-- mutex_enter(&buf_pool_zip_mutex);
-+ /* Already proteced here. */
-+ //mutex_enter(&buf_pool_zip_mutex);
-
- if (bpage->buf_fix_count
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- /* This condition often occurs when the buffer
- is not buffer-fixed, but I/O-fixed by
- buf_page_init_for_read(). */
-- mutex_exit(&buf_pool_zip_mutex);
-+ //mutex_exit(&buf_pool_zip_mutex);
- wait_until_unfixed:
- /* The block is buffer-fixed or I/O-fixed.
- Try again later. */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(block_mutex);
- os_thread_sleep(WAIT_FOR_READ);
-
- goto loop;
- }
-
- /* Allocate an uncompressed page. */
-- buf_pool_mutex_exit();
-- mutex_exit(&buf_pool_zip_mutex);
-+ //buf_pool_mutex_exit();
-+ //mutex_exit(&buf_pool_zip_mutex);
-+ mutex_exit(block_mutex);
-
- block = buf_LRU_get_free_block(0);
- ut_a(block);
-+ block_mutex = &block->mutex;
-
-- buf_pool_mutex_enter();
-- mutex_enter(&block->mutex);
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ mutex_enter(block_mutex);
-
- {
- buf_page_t* hash_bpage
-@@ -2172,35 +2260,49 @@
- while buf_pool_mutex was released.
- Free the block that was allocated. */
-
-- buf_LRU_block_free_non_file_page(block);
-- mutex_exit(&block->mutex);
-+ buf_LRU_block_free_non_file_page(block, TRUE);
-+ mutex_exit(block_mutex);
-
- block = (buf_block_t*) hash_bpage;
-+ if (block) {
-+ block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
-+ ut_a(block_mutex);
-+ }
-+ rw_lock_x_unlock(&page_hash_latch);
-+ mutex_exit(&LRU_list_mutex);
- goto loop2;
- }
- }
-
-+ mutex_enter(&buf_pool_zip_mutex);
-+
- if (UNIV_UNLIKELY
- (bpage->buf_fix_count
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
-
-+ mutex_exit(&buf_pool_zip_mutex);
- /* The block was buffer-fixed or I/O-fixed
- while buf_pool_mutex was not held by this thread.
- Free the block that was allocated and try again.
- This should be extremely unlikely. */
-
-- buf_LRU_block_free_non_file_page(block);
-- mutex_exit(&block->mutex);
-+ buf_LRU_block_free_non_file_page(block, TRUE);
-+ //mutex_exit(&block->mutex);
-
-+ rw_lock_x_unlock(&page_hash_latch);
-+ mutex_exit(&LRU_list_mutex);
- goto wait_until_unfixed;
- }
-
- /* Move the compressed page from bpage to block,
- and uncompress it. */
-
-- mutex_enter(&buf_pool_zip_mutex);
-+ mutex_enter(&flush_list_mutex);
-
- buf_relocate(bpage, &block->page);
-+
-+ rw_lock_x_unlock(&page_hash_latch);
-+
- buf_block_init_low(block);
- block->lock_hash_val = lock_rec_hash(space, offset);
-
-@@ -2209,7 +2311,7 @@
-
- if (buf_page_get_state(&block->page)
- == BUF_BLOCK_ZIP_PAGE) {
-- UT_LIST_REMOVE(list, buf_pool->zip_clean,
-+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
- &block->page);
- ut_ad(!block->page.in_flush_list);
- } else {
-@@ -2218,6 +2320,8 @@
- &block->page);
- }
-
-+ mutex_exit(&flush_list_mutex);
-+
- /* Buffer-fix, I/O-fix, and X-latch the block
- for the duration of the decompression.
- Also add the block to the unzip_LRU list. */
-@@ -2226,19 +2330,24 @@
- /* Insert at the front of unzip_LRU list */
- buf_unzip_LRU_add_block(block, FALSE);
-
-+ mutex_exit(&LRU_list_mutex);
-+
- block->page.buf_fix_count = 1;
- buf_block_set_io_fix(block, BUF_IO_READ);
- rw_lock_x_lock_func(&block->lock, 0, file, line);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
-
-- mutex_exit(&block->mutex);
-+ mutex_exit(block_mutex);
- mutex_exit(&buf_pool_zip_mutex);
-+
-+ mutex_enter(&buf_pool_mutex);
- buf_pool->n_pend_unzip++;
-+ mutex_exit(&buf_pool_mutex);
-
-- buf_buddy_free(bpage, sizeof *bpage);
-+ buf_buddy_free(bpage, sizeof *bpage, FALSE);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- /* Decompress the page and apply buffered operations
- while not holding buf_pool_mutex or block->mutex. */
-@@ -2251,12 +2360,15 @@
- }
-
- /* Unfix and unlatch the block. */
-- buf_pool_mutex_enter();
-- mutex_enter(&block->mutex);
-+ //buf_pool_mutex_enter();
-+ block_mutex = &block->mutex;
-+ mutex_enter(block_mutex);
- block->page.buf_fix_count--;
- buf_block_set_io_fix(block, BUF_IO_NONE);
-- mutex_exit(&block->mutex);
-+
-+ mutex_enter(&buf_pool_mutex);
- buf_pool->n_pend_unzip--;
-+ mutex_exit(&buf_pool_mutex);
- rw_lock_x_unlock(&block->lock);
- break;
-
-@@ -2271,7 +2383,7 @@
-
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-- mutex_enter(&block->mutex);
-+ //mutex_enter(&block->mutex);
- #if UNIV_WORD_SIZE == 4
- /* On 32-bit systems, there is no padding in buf_page_t. On
- other systems, Valgrind could complain about uninitialized pad
-@@ -2305,13 +2417,14 @@
-
- buf_block_buf_fix_inc(block, file, line);
-
-- mutex_exit(&block->mutex);
-+ //mutex_exit(&block->mutex);
-
- /* Check if this is the first access to the page */
-
- access_time = buf_page_is_accessed(&block->page);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(block_mutex);
-
- buf_page_set_accessed_make_young(&block->page, access_time);
-
-@@ -2539,9 +2652,11 @@
- mutex_exit(&block->mutex);
-
- if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
- buf_LRU_make_block_young(&block->page);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- } else if (!buf_page_is_accessed(&block->page)) {
- /* Above, we do a dirty read on purpose, to avoid
- mutex contention. The field buf_page_t::access_time
-@@ -2549,9 +2664,11 @@
- field must be protected by mutex, however. */
- ulint time_ms = ut_time_ms();
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&block->mutex);
- buf_page_set_accessed(&block->page, time_ms);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&block->mutex);
- }
-
- ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
-@@ -2617,16 +2734,19 @@
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
- block = buf_block_hash_get(space_id, page_no);
-
- if (!block) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
- return(NULL);
- }
-
- mutex_enter(&block->mutex);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-@@ -2713,7 +2833,10 @@
- {
- buf_page_t* hash_page;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
-@@ -2746,7 +2869,8 @@
- (const void*) hash_page, (const void*) block);
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(&block->mutex);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_x_unlock(&page_hash_latch);
- buf_print();
- buf_LRU_print();
- buf_validate();
-@@ -2825,16 +2949,24 @@
- ut_ad(block);
- }
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-
- if (buf_page_hash_get(space, offset)) {
- /* The page is already in the buffer pool. */
- err_exit:
- if (block) {
- mutex_enter(&block->mutex);
-- buf_LRU_block_free_non_file_page(block);
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ buf_LRU_block_free_non_file_page(block, FALSE);
- mutex_exit(&block->mutex);
- }
-+ else {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
-
- bpage = NULL;
- goto func_exit;
-@@ -2854,6 +2986,8 @@
- mutex_enter(&block->mutex);
- buf_page_init(space, offset, block);
-
-+ rw_lock_x_unlock(&page_hash_latch);
-+
- /* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-
-@@ -2881,7 +3015,7 @@
- been added to buf_pool->LRU and
- buf_pool->page_hash. */
- mutex_exit(&block->mutex);
-- data = buf_buddy_alloc(zip_size, &lru);
-+ data = buf_buddy_alloc(zip_size, &lru, FALSE);
- mutex_enter(&block->mutex);
- block->page.zip.data = data;
-
-@@ -2894,6 +3028,7 @@
- buf_unzip_LRU_add_block(block, TRUE);
- }
-
-+ mutex_exit(&LRU_list_mutex);
- mutex_exit(&block->mutex);
- } else {
- /* Defer buf_buddy_alloc() until after the block has
-@@ -2905,8 +3040,8 @@
- control block (bpage), in order to avoid the
- invocation of buf_buddy_relocate_block() on
- uninitialized data. */
-- data = buf_buddy_alloc(zip_size, &lru);
-- bpage = buf_buddy_alloc(sizeof *bpage, &lru);
-+ data = buf_buddy_alloc(zip_size, &lru, TRUE);
-+ bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);
-
- /* If buf_buddy_alloc() allocated storage from the LRU list,
- it released and reacquired buf_pool_mutex. Thus, we must
-@@ -2915,8 +3050,11 @@
- && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
-
- /* The block was added by some other thread. */
-- buf_buddy_free(bpage, sizeof *bpage);
-- buf_buddy_free(data, zip_size);
-+ buf_buddy_free(bpage, sizeof *bpage, TRUE);
-+ buf_buddy_free(data, zip_size, TRUE);
-+
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- bpage = NULL;
- goto func_exit;
-@@ -2946,18 +3084,26 @@
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
- buf_page_address_fold(space, offset), bpage);
-
-+ rw_lock_x_unlock(&page_hash_latch);
-+
- /* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-+ mutex_enter(&flush_list_mutex);
- buf_LRU_insert_zip_clean(bpage);
-+ mutex_exit(&flush_list_mutex);
-+
-+ mutex_exit(&LRU_list_mutex);
-
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-
- mutex_exit(&buf_pool_zip_mutex);
- }
-
-+ mutex_enter(&buf_pool_mutex);
- buf_pool->n_pend_reads++;
-+ mutex_exit(&buf_pool_mutex);
- func_exit:
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
-@@ -2995,7 +3141,9 @@
-
- free_block = buf_LRU_get_free_block(0);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-
- block = (buf_block_t*) buf_page_hash_get(space, offset);
-
-@@ -3008,7 +3156,9 @@
- #endif /* UNIV_DEBUG_FILE_ACCESSES */
-
- /* Page can be found in buf_pool */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- buf_block_free(free_block);
-
-@@ -3030,6 +3180,7 @@
- mutex_enter(&block->mutex);
-
- buf_page_init(space, offset, block);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- /* The block must be put to the LRU list */
- buf_LRU_add_block(&block->page, FALSE);
-@@ -3056,7 +3207,7 @@
- the reacquisition of buf_pool_mutex. We also must
- defer this operation until after the block descriptor
- has been added to buf_pool->LRU and buf_pool->page_hash. */
-- data = buf_buddy_alloc(zip_size, &lru);
-+ data = buf_buddy_alloc(zip_size, &lru, FALSE);
- mutex_enter(&block->mutex);
- block->page.zip.data = data;
-
-@@ -3074,7 +3225,8 @@
-
- buf_page_set_accessed(&block->page, time_ms);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-
-@@ -3124,6 +3276,8 @@
- enum buf_io_fix io_type;
- const ibool uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
-+ enum buf_flush flush_type;
-+ mutex_t* block_mutex;
-
- ut_a(buf_page_in_file(bpage));
-
-@@ -3257,8 +3411,17 @@
- }
- }
-
-- buf_pool_mutex_enter();
-- mutex_enter(buf_page_get_mutex(bpage));
-+ //buf_pool_mutex_enter();
-+ if (io_type == BUF_IO_WRITE) {
-+ flush_type = buf_page_get_flush_type(bpage);
-+ /* to keep consistency at buf_LRU_insert_zip_clean() */
-+ //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
-+ mutex_enter(&LRU_list_mutex);
-+ //}
-+ }
-+ block_mutex = buf_page_get_mutex_enter(bpage);
-+ ut_a(block_mutex);
-+ mutex_enter(&buf_pool_mutex);
-
- #ifdef UNIV_IBUF_COUNT_DEBUG
- if (io_type == BUF_IO_WRITE || uncompressed) {
-@@ -3298,6 +3461,11 @@
-
- buf_flush_write_complete(bpage);
-
-+ /* to keep consistency at buf_LRU_insert_zip_clean() */
-+ //if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
-+ mutex_exit(&LRU_list_mutex);
-+ //}
-+
- if (uncompressed) {
- rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
-@@ -3320,8 +3488,9 @@
- }
- #endif /* UNIV_DEBUG */
-
-- mutex_exit(buf_page_get_mutex(bpage));
-- buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-+ mutex_exit(block_mutex);
-+ //buf_pool_mutex_exit();
- }
-
- /*********************************************************************//**
-@@ -3368,7 +3537,8 @@
- freed = buf_LRU_search_and_free_block(100);
- }
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
- ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
-@@ -3381,7 +3551,8 @@
- memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
- buf_refresh_io_stats();
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- }
-
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-@@ -3406,7 +3577,10 @@
-
- ut_ad(buf_pool);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ /* for keep the new latch order, it cannot validate correctly... */
-
- chunk = buf_pool->chunks;
-
-@@ -3505,7 +3679,7 @@
- /* Check clean compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(zip_list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- switch (buf_page_get_io_fix(b)) {
- case BUF_IO_NONE:
-@@ -3530,8 +3704,9 @@
-
- /* Check dirty compressed-only blocks. */
-
-+ mutex_enter(&flush_list_mutex);
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(flush_list, b)) {
- ut_ad(b->in_flush_list);
-
- switch (buf_page_get_state(b)) {
-@@ -3576,6 +3751,7 @@
- }
- ut_a(buf_page_hash_get(b->space, b->offset) == b);
- }
-+ mutex_exit(&flush_list_mutex);
-
- mutex_exit(&buf_pool_zip_mutex);
-
-@@ -3587,19 +3763,27 @@
- }
-
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
-+ /* because of latching order with block->mutex, we cannot get free_list_mutex before that */
-+/*
- if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
- fprintf(stderr, "Free list len %lu, free blocks %lu\n",
- (ulong) UT_LIST_GET_LEN(buf_pool->free),
- (ulong) n_free);
- ut_error;
- }
-+*/
-+ /* because of latching order with block->mutex, we cannot get flush_list_mutex before that */
-+/*
- ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-
- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
-+*/
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- ut_a(buf_LRU_validate());
- ut_a(buf_flush_validate());
-@@ -3633,7 +3817,10 @@
- index_ids = mem_alloc(sizeof(dulint) * size);
- counts = mem_alloc(sizeof(ulint) * size);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ mutex_enter(&free_list_mutex);
-+ mutex_enter(&flush_list_mutex);
-
- fprintf(stderr,
- "buf_pool size %lu\n"
-@@ -3700,7 +3887,10 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ mutex_exit(&free_list_mutex);
-+ mutex_exit(&flush_list_mutex);
-
- for (i = 0; i < n_found; i++) {
- index = dict_index_get_if_in_cache(index_ids[i]);
-@@ -3739,7 +3929,7 @@
- ulint i;
- ulint fixed_pages_number = 0;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-
- chunk = buf_pool->chunks;
-
-@@ -3773,7 +3963,7 @@
- /* Traverse the lists of clean and dirty compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(zip_list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
-
-@@ -3783,8 +3973,9 @@
- }
- }
-
-+ mutex_enter(&flush_list_mutex);
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
-- b = UT_LIST_GET_NEXT(list, b)) {
-+ b = UT_LIST_GET_NEXT(flush_list, b)) {
- ut_ad(b->in_flush_list);
-
- switch (buf_page_get_state(b)) {
-@@ -3807,9 +3998,10 @@
- break;
- }
- }
-+ mutex_exit(&flush_list_mutex);
-
- mutex_exit(&buf_pool_zip_mutex);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- return(fixed_pages_number);
- }
-@@ -3867,7 +4059,11 @@
-
- ut_ad(buf_pool);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ mutex_enter(&free_list_mutex);
-+ mutex_enter(&buf_pool_mutex);
-+ mutex_enter(&flush_list_mutex);
-
- fprintf(file,
- "Buffer pool size %lu\n"
-@@ -3966,7 +4162,11 @@
- buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
-
- buf_refresh_io_stats();
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ mutex_exit(&free_list_mutex);
-+ mutex_exit(&buf_pool_mutex);
-+ mutex_exit(&flush_list_mutex);
- }
-
- /**********************************************************************//**
-@@ -3993,7 +4193,7 @@
-
- ut_ad(buf_pool);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter(); /* optimistic */
-
- chunk = buf_pool->chunks;
-
-@@ -4010,7 +4210,7 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit(); /* optimistic */
-
- return(TRUE);
- }
-@@ -4026,7 +4226,8 @@
- {
- ibool ret;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
- + buf_pool->n_flush[BUF_FLUSH_LIST]
-@@ -4036,7 +4237,8 @@
- ret = TRUE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- return(ret);
- }
-@@ -4051,11 +4253,13 @@
- {
- ulint len;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&free_list_mutex);
-
- len = UT_LIST_GET_LEN(buf_pool->free);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&free_list_mutex);
-
- return(len);
- }
-diff -ruN a/storage/innodb_plugin/buf/buf0flu.c b/storage/innodb_plugin/buf/buf0flu.c
---- a/storage/innodb_plugin/buf/buf0flu.c 2010-08-27 15:54:59.022021357 +0900
-+++ b/storage/innodb_plugin/buf/buf0flu.c 2010-08-27 16:11:40.607020890 +0900
-@@ -102,7 +102,8 @@
- const ib_rbt_node_t* c_node;
- const ib_rbt_node_t* p_node;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&flush_list_mutex));
-
- /* Insert this buffer into the rbt. */
- c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
-@@ -132,7 +133,8 @@
- ibool ret = FALSE;
- #endif /* UNIV_DEBUG */
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&flush_list_mutex));
- #ifdef UNIV_DEBUG
- ret =
- #endif /* UNIV_DEBUG */
-@@ -199,12 +201,14 @@
- buf_flush_init_flush_rbt(void)
- /*==========================*/
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&flush_list_mutex);
-
- /* Create red black tree for speedy insertions in flush list. */
- buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
- buf_flush_block_cmp);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&flush_list_mutex);
- }
-
- /********************************************************************//**
-@@ -214,7 +218,8 @@
- buf_flush_free_flush_rbt(void)
- /*==========================*/
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&flush_list_mutex);
-
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low());
-@@ -223,7 +228,8 @@
- rbt_free(buf_pool->flush_rbt);
- buf_pool->flush_rbt = NULL;
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&flush_list_mutex);
- }
-
- /********************************************************************//**
-@@ -234,7 +240,9 @@
- /*=============================*/
- buf_block_t* block) /*!< in/out: block which is modified */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&block->mutex));
-+ ut_ad(mutex_own(&flush_list_mutex));
- ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
- || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
- <= block->page.oldest_modification));
-@@ -252,7 +260,7 @@
- ut_ad(!block->page.in_zip_hash);
- ut_ad(!block->page.in_flush_list);
- ut_d(block->page.in_flush_list = TRUE);
-- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
-
- #ifdef UNIV_DEBUG_VALGRIND
- {
-@@ -283,7 +291,9 @@
- buf_page_t* prev_b;
- buf_page_t* b;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&block->mutex));
-+ ut_ad(mutex_own(&flush_list_mutex));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- ut_ad(block->page.in_LRU_list);
-@@ -324,14 +334,14 @@
- > block->page.oldest_modification) {
- ut_ad(b->in_flush_list);
- prev_b = b;
-- b = UT_LIST_GET_NEXT(list, b);
-+ b = UT_LIST_GET_NEXT(flush_list, b);
- }
- }
-
- if (prev_b == NULL) {
-- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-+ UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
- } else {
-- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
-+ UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list,
- prev_b, &block->page);
- }
-
-@@ -352,7 +362,7 @@
- buf_page_in_file(bpage) and in the LRU list */
- {
- //ut_ad(buf_pool_mutex_own());
-- //ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- //ut_ad(bpage->in_LRU_list); /* optimistic use */
-
- if (UNIV_LIKELY(bpage->in_LRU_list && buf_page_in_file(bpage))) {
-@@ -387,12 +397,12 @@
- buf_page_in_file(bpage) */
- enum buf_flush flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
- {
-- ut_a(buf_page_in_file(bpage));
-- ut_ad(buf_pool_mutex_own());
-+ //ut_a(buf_page_in_file(bpage));
-+ //ut_ad(buf_pool_mutex_own()); /*optimistic...*/
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(flush_type == BUF_FLUSH_LRU || BUF_FLUSH_LIST);
-
-- if (bpage->oldest_modification != 0
-+ if (buf_page_in_file(bpage) && bpage->oldest_modification != 0
- && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
- ut_ad(bpage->in_flush_list);
-
-@@ -421,8 +431,11 @@
- /*=============*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-+
-+ mutex_enter(&flush_list_mutex);
-+
- ut_ad(bpage->in_flush_list);
-
- switch (buf_page_get_state(bpage)) {
-@@ -433,15 +446,16 @@
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
-+ mutex_exit(&flush_list_mutex);
- ut_error;
- return;
- case BUF_BLOCK_ZIP_DIRTY:
- buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
-- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
- buf_LRU_insert_zip_clean(bpage);
- break;
- case BUF_BLOCK_FILE_PAGE:
-- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
- break;
- }
-
-@@ -456,8 +470,9 @@
-
- bpage->oldest_modification = 0;
-
-- ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-+ ut_d(UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
- ut_ad(ut_list_node_313->in_flush_list)));
-+ mutex_exit(&flush_list_mutex);
- }
-
- /********************************************************************//**
-@@ -474,7 +489,8 @@
- buf_page_t* prev;
- buf_page_t* prev_b = NULL;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&flush_list_mutex));
-
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
-@@ -492,18 +508,18 @@
- because we assert on in_flush_list in comparison function. */
- ut_d(bpage->in_flush_list = FALSE);
-
-- prev = UT_LIST_GET_PREV(list, bpage);
-- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-+ prev = UT_LIST_GET_PREV(flush_list, bpage);
-+ UT_LIST_REMOVE(flush_list, buf_pool->flush_list, bpage);
-
- if (prev) {
- ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
-- list,
-+ flush_list,
- buf_pool->flush_list,
- prev, dpage);
- } else {
- UT_LIST_ADD_FIRST(
-- list,
-+ flush_list,
- buf_pool->flush_list,
- dpage);
- }
-@@ -977,7 +993,9 @@
- io_fixed and oldest_modification != 0. Thus, it cannot be
- relocated in the buffer pool or removed from flush_list or
- LRU_list. */
-- ut_ad(!buf_pool_mutex_own());
-+ //ut_ad(!buf_pool_mutex_own());
-+ ut_ad(!mutex_own(&LRU_list_mutex));
-+ ut_ad(!mutex_own(&flush_list_mutex));
- ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
- ut_ad(bpage->oldest_modification != 0);
-@@ -1137,12 +1155,19 @@
- ibool is_uncompressed;
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
-+ || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
-+#endif
- ut_ad(buf_page_in_file(bpage));
-
- block_mutex = buf_page_get_mutex(bpage);
- ut_ad(mutex_own(block_mutex));
-
-+ mutex_enter(&buf_pool_mutex);
-+ rw_lock_s_unlock(&page_hash_latch);
-+
- ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
-
- buf_page_set_io_fix(bpage, BUF_IO_WRITE);
-@@ -1173,7 +1198,8 @@
- }
-
- mutex_exit(block_mutex);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- /* Even though bpage is not protected by any mutex at
- this point, it is safe to access bpage, because it is
-@@ -1210,7 +1236,8 @@
- immediately. */
-
- mutex_exit(block_mutex);
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
- break;
-
- default:
-@@ -1275,7 +1302,8 @@
- high = fil_space_get_size(space);
- }
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- for (i = low; i < high; i++) {
-
-@@ -1294,11 +1322,9 @@
- if (flush_type != BUF_FLUSH_LRU
- || i == offset
- || buf_page_is_old(bpage)) {
-- mutex_t* block_mutex = buf_page_get_mutex(bpage);
--
-- mutex_enter(block_mutex);
-+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
-
-- if (buf_flush_ready_for_flush(bpage, flush_type)
-+ if (block_mutex && buf_flush_ready_for_flush(bpage, flush_type)
- && (i == offset || !bpage->buf_fix_count)) {
- /* We only try to flush those
- neighbors != offset where the buf fix count is
-@@ -1312,14 +1338,16 @@
- ut_ad(!mutex_own(block_mutex));
- count++;
-
-- buf_pool_mutex_enter();
-- } else {
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-+ } else if (block_mutex) {
- mutex_exit(block_mutex);
- }
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(count);
- }
-@@ -1350,9 +1378,11 @@
- min_n), otherwise ignored */
- {
- buf_page_t* bpage;
-+ buf_page_t* prev_bpage = NULL;
- ulint page_count = 0;
- ulint space;
- ulint offset;
-+ ulint remaining = 0;
-
- ut_ad((flush_type == BUF_FLUSH_LRU)
- || (flush_type == BUF_FLUSH_LIST));
-@@ -1360,20 +1390,28 @@
- ut_ad((flush_type != BUF_FLUSH_LIST)
- || sync_thread_levels_empty_gen(TRUE));
- #endif /* UNIV_SYNC_DEBUG */
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- if ((buf_pool->n_flush[flush_type] > 0)
- || (buf_pool->init_flush[flush_type] == TRUE)) {
-
- /* There is already a flush batch of the same type running */
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- return(ULINT_UNDEFINED);
- }
-
- buf_pool->init_flush[flush_type] = TRUE;
-
-+ mutex_exit(&buf_pool_mutex);
-+
-+ if (flush_type == BUF_FLUSH_LRU) {
-+ mutex_enter(&LRU_list_mutex);
-+ }
-+
- for (;;) {
- flush_next:
- /* If we have flushed enough, leave the loop */
-@@ -1390,7 +1428,13 @@
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
-+ mutex_enter(&flush_list_mutex);
-+ remaining = UT_LIST_GET_LEN(buf_pool->flush_list);
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-+ if (bpage) {
-+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+ }
-+ mutex_exit(&flush_list_mutex);
- if (!bpage
- || bpage->oldest_modification >= lsn_limit) {
- /* We have flushed enough */
-@@ -1407,26 +1451,35 @@
- function a pointer to a block in the list! */
-
- do {
-- mutex_t*block_mutex = buf_page_get_mutex(bpage);
-+ mutex_t*block_mutex = buf_page_get_mutex_enter(bpage);
- ibool ready;
-
-- ut_a(buf_page_in_file(bpage));
-+ //ut_a(buf_page_in_file(bpage));
-
-- mutex_enter(block_mutex);
-- ready = buf_flush_ready_for_flush(bpage, flush_type);
-- mutex_exit(block_mutex);
-+ if (block_mutex) {
-+ ready = buf_flush_ready_for_flush(bpage, flush_type);
-+ mutex_exit(block_mutex);
-+ } else {
-+ ready = FALSE;
-+ }
-
- if (ready) {
- space = buf_page_get_space(bpage);
- offset = buf_page_get_page_no(bpage);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ if (flush_type == BUF_FLUSH_LRU) {
-+ mutex_exit(&LRU_list_mutex);
-+ }
-
- /* Try to flush also all the neighbors */
- page_count += buf_flush_try_neighbors(
- space, offset, flush_type, srv_flush_neighbor_pages);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ if (flush_type == BUF_FLUSH_LRU) {
-+ mutex_enter(&LRU_list_mutex);
-+ }
- goto flush_next;
-
- } else if (flush_type == BUF_FLUSH_LRU) {
-@@ -1434,16 +1487,35 @@
- } else {
- ut_ad(flush_type == BUF_FLUSH_LIST);
-
-- bpage = UT_LIST_GET_PREV(list, bpage);
-- ut_ad(!bpage || bpage->in_flush_list);
-+ mutex_enter(&flush_list_mutex);
-+ bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+ //ut_ad(!bpage || bpage->in_flush_list); /* optimistic */
-+ if (bpage != prev_bpage) {
-+ /* the search may warp.. retrying */
-+ bpage = NULL;
-+ }
-+ if (bpage) {
-+ prev_bpage = UT_LIST_GET_PREV(flush_list, bpage);
-+ }
-+ mutex_exit(&flush_list_mutex);
-+ remaining--;
- }
- } while (bpage != NULL);
-
-+ if (remaining)
-+ goto flush_next;
-+
- /* If we could not find anything to flush, leave the loop */
-
- break;
- }
-
-+ if (flush_type == BUF_FLUSH_LRU) {
-+ mutex_exit(&LRU_list_mutex);
-+ }
-+
-+ mutex_enter(&buf_pool_mutex);
-+
- buf_pool->init_flush[flush_type] = FALSE;
-
- if (buf_pool->n_flush[flush_type] == 0) {
-@@ -1453,7 +1525,8 @@
- os_event_set(buf_pool->no_flush[flush_type]);
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- buf_flush_buffered_writes();
-
-@@ -1514,7 +1587,7 @@
- retry:
- //buf_pool_mutex_enter();
- if (have_LRU_mutex)
-- buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- n_replaceable = UT_LIST_GET_LEN(buf_pool->free);
-
-@@ -1531,15 +1604,15 @@
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- continue;
- }
-- block_mutex = buf_page_get_mutex(bpage);
--
-- mutex_enter(block_mutex);
-+ block_mutex = buf_page_get_mutex_enter(bpage);
-
-- if (buf_flush_ready_for_replace(bpage)) {
-+ if (block_mutex && buf_flush_ready_for_replace(bpage)) {
- n_replaceable++;
- }
-
-- mutex_exit(block_mutex);
-+ if (block_mutex) {
-+ mutex_exit(block_mutex);
-+ }
-
- distance++;
-
-@@ -1548,7 +1621,7 @@
-
- //buf_pool_mutex_exit();
- if (have_LRU_mutex)
-- buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {
-
-@@ -1715,7 +1788,7 @@
- buf_page_t* bpage;
- const ib_rbt_node_t* rnode = NULL;
-
-- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
-+ UT_LIST_VALIDATE(flush_list, buf_page_t, buf_pool->flush_list,
- ut_ad(ut_list_node_313->in_flush_list));
-
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-@@ -1730,7 +1803,7 @@
- while (bpage != NULL) {
- const ib_uint64_t om = bpage->oldest_modification;
- ut_ad(bpage->in_flush_list);
-- ut_a(buf_page_in_file(bpage));
-+ //ut_a(buf_page_in_file(bpage)); /* optimistic */
- ut_a(om > 0);
-
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-@@ -1742,7 +1815,7 @@
- rnode = rbt_next(buf_pool->flush_rbt, rnode);
- }
-
-- bpage = UT_LIST_GET_NEXT(list, bpage);
-+ bpage = UT_LIST_GET_NEXT(flush_list, bpage);
-
- ut_a(!bpage || om >= bpage->oldest_modification);
- }
-@@ -1764,11 +1837,13 @@
- {
- ibool ret;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&flush_list_mutex);
-
- ret = buf_flush_validate_low();
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&flush_list_mutex);
-
- return(ret);
- }
-diff -ruN a/storage/innodb_plugin/buf/buf0lru.c b/storage/innodb_plugin/buf/buf0lru.c
---- a/storage/innodb_plugin/buf/buf0lru.c 2010-08-27 15:54:59.025058614 +0900
-+++ b/storage/innodb_plugin/buf/buf0lru.c 2010-08-27 16:11:40.611021077 +0900
-@@ -145,8 +145,9 @@
- void
- buf_LRU_block_free_hashed_page(
- /*===========================*/
-- buf_block_t* block); /*!< in: block, must contain a file page and
-+ buf_block_t* block, /*!< in: block, must contain a file page and
- be in a state where it can be freed */
-+ ibool have_page_hash_mutex);
-
- /******************************************************************//**
- Determines if the unzip_LRU list should be used for evicting a victim
-@@ -154,16 +155,21 @@
- @return TRUE if should use unzip_LRU */
- UNIV_INLINE
- ibool
--buf_LRU_evict_from_unzip_LRU(void)
-+buf_LRU_evict_from_unzip_LRU(
-+ ibool have_LRU_mutex)
- /*==============================*/
- {
- ulint io_avg;
- ulint unzip_avg;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
-+ if (!have_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex);
- /* If the unzip_LRU list is empty, we can only use the LRU. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
- return(FALSE);
- }
-
-@@ -172,14 +178,20 @@
- decompressed pages in the buffer pool. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
- <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
- return(FALSE);
- }
-
- /* If eviction hasn't started yet, we assume by default
- that a workload is disk bound. */
- if (buf_pool->freed_page_clock == 0) {
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
- return(TRUE);
- }
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
-
- /* Calculate the average over past intervals, and add the values
- of the current interval. */
-@@ -245,19 +257,23 @@
-
- page_arr = ut_malloc(sizeof(ulint)
- * BUF_LRU_DROP_SEARCH_HASH_SIZE);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- scan_again:
- num_entries = 0;
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (bpage != NULL) {
-- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
- buf_page_t* prev_bpage;
-
-- mutex_enter(block_mutex);
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
-+ if (!block_mutex) {
-+ goto next_page;
-+ }
-+
- ut_a(buf_page_in_file(bpage));
-
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
-@@ -285,12 +301,14 @@
- }
- /* Array full. We release the buf_pool_mutex to
- obey the latching order. */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
- num_entries);
- num_entries = 0;
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
- } else {
- mutex_exit(block_mutex);
- }
-@@ -315,7 +333,8 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-
- /* Drop any remaining batch of search hashed pages. */
- buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
-@@ -343,7 +362,9 @@
- buf_LRU_drop_page_hash_for_tablespace(id);
-
- scan_again:
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-
- all_freed = TRUE;
-
-@@ -371,8 +392,16 @@
-
- all_freed = FALSE;
- } else {
-- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-- mutex_enter(block_mutex);
-+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
-+
-+ if (!block_mutex) {
-+ /* It may be impossible case...
-+ Something wrong, so will be scan_again */
-+
-+ all_freed = FALSE;
-+
-+ goto next_page_no_mutex;
-+ }
-
- if (bpage->buf_fix_count > 0) {
-
-@@ -431,7 +460,9 @@
- ulint page_no;
- ulint zip_size;
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- zip_size = buf_page_get_zip_size(bpage);
- page_no = buf_page_get_page_no(bpage);
-@@ -456,7 +487,7 @@
- if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
- != BUF_BLOCK_ZIP_FREE) {
- buf_LRU_block_free_hashed_page((buf_block_t*)
-- bpage);
-+ bpage, TRUE);
- } else {
- /* The block_mutex should have been
- released by buf_LRU_block_remove_hashed_page()
-@@ -488,7 +519,9 @@
- bpage = prev_bpage;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-
- if (!all_freed) {
- os_thread_sleep(20000);
-@@ -507,7 +540,9 @@
- {
- buf_page_t* b;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-+ ut_ad(mutex_own(&flush_list_mutex));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-
- /* Find the first successor of bpage in the LRU list
-@@ -515,17 +550,17 @@
- b = bpage;
- do {
- b = UT_LIST_GET_NEXT(LRU, b);
-- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
-+ } while (b && (buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE || !b->in_LRU_list));
-
- /* Insert bpage before b, i.e., after the predecessor of b. */
- if (b) {
-- b = UT_LIST_GET_PREV(list, b);
-+ b = UT_LIST_GET_PREV(zip_list, b);
- }
-
- if (b) {
-- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
-+ UT_LIST_INSERT_AFTER(zip_list, buf_pool->zip_clean, b, bpage);
- } else {
-- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
-+ UT_LIST_ADD_FIRST(zip_list, buf_pool->zip_clean, bpage);
- }
- }
-
-@@ -537,16 +572,17 @@
- ibool
- buf_LRU_free_from_unzip_LRU_list(
- /*=============================*/
-- ulint n_iterations) /*!< in: how many times this has been called
-+ ulint n_iterations, /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; we will search
- n_iterations / 5 of the unzip_LRU list,
- or nothing if n_iterations >= 5 */
-+ ibool have_LRU_mutex)
- {
- buf_block_t* block;
- ulint distance;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own()); /* optimistic */
-
- /* Theoratically it should be much easier to find a victim
- from unzip_LRU as we can choose even a dirty block (as we'll
-@@ -556,7 +592,7 @@
- if we have done five iterations so far. */
-
- if (UNIV_UNLIKELY(n_iterations >= 5)
-- || !buf_LRU_evict_from_unzip_LRU()) {
-+ || !buf_LRU_evict_from_unzip_LRU(have_LRU_mutex)) {
-
- return(FALSE);
- }
-@@ -564,18 +600,25 @@
- distance = 100 + (n_iterations
- * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
-
-+restart:
- for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
- UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
- block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
-
- enum buf_lru_free_block_status freed;
-
-+ mutex_enter(&block->mutex);
-+ if (!block->in_unzip_LRU_list || !block->page.in_LRU_list
-+ || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-+ mutex_exit(&block->mutex);
-+ goto restart;
-+ }
-+
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->in_unzip_LRU_list);
- ut_ad(block->page.in_LRU_list);
-
-- mutex_enter(&block->mutex);
-- freed = buf_LRU_free_block(&block->page, FALSE, NULL);
-+ freed = buf_LRU_free_block(&block->page, FALSE, NULL, have_LRU_mutex);
- mutex_exit(&block->mutex);
-
- switch (freed) {
-@@ -608,20 +651,22 @@
- ibool
- buf_LRU_free_from_common_LRU_list(
- /*==============================*/
-- ulint n_iterations) /*!< in: how many times this has been called
-+ ulint n_iterations, /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list */
-+ ibool have_LRU_mutex)
- {
- buf_page_t* bpage;
- ulint distance;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own()); /* optimistic */
-
- distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
-
-+restart:
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
- bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
-@@ -629,14 +674,23 @@
- enum buf_lru_free_block_status freed;
- unsigned accessed;
- mutex_t* block_mutex
-- = buf_page_get_mutex(bpage);
-+ = buf_page_get_mutex_enter(bpage);
-+
-+ if (!block_mutex) {
-+ goto restart;
-+ }
-+
-+ if (!bpage->in_LRU_list
-+ || !buf_page_in_file(bpage)) {
-+ mutex_exit(block_mutex);
-+ goto restart;
-+ }
-
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
-- mutex_enter(block_mutex);
- accessed = buf_page_is_accessed(bpage);
-- freed = buf_LRU_free_block(bpage, TRUE, NULL);
-+ freed = buf_LRU_free_block(bpage, TRUE, NULL, have_LRU_mutex);
- mutex_exit(block_mutex);
-
- switch (freed) {
-@@ -685,22 +739,33 @@
- n_iterations / 5 of the unzip_LRU list. */
- {
- ibool freed = FALSE;
-+ ibool have_LRU_mutex = FALSE;
-+
-+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+ have_LRU_mutex = TRUE;
-
-- buf_pool_mutex_enter();
-+ /* optimistic search... */
-+ //buf_pool_mutex_enter();
-+ if (have_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex);
-
-- freed = buf_LRU_free_from_unzip_LRU_list(n_iterations);
-+ freed = buf_LRU_free_from_unzip_LRU_list(n_iterations, have_LRU_mutex);
-
- if (!freed) {
-- freed = buf_LRU_free_from_common_LRU_list(n_iterations);
-+ freed = buf_LRU_free_from_common_LRU_list(n_iterations, have_LRU_mutex);
- }
-
-+ mutex_enter(&buf_pool_mutex);
- if (!freed) {
- buf_pool->LRU_flush_ended = 0;
- } else if (buf_pool->LRU_flush_ended > 0) {
- buf_pool->LRU_flush_ended--;
- }
-+ mutex_exit(&buf_pool_mutex);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ if (have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
-
- return(freed);
- }
-@@ -718,18 +783,22 @@
- buf_LRU_try_free_flushed_blocks(void)
- /*=================================*/
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- while (buf_pool->LRU_flush_ended > 0) {
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- buf_LRU_search_and_free_block(1);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
- }
-
- /******************************************************************//**
-@@ -744,7 +813,9 @@
- {
- ibool ret = FALSE;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-+ mutex_enter(&free_list_mutex);
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 4) {
-@@ -752,7 +823,9 @@
- ret = TRUE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ mutex_exit(&free_list_mutex);
-
- return(ret);
- }
-@@ -768,9 +841,10 @@
- {
- buf_block_t* block;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
-- block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
-+ mutex_enter(&free_list_mutex);
-+ block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
-
- if (block) {
- ut_ad(block->page.in_free_list);
-@@ -778,7 +852,9 @@
- ut_ad(!block->page.in_flush_list);
- ut_ad(!block->page.in_LRU_list);
- ut_a(!buf_page_in_file(&block->page));
-- UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
-+ UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
-+
-+ mutex_exit(&free_list_mutex);
-
- mutex_enter(&block->mutex);
-
-@@ -786,6 +862,8 @@
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-
- mutex_exit(&block->mutex);
-+ } else {
-+ mutex_exit(&free_list_mutex);
- }
-
- return(block);
-@@ -809,7 +887,7 @@
- ibool mon_value_was = FALSE;
- ibool started_monitor = FALSE;
- loop:
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
-@@ -889,14 +967,16 @@
- if (UNIV_UNLIKELY(zip_size)) {
- ibool lru;
- page_zip_set_size(&block->page.zip, zip_size);
-- block->page.zip.data = buf_buddy_alloc(zip_size, &lru);
-+ mutex_enter(&LRU_list_mutex);
-+ block->page.zip.data = buf_buddy_alloc(zip_size, &lru, FALSE);
-+ mutex_exit(&LRU_list_mutex);
- UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
- } else {
- page_zip_set_size(&block->page.zip, 0);
- block->page.zip.data = NULL;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- if (started_monitor) {
- srv_print_innodb_monitor = mon_value_was;
-@@ -908,7 +988,7 @@
- /* If no block was in the free list, search from the end of the LRU
- list and try to free a block there */
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-
- freed = buf_LRU_search_and_free_block(n_iterations);
-
-@@ -957,18 +1037,21 @@
-
- os_aio_simulated_wake_handler_threads();
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- if (buf_pool->LRU_flush_ended > 0) {
- /* We have written pages in an LRU flush. To make the insert
- buffer more efficient, we try to move these pages to the free
- list. */
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- buf_LRU_try_free_flushed_blocks();
- } else {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
- }
-
- if (n_iterations > 10) {
-@@ -993,7 +1076,8 @@
- ulint new_len;
-
- ut_a(buf_pool->LRU_old);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
- ut_ad(buf_LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
- ut_ad(buf_LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
- #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
-@@ -1058,7 +1142,8 @@
- {
- buf_page_t* bpage;
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-
- /* We first initialize all blocks in the LRU list as old and then use
-@@ -1091,13 +1176,14 @@
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(buf_page_in_file(bpage));
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_block_t* block = (buf_block_t*) bpage;
-
- ut_ad(block->in_unzip_LRU_list);
-- ut_d(block->in_unzip_LRU_list = FALSE);
-+ block->in_unzip_LRU_list = FALSE;
-
- UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
- }
-@@ -1113,7 +1199,8 @@
- {
- ut_ad(buf_pool);
- ut_ad(bpage);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- ut_a(buf_page_in_file(bpage));
-
-@@ -1188,12 +1275,13 @@
- {
- ut_ad(buf_pool);
- ut_ad(block);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-
- ut_ad(!block->in_unzip_LRU_list);
-- ut_d(block->in_unzip_LRU_list = TRUE);
-+ block->in_unzip_LRU_list = TRUE;
-
- if (old) {
- UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
-@@ -1212,7 +1300,8 @@
- {
- ut_ad(buf_pool);
- ut_ad(bpage);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- ut_a(buf_page_in_file(bpage));
-
-@@ -1261,7 +1350,8 @@
- {
- ut_ad(buf_pool);
- ut_ad(bpage);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- ut_a(buf_page_in_file(bpage));
- ut_ad(!bpage->in_LRU_list);
-@@ -1338,7 +1428,8 @@
- /*=====================*/
- buf_page_t* bpage) /*!< in: control block */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-
- if (bpage->old) {
- buf_pool->stat.n_pages_made_young++;
-@@ -1380,18 +1471,19 @@
- buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip, /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
-- ibool* buf_pool_mutex_released)
-+ ibool* buf_pool_mutex_released,
- /*!< in: pointer to a variable that will
- be assigned TRUE if buf_pool_mutex
- was temporarily released, or NULL */
-+ ibool have_LRU_mutex)
- {
- buf_page_t* b = NULL;
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(block_mutex));
- ut_ad(buf_page_in_file(bpage));
-- ut_ad(bpage->in_LRU_list);
-+ //ut_ad(bpage->in_LRU_list);
- ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
- #if UNIV_WORD_SIZE == 4
- /* On 32-bit systems, there is no padding in buf_page_t. On
-@@ -1400,7 +1492,7 @@
- UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
- #endif
-
-- if (!buf_page_can_relocate(bpage)) {
-+ if (!bpage->in_LRU_list || !block_mutex || !buf_page_can_relocate(bpage)) {
-
- /* Do not free buffer-fixed or I/O-fixed blocks. */
- return(BUF_LRU_NOT_FREED);
-@@ -1432,15 +1524,15 @@
- If it cannot be allocated (without freeing a block
- from the LRU list), refuse to free bpage. */
- alloc:
-- buf_pool_mutex_exit_forbid();
-- b = buf_buddy_alloc(sizeof *b, NULL);
-- buf_pool_mutex_exit_allow();
-+ //buf_pool_mutex_exit_forbid();
-+ b = buf_buddy_alloc(sizeof *b, NULL, FALSE);
-+ //buf_pool_mutex_exit_allow();
-
- if (UNIV_UNLIKELY(!b)) {
- return(BUF_LRU_CANNOT_RELOCATE);
- }
-
-- memcpy(b, bpage, sizeof *b);
-+ //memcpy(b, bpage, sizeof *b);
- }
-
- #ifdef UNIV_DEBUG
-@@ -1451,6 +1543,39 @@
- }
- #endif /* UNIV_DEBUG */
-
-+ /* not to break latch order, must re-enter block_mutex */
-+ mutex_exit(block_mutex);
-+
-+ if (!have_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex); /* optimistic */
-+ rw_lock_x_lock(&page_hash_latch);
-+ mutex_enter(block_mutex);
-+
-+ /* recheck states of block */
-+ if (!bpage->in_LRU_list || block_mutex != buf_page_get_mutex(bpage)
-+ || !buf_page_can_relocate(bpage)) {
-+not_freed:
-+ if (b) {
-+ buf_buddy_free(b, sizeof *b, TRUE);
-+ }
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ return(BUF_LRU_NOT_FREED);
-+ } else if (zip || !bpage->zip.data) {
-+ if (bpage->oldest_modification)
-+ goto not_freed;
-+ } else if (bpage->oldest_modification) {
-+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-+ ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-+ goto not_freed;
-+ }
-+ }
-+
-+ if (b) {
-+ memcpy(b, bpage, sizeof *b);
-+ }
-+
- if (buf_LRU_block_remove_hashed_page(bpage, zip)
- != BUF_BLOCK_ZIP_FREE) {
- ut_a(bpage->buf_fix_count == 0);
-@@ -1462,6 +1587,10 @@
-
- ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
-
-+ while (prev_b && !prev_b->in_LRU_list) {
-+ prev_b = UT_LIST_GET_PREV(LRU, prev_b);
-+ }
-+
- b->state = b->oldest_modification
- ? BUF_BLOCK_ZIP_DIRTY
- : BUF_BLOCK_ZIP_PAGE;
-@@ -1537,12 +1666,14 @@
- buf_LRU_add_block_low(b, buf_page_is_old(b));
- }
-
-+ mutex_enter(&flush_list_mutex);
- if (b->state == BUF_BLOCK_ZIP_PAGE) {
- buf_LRU_insert_zip_clean(b);
- } else {
- /* Relocate on buf_pool->flush_list. */
- buf_flush_relocate_on_flush_list(bpage, b);
- }
-+ mutex_exit(&flush_list_mutex);
-
- bpage->zip.data = NULL;
- page_zip_set_size(&bpage->zip, 0);
-@@ -1558,7 +1689,9 @@
- *buf_pool_mutex_released = TRUE;
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
- mutex_exit(block_mutex);
-
- /* Remove possible adaptive hash index on the page.
-@@ -1590,7 +1723,9 @@
- : BUF_NO_CHECKSUM_MAGIC);
- }
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ if (have_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex);
- mutex_enter(block_mutex);
-
- if (b) {
-@@ -1600,13 +1735,17 @@
- mutex_exit(&buf_pool_zip_mutex);
- }
-
-- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
-+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage, FALSE);
- } else {
- /* The block_mutex should have been released by
- buf_LRU_block_remove_hashed_page() when it returns
- BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool_zip_mutex);
- mutex_enter(block_mutex);
-+
-+ if (!have_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
- }
-
- return(BUF_LRU_FREED);
-@@ -1618,12 +1757,13 @@
- void
- buf_LRU_block_free_non_file_page(
- /*=============================*/
-- buf_block_t* block) /*!< in: block, must not contain a file page */
-+ buf_block_t* block, /*!< in: block, must not contain a file page */
-+ ibool have_page_hash_mutex)
- {
- void* data;
-
- ut_ad(block);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&block->mutex));
-
- switch (buf_block_get_state(block)) {
-@@ -1657,15 +1797,17 @@
- if (data) {
- block->page.zip.data = NULL;
- mutex_exit(&block->mutex);
-- buf_pool_mutex_exit_forbid();
-- buf_buddy_free(data, page_zip_get_size(&block->page.zip));
-- buf_pool_mutex_exit_allow();
-+ //buf_pool_mutex_exit_forbid();
-+ buf_buddy_free(data, page_zip_get_size(&block->page.zip), have_page_hash_mutex);
-+ //buf_pool_mutex_exit_allow();
- mutex_enter(&block->mutex);
- page_zip_set_size(&block->page.zip, 0);
- }
-
-- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
-+ mutex_enter(&free_list_mutex);
-+ UT_LIST_ADD_FIRST(free, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
-+ mutex_exit(&free_list_mutex);
-
- UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
- }
-@@ -1692,7 +1834,11 @@
- {
- const buf_page_t* hashed_bpage;
- ut_ad(bpage);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
-+#endif
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
-@@ -1798,7 +1944,9 @@
-
- #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(buf_page_get_mutex(bpage));
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
- buf_print();
- buf_LRU_print();
- buf_validate();
-@@ -1821,14 +1969,14 @@
- ut_a(bpage->zip.data);
- ut_a(buf_page_get_zip_size(bpage));
-
-- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
-+ UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, bpage);
-
- mutex_exit(&buf_pool_zip_mutex);
-- buf_pool_mutex_exit_forbid();
-+ //buf_pool_mutex_exit_forbid();
- buf_buddy_free(bpage->zip.data,
-- page_zip_get_size(&bpage->zip));
-- buf_buddy_free(bpage, sizeof(*bpage));
-- buf_pool_mutex_exit_allow();
-+ page_zip_get_size(&bpage->zip), TRUE);
-+ buf_buddy_free(bpage, sizeof(*bpage), TRUE);
-+ //buf_pool_mutex_exit_allow();
- UNIV_MEM_UNDESC(bpage);
- return(BUF_BLOCK_ZIP_FREE);
-
-@@ -1850,9 +1998,9 @@
- ut_ad(!bpage->in_flush_list);
- ut_ad(!bpage->in_LRU_list);
- mutex_exit(&((buf_block_t*) bpage)->mutex);
-- buf_pool_mutex_exit_forbid();
-- buf_buddy_free(data, page_zip_get_size(&bpage->zip));
-- buf_pool_mutex_exit_allow();
-+ //buf_pool_mutex_exit_forbid();
-+ buf_buddy_free(data, page_zip_get_size(&bpage->zip), TRUE);
-+ //buf_pool_mutex_exit_allow();
- mutex_enter(&((buf_block_t*) bpage)->mutex);
- page_zip_set_size(&bpage->zip, 0);
- }
-@@ -1878,15 +2026,16 @@
- void
- buf_LRU_block_free_hashed_page(
- /*===========================*/
-- buf_block_t* block) /*!< in: block, must contain a file page and
-+ buf_block_t* block, /*!< in: block, must contain a file page and
- be in a state where it can be freed */
-+ ibool have_page_hash_mutex)
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&block->mutex));
-
- buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
-- buf_LRU_block_free_non_file_page(block);
-+ buf_LRU_block_free_non_file_page(block, have_page_hash_mutex);
- }
-
- /**********************************************************************//**
-@@ -1912,7 +2061,8 @@
- }
-
- if (adjust) {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- if (ratio != buf_LRU_old_ratio) {
- buf_LRU_old_ratio = ratio;
-@@ -1923,7 +2073,8 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- } else {
- buf_LRU_old_ratio = ratio;
- }
-@@ -1948,7 +2099,8 @@
- goto func_exit;
- }
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- /* Update the index. */
- item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
-@@ -1962,7 +2114,8 @@
- /* Put current entry in the array. */
- memcpy(item, &buf_LRU_stat_cur, sizeof *item);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- func_exit:
- /* Clear the current entry. */
-@@ -1984,7 +2137,8 @@
- ulint new_len;
-
- ut_ad(buf_pool);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
-@@ -2044,16 +2198,22 @@
-
- ut_a(buf_pool->LRU_old_len == old_len);
-
-- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
-+ mutex_exit(&LRU_list_mutex);
-+ mutex_enter(&free_list_mutex);
-+
-+ UT_LIST_VALIDATE(free, buf_page_t, buf_pool->free,
- ut_ad(ut_list_node_313->in_free_list));
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
- bpage != NULL;
-- bpage = UT_LIST_GET_NEXT(list, bpage)) {
-+ bpage = UT_LIST_GET_NEXT(free, bpage)) {
-
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
- }
-
-+ mutex_exit(&free_list_mutex);
-+ mutex_enter(&LRU_list_mutex);
-+
- UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
- ut_ad(ut_list_node_313->in_unzip_LRU_list
- && ut_list_node_313->page.in_LRU_list));
-@@ -2067,7 +2227,8 @@
- ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- return(TRUE);
- }
- #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-@@ -2083,7 +2244,8 @@
- const buf_page_t* bpage;
-
- ut_ad(buf_pool);
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&LRU_list_mutex);
-
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
-@@ -2140,6 +2302,7 @@
- bpage = UT_LIST_GET_NEXT(LRU, bpage);
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&LRU_list_mutex);
- }
- #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-diff -ruN a/storage/innodb_plugin/buf/buf0rea.c b/storage/innodb_plugin/buf/buf0rea.c
---- a/storage/innodb_plugin/buf/buf0rea.c 2010-08-27 15:54:59.027059378 +0900
-+++ b/storage/innodb_plugin/buf/buf0rea.c 2010-08-27 16:11:40.614021339 +0900
-@@ -290,10 +290,12 @@
-
- tablespace_version = fil_space_get_version(space);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&buf_pool_mutex);
-
- if (high > fil_space_get_size(space)) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
- /* The area is not whole, return */
-
- return(0);
-@@ -301,10 +303,12 @@
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&buf_pool_mutex);
-
- return(0);
- }
-+ mutex_exit(&buf_pool_mutex);
-
- /* Check that almost all pages in the area have been accessed; if
- offset == low, the accesses must be in a descending order, otherwise,
-@@ -323,6 +327,7 @@
-
- fail_count = 0;
-
-+ rw_lock_s_lock(&page_hash_latch);
- for (i = low; i < high; i++) {
- bpage = buf_page_hash_get(space, i);
-
-@@ -350,7 +355,8 @@
-
- if (fail_count > threshold) {
- /* Too many failures: return */
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
- return(0);
- }
-
-@@ -365,7 +371,8 @@
- bpage = buf_page_hash_get(space, offset);
-
- if (bpage == NULL) {
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(0);
- }
-@@ -391,7 +398,8 @@
- pred_offset = fil_page_get_prev(frame);
- succ_offset = fil_page_get_next(frame);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- if ((offset == low) && (succ_offset == offset + 1)) {
-
-diff -ruN a/storage/innodb_plugin/handler/i_s.cc b/storage/innodb_plugin/handler/i_s.cc
---- a/storage/innodb_plugin/handler/i_s.cc 2010-08-27 15:59:21.753412068 +0900
-+++ b/storage/innodb_plugin/handler/i_s.cc 2010-08-27 16:11:40.617020805 +0900
-@@ -2230,7 +2230,8 @@
-
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&zip_free_mutex);
-
- for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
- buf_buddy_stat_t* buddy_stat = &buf_buddy_stat[x];
-@@ -2256,7 +2257,8 @@
- }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&zip_free_mutex);
- DBUG_RETURN(status);
- }
-
-diff -ruN a/storage/innodb_plugin/handler/innodb_patch_info.h b/storage/innodb_plugin/handler/innodb_patch_info.h
---- a/storage/innodb_plugin/handler/innodb_patch_info.h 2010-08-27 16:11:12.167183642 +0900
-+++ b/storage/innodb_plugin/handler/innodb_patch_info.h 2010-08-27 16:11:40.614990183 +0900
-@@ -33,5 +33,6 @@
- {"innodb_overwrite_relay_log_info","overwrite relay-log.info when slave recovery","Building as plugin, it is not used.","http://www.percona.com/docs/wiki/percona-xtradb:innodb_overwrite_relay_log_info"},
- {"innodb_thread_concurrency_timer_based","use InnoDB timer based concurrency throttling (backport from MySQL 5.4.0)","",""},
- {"innodb_dict_size_limit","Limit dictionary cache size","Variable innodb_dict_size_limit in bytes","http://www.percona.com/docs/wiki/percona-xtradb"},
-+{"innodb_split_buf_pool_mutex","More fix of buffer_pool mutex","Spliting buf_pool_mutex and optimizing based on innodb_opt_lru_count","http://www.percona.com/docs/wiki/percona-xtradb"},
- {NULL, NULL, NULL, NULL}
- };
-diff -ruN a/storage/innodb_plugin/include/buf0buddy.h b/storage/innodb_plugin/include/buf0buddy.h
---- a/storage/innodb_plugin/include/buf0buddy.h 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buddy.h 2010-08-27 16:11:40.618988049 +0900
-@@ -49,10 +49,11 @@
- buf_buddy_alloc(
- /*============*/
- ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
-- ibool* lru) /*!< in: pointer to a variable that will be assigned
-+ ibool* lru, /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
- or NULL if the LRU list should not be used */
-+ ibool have_page_hash_mutex)
- __attribute__((malloc));
-
- /**********************************************************************//**
-@@ -63,7 +64,8 @@
- /*===========*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
-- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
-+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
-+ ibool have_page_hash_mutex)
- __attribute__((nonnull));
-
- /** Statistics of buddy blocks of a given size. */
-diff -ruN a/storage/innodb_plugin/include/buf0buddy.ic b/storage/innodb_plugin/include/buf0buddy.ic
---- a/storage/innodb_plugin/include/buf0buddy.ic 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buddy.ic 2010-08-27 16:11:40.619989772 +0900
-@@ -44,10 +44,11 @@
- /*================*/
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
-- ibool* lru) /*!< in: pointer to a variable that will be assigned
-+ ibool* lru, /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
- or NULL if the LRU list should not be used */
-+ ibool have_page_hash_mutex)
- __attribute__((malloc));
-
- /**********************************************************************//**
-@@ -58,8 +59,9 @@
- /*===============*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
-- ulint i) /*!< in: index of buf_pool->zip_free[],
-+ ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
-+ ibool have_page_hash_mutex)
- __attribute__((nonnull));
-
- /**********************************************************************//**
-@@ -96,14 +98,15 @@
- buf_buddy_alloc(
- /*============*/
- ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
-- ibool* lru) /*!< in: pointer to a variable that will be assigned
-+ ibool* lru, /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
- or NULL if the LRU list should not be used */
-+ ibool have_page_hash_mutex)
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
-- return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
-+ return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
- }
-
- /**********************************************************************//**
-@@ -114,11 +117,24 @@
- /*===========*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
-- ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
-+ ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
-+ ibool have_page_hash_mutex)
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
-- buf_buddy_free_low(buf, buf_buddy_get_slot(size));
-+ if (!have_page_hash_mutex) {
-+ mutex_enter(&LRU_list_mutex);
-+ rw_lock_x_lock(&page_hash_latch);
-+ }
-+
-+ mutex_enter(&zip_free_mutex);
-+ buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
-+ mutex_exit(&zip_free_mutex);
-+
-+ if (!have_page_hash_mutex) {
-+ mutex_exit(&LRU_list_mutex);
-+ rw_lock_x_unlock(&page_hash_latch);
-+ }
- }
-
- #ifdef UNIV_MATERIALIZE
-diff -ruN a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h
---- a/storage/innodb_plugin/include/buf0buf.h 2010-08-27 15:55:39.399063353 +0900
-+++ b/storage/innodb_plugin/include/buf0buf.h 2010-08-27 16:11:40.622020552 +0900
-@@ -713,6 +713,15 @@
- const buf_page_t* bpage) /*!< in: pointer to control block */
- __attribute__((pure));
-
-+/*************************************************************************
-+Gets the mutex of a block and enter the mutex with consistency. */
-+UNIV_INLINE
-+mutex_t*
-+buf_page_get_mutex_enter(
-+/*=========================*/
-+ const buf_page_t* bpage) /*!< in: pointer to control block */
-+ __attribute__((pure));
-+
- /*********************************************************************//**
- Get the flush type of a page.
- @return flush type */
-@@ -1066,7 +1075,7 @@
- All these are protected by buf_pool_mutex. */
- /* @{ */
-
-- UT_LIST_NODE_T(buf_page_t) list;
-+ /* UT_LIST_NODE_T(buf_page_t) list; */
- /*!< based on state, this is a
- list node, protected only by
- buf_pool_mutex, in one of the
-@@ -1086,6 +1095,10 @@
- BUF_BLOCK_REMOVE_HASH or
- BUF_BLOCK_READY_IN_USE. */
-
-+ /* resplit for optimistic use */
-+ UT_LIST_NODE_T(buf_page_t) free;
-+ UT_LIST_NODE_T(buf_page_t) flush_list;
-+ UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
- #ifdef UNIV_DEBUG
- ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
- when buf_pool_mutex is free, the
-@@ -1166,11 +1179,11 @@
- a block is in the unzip_LRU list
- if page.state == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
--#ifdef UNIV_DEBUG
-+//#ifdef UNIV_DEBUG
- ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
- decompressed LRU list;
- used in debugging */
--#endif /* UNIV_DEBUG */
-+//#endif /* UNIV_DEBUG */
- mutex_t mutex; /*!< mutex protecting this block:
- state (also protected by the buffer
- pool mutex), io_fix, buf_fix_count,
-@@ -1446,6 +1459,12 @@
- /** mutex protecting the buffer pool struct and control blocks, except the
- read-write lock in them */
- extern mutex_t buf_pool_mutex;
-+extern mutex_t LRU_list_mutex;
-+extern mutex_t flush_list_mutex;
-+extern rw_lock_t page_hash_latch;
-+extern mutex_t free_list_mutex;
-+extern mutex_t zip_free_mutex;
-+extern mutex_t zip_hash_mutex;
- /** mutex protecting the control blocks of compressed-only pages
- (of type buf_page_t, not buf_block_t) */
- extern mutex_t buf_pool_zip_mutex;
-diff -ruN a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic
---- a/storage/innodb_plugin/include/buf0buf.ic 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0buf.ic 2010-08-27 16:11:40.624990413 +0900
-@@ -121,7 +121,9 @@
- buf_page_t* bpage;
- ib_uint64_t lsn;
-
-- buf_pool_mutex_enter();
-+try_again:
-+ //buf_pool_mutex_enter();
-+ mutex_enter(&flush_list_mutex);
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
-@@ -130,9 +132,14 @@
- } else {
- ut_ad(bpage->in_flush_list);
- lsn = bpage->oldest_modification;
-+ if (lsn == 0) {
-+ mutex_exit(&flush_list_mutex);
-+ goto try_again;
-+ }
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ mutex_exit(&flush_list_mutex);
-
- /* The returned answer may be out of date: the flush_list can
- change after the mutex has been released. */
-@@ -252,7 +259,7 @@
- case BUF_BLOCK_ZIP_FREE:
- /* This is a free page in buf_pool->zip_free[].
- Such pages should only be accessed by the buddy allocator. */
-- ut_error;
-+ /* ut_error; */ /* optimistic */
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
-@@ -295,7 +302,7 @@
- {
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_FREE:
-- ut_error;
-+ /* ut_error; */ /* optimistic */
- return(NULL);
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
-@@ -305,6 +312,28 @@
- }
- }
-
-+/*************************************************************************
-+Gets the mutex of a block and enter the mutex with consistency. */
-+UNIV_INLINE
-+mutex_t*
-+buf_page_get_mutex_enter(
-+/*=========================*/
-+ const buf_page_t* bpage) /*!< in: pointer to control block */
-+{
-+ mutex_t* block_mutex;
-+
-+ while(1) {
-+ block_mutex = buf_page_get_mutex(bpage);
-+ if (!block_mutex)
-+ return block_mutex;
-+
-+ mutex_enter(block_mutex);
-+ if (block_mutex == buf_page_get_mutex(bpage))
-+ return block_mutex;
-+ mutex_exit(block_mutex);
-+ }
-+}
-+
- /*********************************************************************//**
- Get the flush type of a page.
- @return flush type */
-@@ -400,7 +429,7 @@
- buf_page_t* bpage, /*!< in/out: control block */
- enum buf_io_fix io_fix) /*!< in: io_fix state */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- bpage->io_fix = io_fix;
-@@ -428,12 +457,13 @@
- /*==================*/
- const buf_page_t* bpage) /*!< control block being relocated */
- {
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_in_file(bpage));
-- ut_ad(bpage->in_LRU_list);
-+ /* optimistic */
-+ //ut_ad(bpage->in_LRU_list);
-
-- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
-+ return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
- && bpage->buf_fix_count == 0);
- }
-
-@@ -447,7 +477,7 @@
- const buf_page_t* bpage) /*!< in: control block */
- {
- ut_ad(buf_page_in_file(bpage));
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
-
- return(bpage->old);
- }
-@@ -462,7 +492,8 @@
- ibool old) /*!< in: old */
- {
- ut_a(buf_page_in_file(bpage));
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(&LRU_list_mutex));
- ut_ad(bpage->in_LRU_list);
-
- #ifdef UNIV_LRU_DEBUG
-@@ -509,7 +540,8 @@
- ulint time_ms) /*!< in: ut_time_ms() */
- {
- ut_a(buf_page_in_file(bpage));
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- if (!bpage->access_time) {
- /* Make this the time of the first access. */
-@@ -741,17 +773,17 @@
- /*===========*/
- buf_block_t* block) /*!< in, own: block to be freed */
- {
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-
- mutex_enter(&block->mutex);
-
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
-- buf_LRU_block_free_non_file_page(block);
-+ buf_LRU_block_free_non_file_page(block, FALSE);
-
- mutex_exit(&block->mutex);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
- }
- #endif /* !UNIV_HOTBACKUP */
-
-@@ -799,17 +831,17 @@
- page frame */
- {
- ib_uint64_t lsn;
-- mutex_t* block_mutex = buf_page_get_mutex(bpage);
-+ mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
-
-- mutex_enter(block_mutex);
--
-- if (buf_page_in_file(bpage)) {
-+ if (block_mutex && buf_page_in_file(bpage)) {
- lsn = bpage->newest_modification;
- } else {
- lsn = 0;
- }
-
-- mutex_exit(block_mutex);
-+ if (block_mutex) {
-+ mutex_exit(block_mutex);
-+ }
-
- return(lsn);
- }
-@@ -825,7 +857,7 @@
- buf_block_t* block) /*!< in: block */
- {
- #ifdef UNIV_SYNC_DEBUG
-- ut_ad((buf_pool_mutex_own()
-+ ut_ad((mutex_own(&LRU_list_mutex)
- && (block->page.buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
- #endif /* UNIV_SYNC_DEBUG */
-@@ -917,7 +949,11 @@
- ulint fold;
-
- ut_ad(buf_pool);
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-+#ifdef UNIV_SYNC_DEBUG
-+ ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
-+ || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
-+#endif
-
- /* Look for the page in the hash table */
-
-@@ -972,11 +1008,13 @@
- {
- const buf_page_t* bpage;
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-+ rw_lock_s_lock(&page_hash_latch);
-
- bpage = buf_page_hash_get(space, offset);
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ rw_lock_s_unlock(&page_hash_latch);
-
- return(bpage != NULL);
- }
-@@ -1038,11 +1076,14 @@
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_a(block->page.buf_fix_count > 0);
-
-+ /* buf_flush_note_modification() should be called before this function. */
-+/*
- if (rw_latch == RW_X_LATCH && mtr->modifications) {
- buf_pool_mutex_enter();
- buf_flush_note_modification(block, mtr);
- buf_pool_mutex_exit();
- }
-+*/
-
- mutex_enter(&block->mutex);
-
-diff -ruN a/storage/innodb_plugin/include/buf0flu.ic b/storage/innodb_plugin/include/buf0flu.ic
---- a/storage/innodb_plugin/include/buf0flu.ic 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0flu.ic 2010-08-27 16:11:40.625993554 +0900
-@@ -55,13 +55,23 @@
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr) /*!< in: mtr */
- {
-+ ibool use_LRU_mutex = FALSE;
-+
-+ if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+ use_LRU_mutex = TRUE;
-+
-+ if (use_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex);
-+
-+ mutex_enter(&block->mutex);
-+
- ut_ad(block);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
- #ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- #endif /* UNIV_SYNC_DEBUG */
-- ut_ad(buf_pool_mutex_own());
-+ //ut_ad(buf_pool_mutex_own());
-
- ut_ad(mtr->start_lsn != 0);
- ut_ad(mtr->modifications);
-@@ -70,16 +80,23 @@
- block->page.newest_modification = mtr->end_lsn;
-
- if (!block->page.oldest_modification) {
-+ mutex_enter(&flush_list_mutex);
-
- block->page.oldest_modification = mtr->start_lsn;
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_into_flush_list(block);
-+ mutex_exit(&flush_list_mutex);
- } else {
- ut_ad(block->page.oldest_modification <= mtr->start_lsn);
- }
-
-+ mutex_exit(&block->mutex);
-+
- ++srv_buf_pool_write_requests;
-+
-+ if (use_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
- }
-
- /********************************************************************//**
-@@ -94,6 +111,16 @@
- ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
- set of mtr's */
- {
-+ ibool use_LRU_mutex = FALSE;
-+
-+ if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
-+ use_LRU_mutex = TRUE;
-+
-+ if (use_LRU_mutex)
-+ mutex_enter(&LRU_list_mutex);
-+
-+ mutex_enter(&(block->mutex));
-+
- ut_ad(block);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-@@ -101,23 +128,28 @@
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- #endif /* UNIV_SYNC_DEBUG */
-
-- buf_pool_mutex_enter();
-+ //buf_pool_mutex_enter();
-
- ut_ad(block->page.newest_modification <= end_lsn);
-
- block->page.newest_modification = end_lsn;
-
- if (!block->page.oldest_modification) {
-+ mutex_enter(&flush_list_mutex);
-
- block->page.oldest_modification = start_lsn;
-
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_sorted_into_flush_list(block);
-+ mutex_exit(&flush_list_mutex);
- } else {
- ut_ad(block->page.oldest_modification <= start_lsn);
- }
-
-- buf_pool_mutex_exit();
-+ //buf_pool_mutex_exit();
-+ if (use_LRU_mutex)
-+ mutex_exit(&LRU_list_mutex);
-+ mutex_exit(&(block->mutex));
- }
- #endif /* !UNIV_HOTBACKUP */
-diff -ruN a/storage/innodb_plugin/include/buf0lru.h b/storage/innodb_plugin/include/buf0lru.h
---- a/storage/innodb_plugin/include/buf0lru.h 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/buf0lru.h 2010-08-27 16:11:40.627990038 +0900
-@@ -112,10 +112,11 @@
- buf_page_t* bpage, /*!< in: block to be freed */
- ibool zip, /*!< in: TRUE if should remove also the
- compressed page of an uncompressed page */
-- ibool* buf_pool_mutex_released);
-+ ibool* buf_pool_mutex_released,
- /*!< in: pointer to a variable that will
- be assigned TRUE if buf_pool_mutex
- was temporarily released, or NULL */
-+ ibool have_LRU_mutex);
- /******************************************************************//**
- Try to free a replaceable block.
- @return TRUE if found and freed */
-@@ -157,7 +158,8 @@
- void
- buf_LRU_block_free_non_file_page(
- /*=============================*/
-- buf_block_t* block); /*!< in: block, must not contain a file page */
-+ buf_block_t* block, /*!< in: block, must not contain a file page */
-+ ibool have_page_hash_mutex);
- /******************************************************************//**
- Adds a block to the LRU list. */
- UNIV_INTERN
-diff -ruN a/storage/innodb_plugin/include/sync0sync.h b/storage/innodb_plugin/include/sync0sync.h
---- a/storage/innodb_plugin/include/sync0sync.h 2010-08-04 02:24:19.000000000 +0900
-+++ b/storage/innodb_plugin/include/sync0sync.h 2010-08-27 16:11:40.628990180 +0900
-@@ -487,8 +487,14 @@
- SYNC_SEARCH_SYS, as memory allocation
- can call routines there! Otherwise
- the level is SYNC_MEM_HASH. */
-+#define SYNC_BUF_LRU_LIST 157
-+#define SYNC_BUF_PAGE_HASH 156
-+#define SYNC_BUF_BLOCK 155
-+#define SYNC_BUF_FREE_LIST 153
-+#define SYNC_BUF_ZIP_FREE 152
-+#define SYNC_BUF_ZIP_HASH 151
- #define SYNC_BUF_POOL 150
--#define SYNC_BUF_BLOCK 149
-+#define SYNC_BUF_FLUSH_LIST 149
- #define SYNC_DOUBLEWRITE 140
- #define SYNC_ANY_LATCH 135
- #define SYNC_THR_LOCAL 133
-@@ -519,7 +525,7 @@
- os_fast_mutex; /*!< We use this OS mutex in place of lock_word
- when atomic operations are not enabled */
- #endif
-- ulint waiters; /*!< This ulint is set to 1 if there are (or
-+ volatile ulint waiters; /*!< This ulint is set to 1 if there are (or
- may be) threads waiting in the global wait
- array for this mutex to be released.
- Otherwise, this is 0. */
-diff -ruN a/storage/innodb_plugin/mtr/mtr0mtr.c b/storage/innodb_plugin/mtr/mtr0mtr.c
---- a/storage/innodb_plugin/mtr/mtr0mtr.c 2010-08-04 02:24:20.000000000 +0900
-+++ b/storage/innodb_plugin/mtr/mtr0mtr.c 2010-08-27 16:11:40.631020912 +0900
-@@ -105,6 +105,38 @@
- }
- }
-
-+UNIV_INLINE
-+void
-+mtr_memo_note_modification_all(
-+/*===========================*/
-+ mtr_t* mtr) /* in: mtr */
-+{
-+ mtr_memo_slot_t* slot;
-+ dyn_array_t* memo;
-+ ulint offset;
-+
-+ ut_ad(mtr);
-+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
-+ ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
-+ commit */
-+ ut_ad(mtr->modifications);
-+
-+ memo = &(mtr->memo);
-+
-+ offset = dyn_array_get_data_size(memo);
-+
-+ while (offset > 0) {
-+ offset -= sizeof(mtr_memo_slot_t);
-+ slot = dyn_array_get_element(memo, offset);
-+
-+ if (UNIV_LIKELY(slot->object != NULL) &&
-+ slot->type == MTR_MEMO_PAGE_X_FIX) {
-+ buf_flush_note_modification(
-+ (buf_block_t*)slot->object, mtr);
-+ }
-+ }
-+}
-+
- /************************************************************//**
- Writes the contents of a mini-transaction log, if any, to the database log. */
- static
-@@ -188,6 +220,8 @@
-
- if (write_log) {
- mtr_log_reserve_and_write(mtr);
-+
-+ mtr_memo_note_modification_all(mtr);
- }
-
- /* We first update the modification info to buffer pages, and only
-@@ -198,11 +232,13 @@
- required when we insert modified buffer pages in to the flush list
- which must be sorted on oldest_modification. */
-
-- mtr_memo_pop_all(mtr);
--
- if (write_log) {
- log_release();
- }
-+
-+ /* All unlocking has been moved here, after log_sys mutex release. */
-+ mtr_memo_pop_all(mtr);
-+
- #endif /* !UNIV_HOTBACKUP */
-
- ut_d(mtr->state = MTR_COMMITTED);
-@@ -273,6 +309,12 @@
- slot = dyn_array_get_element(memo, offset);
-
- if ((object == slot->object) && (type == slot->type)) {
-+ if (mtr->modifications &&
-+ UNIV_LIKELY(slot->object != NULL) &&
-+ slot->type == MTR_MEMO_PAGE_X_FIX) {
-+ buf_flush_note_modification(
-+ (buf_block_t*)slot->object, mtr);
-+ }
-
- mtr_memo_slot_release(mtr, slot);
-
-diff -ruN a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c
---- a/storage/innodb_plugin/srv/srv0srv.c 2010-08-27 16:11:12.194989878 +0900
-+++ b/storage/innodb_plugin/srv/srv0srv.c 2010-08-27 16:11:40.634022489 +0900
-@@ -2829,7 +2829,7 @@
-
- mutex_exit(&(log_sys->mutex));
-
-- buf_pool_mutex_enter();
-+ mutex_enter(&flush_list_mutex);
-
- level = 0;
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-@@ -2851,7 +2851,7 @@
- bpl = 0;
- }
-
-- buf_pool_mutex_exit();
-+ mutex_exit(&flush_list_mutex);
-
- if (!srv_use_doublewrite_buf) {
- /* flush is faster than when doublewrite */
-diff -ruN a/storage/innodb_plugin/sync/sync0sync.c b/storage/innodb_plugin/sync/sync0sync.c
---- a/storage/innodb_plugin/sync/sync0sync.c 2010-08-04 02:24:20.000000000 +0900
-+++ b/storage/innodb_plugin/sync/sync0sync.c 2010-08-27 16:11:40.636021261 +0900
-@@ -254,7 +254,7 @@
- mutex->lock_word = 0;
- #endif
- mutex->event = os_event_create(NULL);
-- mutex_set_waiters(mutex, 0);
-+ mutex->waiters = 0;
- #ifdef UNIV_DEBUG
- mutex->magic_n = MUTEX_MAGIC_N;
- #endif /* UNIV_DEBUG */
-@@ -432,6 +432,15 @@
- mutex_t* mutex, /*!< in: mutex */
- ulint n) /*!< in: value to set */
- {
-+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
-+ ut_ad(mutex);
-+
-+ if (n) {
-+ os_compare_and_swap_ulint(&mutex->waiters, 0, 1);
-+ } else {
-+ os_compare_and_swap_ulint(&mutex->waiters, 1, 0);
-+ }
-+#else
- volatile ulint* ptr; /* declared volatile to ensure that
- the value is stored to memory */
- ut_ad(mutex);
-@@ -440,6 +449,7 @@
-
- *ptr = n; /* Here we assume that the write of a single
- word in memory is atomic */
-+#endif
- }
-
- /******************************************************************//**
-@@ -1153,6 +1163,12 @@
- case SYNC_TRX_SYS_HEADER:
- case SYNC_FILE_FORMAT_TAG:
- case SYNC_DOUBLEWRITE:
-+ case SYNC_BUF_LRU_LIST:
-+ case SYNC_BUF_FLUSH_LIST:
-+ case SYNC_BUF_PAGE_HASH:
-+ case SYNC_BUF_FREE_LIST:
-+ case SYNC_BUF_ZIP_FREE:
-+ case SYNC_BUF_ZIP_HASH:
- case SYNC_BUF_POOL:
- case SYNC_SEARCH_SYS:
- case SYNC_SEARCH_SYS_CONF:
-@@ -1181,7 +1197,7 @@
- buffer block (block->mutex or buf_pool_zip_mutex). */
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
-- ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
-+ ut_a(sync_thread_levels_contain(array, SYNC_BUF_LRU_LIST));
- }
- break;
- case SYNC_REC_LOCK:
+++ /dev/null
---- mysql-5.1.21-beta/configure.in~ 2007-08-29 19:00:29.000000000 +0300
-+++ mysql-5.1.21-beta/configure.in 2007-08-29 19:00:35.000000000 +0300
-@@ -836,8 +836,9 @@
-
- AC_CHECK_LIB(m, floor, [], AC_CHECK_LIB(m, __infinity))
-
--AC_CHECK_LIB(nsl_r, gethostbyname_r, [],
-- AC_CHECK_LIB(nsl, gethostbyname_r))
-+AC_CHECK_FUNC(gethostbyname_r, [], [
-+ AC_CHECK_LIB(nsl_r, gethostbyname_r, [],
-+ AC_CHECK_LIB(nsl, gethostbyname_r)) ])
- AC_CHECK_FUNC(gethostbyname_r)
-
- AC_SEARCH_LIBS(setsockopt, socket)
-@@ -845,6 +846,11 @@
- AC_SEARCH_LIBS(bind, bind)
- # Check if crypt() exists in libc or libcrypt, sets LIBS if needed
- AC_SEARCH_LIBS(crypt, crypt, AC_DEFINE(HAVE_CRYPT, 1, [crypt]))
-+save_LIBS="$LIBS"
-+AC_CHECK_LIB(crypt, crypt, [LIBS="-lcrypt $LIBS"; LIBCRYPT="-lcrypt"])
-+AC_SUBST(LIBCRYPT)
-+AC_CHECK_FUNC(crypt, AC_DEFINE([HAVE_CRYPT], [1], [crypt]))
-+LIBS="$save_LIBS"
- # See if we need a library for address lookup.
- AC_SEARCH_LIBS(inet_aton, [socket nsl resolv])
-
---- mysql-5.1.21-beta/sql/Makefile.am~ 2007-08-29 18:58:33.000000000 +0300
-+++ mysql-5.1.21-beta/sql/Makefile.am 2007-08-29 19:01:33.000000000 +0300
-@@ -43,7 +43,7 @@
- @pstack_libs@ \
- @mysql_plugin_libs@ \
- $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \
-- $(yassl_libs) $(openssl_libs) @MYSQLD_EXTRA_LIBS@
-+ $(yassl_libs) $(openssl_libs) @LIBCRYPT@ @MYSQLD_EXTRA_LIBS@
-
- noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \
- item_strfunc.h item_timefunc.h \
---- mysql-5.1.26-rc/config/ac-macros/ssl.m4.orig 2008-07-01 00:35:01.000000000 +0200
-+++ mysql-5.1.26-rc/config/ac-macros/ssl.m4 2008-08-25 19:53:00.258254465 +0200
-@@ -102,7 +102,12 @@
- #
- # Try to link with openSSL libs in <location>
- #
-- openssl_libs="-L$location/lib/ -lssl -lcrypto"
-+ if test "$location" != "/usr"
-+ then
-+ openssl_libs="-L$location/lib -lssl -lcrypto"
-+ else
-+ openssl_libs="-lssl -lcrypto"
-+ fi
- MYSQL_CHECK_SSL_DIR([$openssl_includes], [$openssl_libs])
-
- if test "$mysql_ssl_found" == "no"
+++ /dev/null
---- mysql-4.1.9/sql/mysqld.cc.orig 2005-01-11 23:06:00.000000000 +0100
-+++ mysql-4.1.9/sql/mysqld.cc 2005-02-06 17:21:26.238717200 +0100
-@@ -133,7 +133,16 @@
- #endif /* __WIN__ */
-
- #ifdef HAVE_LIBWRAP
-+#define hosts_access hosts_access_old
-+#define sock_host sock_host_old
-+#define eval_client eval_client_old
- #include <tcpd.h>
-+#undef hosts_access
-+#undef sock_host
-+#undef eval_client
-+extern int hosts_access(struct request_info *);
-+extern int sock_host(struct request_info *);
-+extern int eval_client(struct request_info *);
- #include <syslog.h>
- #ifdef NEED_SYS_SYSLOG_H
- #include <sys/syslog.h>
+++ /dev/null
---- mysql-4.0.13/configure.in.orig Fri Jul 11 11:59:51 2003
-+++ mysql-4.0.13/configure.in Fri Jul 11 14:06:50 2003
-@@ -431,6 +431,7 @@
- # Lock for PS
- AC_PATH_PROG(PS, ps, ps)
- AC_MSG_CHECKING("how to check if pid exists")
-+if test -z "$FIND_PROC"; then
- PS=$ac_cv_path_PS
- # Linux style
- if $PS p $$ 2> /dev/null | grep $0 > /dev/null
-@@ -467,12 +468,14 @@
- AC_MSG_ERROR([Could not find the right ps switches. Which OS is this ?. See the Installation chapter in the Reference Manual.])
- esac
- fi
-+fi
- AC_SUBST(FIND_PROC)
- AC_MSG_RESULT("$FIND_PROC")
-
- # Check if a pid is valid
- AC_PATH_PROG(KILL, kill, kill)
- AC_MSG_CHECKING("for kill switches")
-+if test -z "$CHECK_PID"; then
- if $ac_cv_path_KILL -0 $$
- then
- CHECK_PID="$ac_cv_path_KILL -0 \$\$PID > /dev/null 2> /dev/null"
-@@ -483,6 +486,7 @@
- AC_MSG_WARN([kill -0 to check for pid seems to fail])
- CHECK_PID="$ac_cv_path_KILL -s SIGCONT \$\$PID > /dev/null 2> /dev/null"
- fi
-+fi
- AC_SUBST(CHECK_PID)
- AC_MSG_RESULT("$CHECK_PID")
-
--- /dev/null
+# name : mysql-test.diff
+# introduced : ???
+# maintainer : ???
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf
+--- a/mysql-test/include/default_mysqld.cnf 2010-11-03 07:01:11.000000000 +0900
++++ b/mysql-test/include/default_mysqld.cnf 2010-12-10 16:48:10.996387047 +0900
+@@ -29,7 +29,7 @@
+ max_heap_table_size= 1M
+
+ loose-innodb_data_file_path= ibdata1:10M:autoextend
+-loose-innodb_buffer_pool_size= 8M
++loose-innodb_buffer_pool_size= 32M
+ loose-innodb_write_io_threads= 2
+ loose-innodb_read_io_threads= 2
+ loose-innodb_log_buffer_size= 1M
+diff -ruN a/mysql-test/r/connect.result b/mysql-test/r/connect.result
+--- a/mysql-test/r/connect.result 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/connect.result 2010-12-10 16:48:10.997386982 +0900
+@@ -1,3 +1,4 @@
++set global log_warnings=0;
+ drop table if exists t1,t2;
+ show tables;
+ Tables_in_mysql
+@@ -221,3 +222,4 @@
+ # ------------------------------------------------------------------
+ # -- End of 5.1 tests
+ # ------------------------------------------------------------------
++set global log_warnings=1;
+diff -ruN a/mysql-test/r/create.result b/mysql-test/r/create.result
+--- a/mysql-test/r/create.result 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/create.result 2010-12-10 16:48:11.001009442 +0900
+@@ -1741,7 +1741,8 @@
+ `COMMAND` varchar(16) NOT NULL DEFAULT '',
+ `TIME` int(7) NOT NULL DEFAULT '0',
+ `STATE` varchar(64) DEFAULT NULL,
+- `INFO` longtext
++ `INFO` longtext,
++ `TIME_MS` bigint(21) NOT NULL DEFAULT '0'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create temporary table t1 like information_schema.processlist;
+@@ -1755,7 +1756,8 @@
+ `COMMAND` varchar(16) NOT NULL DEFAULT '',
+ `TIME` int(7) NOT NULL DEFAULT '0',
+ `STATE` varchar(64) DEFAULT NULL,
+- `INFO` longtext
++ `INFO` longtext,
++ `TIME_MS` bigint(21) NOT NULL DEFAULT '0'
+ ) ENGINE=MyISAM DEFAULT CHARSET=utf8
+ drop table t1;
+ create table t1 like information_schema.character_sets;
+diff -ruN a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result
+--- a/mysql-test/r/information_schema.result 2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/information_schema.result 2011-01-10 23:02:09.000000000 +0300
+@@ -46,14 +46,17 @@
+ select * from v1;
+ c
+ CHARACTER_SETS
++CLIENT_STATISTICS
+ COLLATIONS
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INDEX_STATISTICS
+ ENGINES
+ EVENTS
+ FILES
+ GLOBAL_STATUS
++GLOBAL_TEMPORARY_TABLES
+ GLOBAL_VARIABLES
+ KEY_COLUMN_USAGE
+ PARAMETERS
+@@ -63,6 +66,7 @@
+ PROFILING
+ REFERENTIAL_CONSTRAINTS
+ ROUTINES
++QUERY_RESPONSE_TIME
+ SCHEMATA
+ SCHEMA_PRIVILEGES
+ SESSION_STATUS
+@@ -72,9 +76,14 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ USER_PRIVILEGES
++USER_STATISTICS
+ VIEWS
++XTRADB_ADMIN_COMMAND
+ columns_priv
+ db
+ event
+@@ -112,6 +121,9 @@
+ TABLESPACES TABLESPACES
+ TABLE_CONSTRAINTS TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES TABLE_PRIVILEGES
++TABLE_STATISTICS TABLE_STATISTICS
++TEMPORARY_TABLES TEMPORARY_TABLES
++THREAD_STATISTICS THREAD_STATISTICS
+ TRIGGERS TRIGGERS
+ tables_priv tables_priv
+ time_zone time_zone
+@@ -132,6 +144,9 @@
+ TABLESPACES TABLESPACES
+ TABLE_CONSTRAINTS TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES TABLE_PRIVILEGES
++TABLE_STATISTICS TABLE_STATISTICS
++TEMPORARY_TABLES TEMPORARY_TABLES
++THREAD_STATISTICS THREAD_STATISTICS
+ TRIGGERS TRIGGERS
+ tables_priv tables_priv
+ time_zone time_zone
+@@ -152,6 +167,9 @@
+ TABLESPACES TABLESPACES
+ TABLE_CONSTRAINTS TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES TABLE_PRIVILEGES
++TABLE_STATISTICS TABLE_STATISTICS
++TEMPORARY_TABLES TEMPORARY_TABLES
++THREAD_STATISTICS THREAD_STATISTICS
+ TRIGGERS TRIGGERS
+ tables_priv tables_priv
+ time_zone time_zone
+@@ -634,13 +652,16 @@
+ where table_schema='information_schema' limit 2;
+ TABLE_NAME TABLE_TYPE ENGINE
+ CHARACTER_SETS SYSTEM VIEW MEMORY
+-COLLATIONS SYSTEM VIEW MEMORY
++CLIENT_STATISTICS SYSTEM VIEW MEMORY
+ show tables from information_schema like "T%";
+ Tables_in_information_schema (T%)
+ TABLES
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ create database information_schema;
+ ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema'
+@@ -651,6 +672,9 @@
+ TABLESPACES SYSTEM VIEW
+ TABLE_CONSTRAINTS SYSTEM VIEW
+ TABLE_PRIVILEGES SYSTEM VIEW
++TABLE_STATISTICS SYSTEM VIEW
++TEMPORARY_TABLES SYSTEM VIEW
++THREAD_STATISTICS SYSTEM VIEW
+ TRIGGERS SYSTEM VIEW
+ create table t1(a int);
+ ERROR 42000: Access denied for user 'root'@'localhost' to database 'information_schema'
+@@ -664,6 +688,9 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ select table_name from tables where table_name='user';
+ table_name
+@@ -806,6 +833,8 @@
+ FILES CREATE_TIME datetime
+ FILES UPDATE_TIME datetime
+ FILES CHECK_TIME datetime
++GLOBAL_TEMPORARY_TABLES CREATE_TIME datetime
++GLOBAL_TEMPORARY_TABLES UPDATE_TIME datetime
+ PARTITIONS CREATE_TIME datetime
+ PARTITIONS UPDATE_TIME datetime
+ PARTITIONS CHECK_TIME datetime
+@@ -814,6 +843,8 @@
+ TABLES CREATE_TIME datetime
+ TABLES UPDATE_TIME datetime
+ TABLES CHECK_TIME datetime
++TEMPORARY_TABLES CREATE_TIME datetime
++TEMPORARY_TABLES UPDATE_TIME datetime
+ TRIGGERS CREATED datetime
+ event execute_at datetime
+ event last_executed datetime
+@@ -854,7 +885,9 @@
+ TABLE_NAME COLUMN_NAME PRIVILEGES
+ COLUMNS TABLE_NAME select
+ COLUMN_PRIVILEGES TABLE_NAME select
++INDEX_STATISTICS TABLE_NAME select
+ FILES TABLE_NAME select
++GLOBAL_TEMPORARY_TABLES TABLE_NAME select
+ KEY_COLUMN_USAGE TABLE_NAME select
+ PARTITIONS TABLE_NAME select
+ REFERENTIAL_CONSTRAINTS TABLE_NAME select
+@@ -862,7 +895,11 @@
+ TABLES TABLE_NAME select
+ TABLE_CONSTRAINTS TABLE_NAME select
+ TABLE_PRIVILEGES TABLE_NAME select
++TABLE_STATISTICS TABLE_NAME select
++TEMPORARY_TABLES TABLE_NAME select
+ VIEWS TABLE_NAME select
++INNODB_TABLE_STATS table_name select
++INNODB_INDEX_STATS table_name select
+ delete from mysql.user where user='mysqltest_4';
+ delete from mysql.db where user='mysqltest_4';
+ flush privileges;
+@@ -871,7 +908,7 @@
+ AND table_name not like 'ndb%' AND table_name not like 'innodb_%'
+ GROUP BY TABLE_SCHEMA;
+ table_schema count(*)
+-information_schema 30
++information_schema 39
+ mysql 23
+ create table t1 (i int, j int);
+ create trigger trg1 before insert on t1 for each row
+@@ -1245,12 +1282,12 @@
+ DROP USER mysql_bug20230@localhost;
+ SELECT MAX(table_name) FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test');
+ MAX(table_name)
+-VIEWS
++XTRADB_ADMIN_COMMAND
+ SELECT table_name from information_schema.tables
+ WHERE table_name=(SELECT MAX(table_name)
+ FROM information_schema.tables WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test'));
+ table_name
+-VIEWS
++XTRADB_ADMIN_COMMAND
+ DROP TABLE IF EXISTS bug23037;
+ DROP FUNCTION IF EXISTS get_value;
+ SELECT COLUMN_NAME, MD5(COLUMN_DEFAULT), LENGTH(COLUMN_DEFAULT) FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME='bug23037';
+@@ -1311,6 +1348,7 @@
+ group by t.table_name order by num1, t.table_name;
+ table_name group_concat(t.table_schema, '.', t.table_name) num1
+ CHARACTER_SETS information_schema.CHARACTER_SETS 1
++CLIENT_STATISTICS information_schema.CLIENT_STATISTICS 1
+ COLLATIONS information_schema.COLLATIONS 1
+ COLLATION_CHARACTER_SET_APPLICABILITY information_schema.COLLATION_CHARACTER_SET_APPLICABILITY 1
+ COLUMNS information_schema.COLUMNS 1
+@@ -1319,13 +1357,16 @@
+ EVENTS information_schema.EVENTS 1
+ FILES information_schema.FILES 1
+ GLOBAL_STATUS information_schema.GLOBAL_STATUS 1
++GLOBAL_TEMPORARY_TABLES information_schema.GLOBAL_TEMPORARY_TABLES 1
+ GLOBAL_VARIABLES information_schema.GLOBAL_VARIABLES 1
++INDEX_STATISTICS information_schema.INDEX_STATISTICS 1
+ KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1
+ PARAMETERS information_schema.PARAMETERS 1
+ PARTITIONS information_schema.PARTITIONS 1
+ PLUGINS information_schema.PLUGINS 1
+ PROCESSLIST information_schema.PROCESSLIST 1
+ PROFILING information_schema.PROFILING 1
++QUERY_RESPONSE_TIME information_schema.QUERY_RESPONSE_TIME 1
+ REFERENTIAL_CONSTRAINTS information_schema.REFERENTIAL_CONSTRAINTS 1
+ ROUTINES information_schema.ROUTINES 1
+ SCHEMATA information_schema.SCHEMATA 1
+@@ -1337,8 +1378,12 @@
+ TABLESPACES information_schema.TABLESPACES 1
+ TABLE_CONSTRAINTS information_schema.TABLE_CONSTRAINTS 1
+ TABLE_PRIVILEGES information_schema.TABLE_PRIVILEGES 1
++TABLE_STATISTICS information_schema.TABLE_STATISTICS 1
++TEMPORARY_TABLES information_schema.TEMPORARY_TABLES 1
++THREAD_STATISTICS information_schema.THREAD_STATISTICS 1
+ TRIGGERS information_schema.TRIGGERS 1
+ USER_PRIVILEGES information_schema.USER_PRIVILEGES 1
++USER_STATISTICS information_schema.USER_STATISTICS 1
+ VIEWS information_schema.VIEWS 1
+ create table t1(f1 int);
+ create view v1 as select f1+1 as a from t1;
+diff -ruN a/mysql-test/r/information_schema_db.result b/mysql-test/r/information_schema_db.result
+--- a/mysql-test/r/information_schema_db.result 2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/information_schema_db.result 2011-01-10 23:06:43.000000000 +0300
+@@ -6,14 +6,17 @@
+ show tables where Tables_in_information_schema NOT LIKE 'Innodb%';
+ Tables_in_information_schema
+ CHARACTER_SETS
++CLIENT_STATISTICS
+ COLLATIONS
+ COLLATION_CHARACTER_SET_APPLICABILITY
+ COLUMNS
+ COLUMN_PRIVILEGES
++INDEX_STATISTICS
+ ENGINES
+ EVENTS
+ FILES
+ GLOBAL_STATUS
++GLOBAL_TEMPORARY_TABLES
+ GLOBAL_VARIABLES
+ KEY_COLUMN_USAGE
+ PARAMETERS
+@@ -23,6 +26,7 @@
+ PROFILING
+ REFERENTIAL_CONSTRAINTS
+ ROUTINES
++QUERY_RESPONSE_TIME
+ SCHEMATA
+ SCHEMA_PRIVILEGES
+ SESSION_STATUS
+@@ -32,15 +36,23 @@
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ USER_PRIVILEGES
++USER_STATISTICS
+ VIEWS
++XTRADB_ADMIN_COMMAND
+ show tables from INFORMATION_SCHEMA like 'T%';
+ Tables_in_information_schema (T%)
+ TABLES
+ TABLESPACES
+ TABLE_CONSTRAINTS
+ TABLE_PRIVILEGES
++TABLE_STATISTICS
++TEMPORARY_TABLES
++THREAD_STATISTICS
+ TRIGGERS
+ create database `inf%`;
+ create database mbase;
+diff -ruN a/mysql-test/r/mysqld--help-notwin.result b/mysql-test/r/mysqld--help-notwin.result
+--- a/mysql-test/r/mysqld--help-notwin.result 2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/mysqld--help-notwin.result 2011-01-10 23:34:28.000000000 +0300
+@@ -125,6 +125,9 @@
+ --div-precision-increment=#
+ Precision of the result of '/' operator will be increased
+ on that value
++ --enable-query-response-time-stats
++ Enable or disable query response time statisics
++ collecting
+ --engine-condition-pushdown
+ Push supported query conditions to the storage engine.
+ Deprecated, use --optimizer-switch instead.
+@@ -244,21 +247,43 @@
+ --log-slow-admin-statements
+ Log slow OPTIMIZE, ANALYZE, ALTER and other
+ administrative statements to the slow log if it is open.
++ --log-slow-filter=name
++ Log only the queries that followed certain execution
++ plan. Multiple flags allowed in a comma-separated string.
++ [qc_miss, full_scan, full_join, tmp_table,
++ tmp_table_on_disk, filesort, filesort_on_disk]
+ --log-slow-queries[=name]
+ Log slow queries to a table or log file. Defaults logging
+ to table mysql.slow_log or hostname-slow.log if
+ --log-output=file is used. Must be enabled to activate
+ other slow log options. Deprecated option, use
+ --slow-query-log/--slow-query-log-file instead.
++ --log-slow-rate-limit=#
++ Rate limit statement writes to slow log to only those
++ from every (1/log_slow_rate_limit) session.
+ --log-slow-slave-statements
+- Log slow statements executed by slave thread to the slow
+- log if it is open.
++ Log queries replayed be the slave SQL thread
++ --log-slow-sp-statements
++ Log slow statements executed by stored procedure to the
++ slow log if it is open.
++ (Defaults to on; use --skip-log-slow-sp-statements to disable.)
++ --log-slow-timestamp-every
++ Timestamp is printed for all records of the slow log even
++ if they are same time.
++ --log-slow-verbosity=name
++ Choose how verbose the messages to your slow log will be.
++ Multiple flags allowed in a comma-separated string.
++ [microtime, query_plan, innodb, profiling,
++ profiling_get_rusage]
+ --log-tc=name Path to transaction coordinator log (used for
+ transactions that affect more than one storage engine,
+ when binary log is disabled).
+ --log-tc-size=# Size of transaction coordinator log.
+ -W, --log-warnings[=#]
+ Log some not critical warnings to the log file
++ --log-warnings-silence=name
++ disable logging of enumerated warnings: 1592: unsafe
++ statements for binary logging; possible values : [1592]
+ --long-query-time=# Log all queries that have taken more than long_query_time
+ seconds to execute to file. The argument will be treated
+ as a decimal value with microsecond precision
+@@ -464,6 +489,10 @@
+ The minimum size for blocks allocated by the query cache
+ --query-cache-size=#
+ The memory allocated to store results from old queries
++ --query-cache-strip-comments
++ Enable and disable optimisation "strip comment for query
++ cache" - optimisation strip all comments from query while
++ search query result in query cache
+ --query-cache-type=name
+ OFF = Don't cache or retrieve results. ON = Cache all
+ results except SELECT SQL_NO_CACHE ... queries. DEMAND =
+@@ -472,6 +501,9 @@
+ Invalidate queries in query cache on LOCK for write
+ --query-prealloc-size=#
+ Persistent buffer for query parsing and execution
++ --query-response-time-range-base=#
++ Select base of log for query_response_time ranges.
++ WARNING: variable change affect only after flush
+ --range-alloc-block-size=#
+ Allocation block size for storing ranges during
+ optimization
+@@ -649,6 +681,9 @@
+ Log slow queries to given log file. Defaults logging to
+ hostname-slow.log. Must be enabled to activate other slow
+ log options
++ --slow-query-log-microseconds-timestamp
++ Log slow statements executed by stored procedure to the
++ slow log if it is open.
+ --socket=name Socket file to use for connection
+ --sort-buffer-size=#
+ Each thread that needs to do a sort allocates a buffer of
+@@ -691,6 +726,8 @@
+ Define threads usage for handling queries, one of
+ one-thread-per-connection, no-threads, loaded-dynamically
+ --thread-stack=# The stack size for each thread
++ --thread-statistics Control TABLE_STATISTICS running, when userstat_running
++ is enabled
+ --time-format=name The TIME format (ignored)
+ --timed-mutexes Specify whether to time mutexes (only InnoDB mutexes are
+ currently supported)
+@@ -715,11 +752,24 @@
+ Prohibit update of a VIEW, which does not contain a key
+ of the underlying table and the query uses a LIMIT clause
+ (usually get from GUI tools)
++ --use-global-log-slow-control=name
++ Choose flags, wich always use the global variables.
++ Multiple flags allowed in a comma-separated string.
++ [none, log_slow_filter, log_slow_rate_limit,
++ log_slow_verbosity, long_query_time,
++ min_examined_row_limit, all]
+ -u, --user=name Run mysqld daemon as user.
++ --userstat-running Control USER_STATISTICS, CLIENT_STATISTICS,
++ THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS
++ running
+ -v, --verbose Used with --help option for detailed help.
+ -V, --version Output version information and exit.
+ --wait-timeout=# The number of seconds the server waits for activity on a
+ connection before closing it
++ --xtradb-admin-command[=name]
++ Enable or disable XTRADB_ADMIN_COMMAND plugin. Possible
++ values are ON, OFF, FORCE (don't start if the plugin
++ fails to load).
+
+ Variables (--variable-name=value)
+ abort-slave-event-count 0
+@@ -756,6 +806,7 @@
+ delayed-queue-size 1000
+ disconnect-slave-event-count 0
+ div-precision-increment 4
++enable-query-response-time-stats FALSE
+ engine-condition-pushdown TRUE
+ event-scheduler OFF
+ expire-logs-days 0
+@@ -800,10 +851,16 @@
+ log-short-format FALSE
+ log-slave-updates FALSE
+ log-slow-admin-statements FALSE
++log-slow-filter
++log-slow-rate-limit 1
+ log-slow-slave-statements FALSE
++log-slow-sp-statements TRUE
++log-slow-timestamp-every FALSE
++log-slow-verbosity
+ log-tc tc.log
+ log-tc-size 24576
+ log-warnings 1
++log-warnings-silence
+ long-query-time 10
+ low-priority-updates FALSE
+ lower-case-table-names 1
+@@ -877,9 +934,11 @@
+ query-cache-limit 1048576
+ query-cache-min-res-unit 4096
+ query-cache-size 0
++query-cache-strip-comments FALSE
+ query-cache-type ON
+ query-cache-wlock-invalidate FALSE
+ query-prealloc-size 8192
++query-response-time-range-base 10
+ range-alloc-block-size 4096
+ read-buffer-size 131072
+ read-only FALSE
+@@ -914,6 +973,7 @@
+ slave-type-conversions
+ slow-launch-time 2
+ slow-query-log FALSE
++slow-query-log-microseconds-timestamp FALSE
+ sort-buffer-size 2097152
+ sporadic-binlog-dump-fail FALSE
+ sql-mode
+@@ -931,6 +991,7 @@
+ thread-cache-size 0
+ thread-handling one-thread-per-connection
+ thread-stack 262144
++thread-statistics FALSE
+ time-format %H:%i:%s
+ timed-mutexes FALSE
+ tmp-table-size 16777216
+@@ -938,8 +999,11 @@
+ transaction-isolation REPEATABLE-READ
+ transaction-prealloc-size 4096
+ updatable-views-with-limit YES
++use-global-log-slow-control
++userstat-running FALSE
+ verbose TRUE
+ wait-timeout 28800
++xtradb-admin-command ON
+
+ To see what values a running MySQL server is using, type
+ 'mysqladmin variables' instead of 'mysqld --verbose --help'.
+diff -ruN a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
+--- a/mysql-test/r/mysqldump.result 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/mysqldump.result 2010-12-10 16:48:11.013968901 +0900
+@@ -1832,7 +1832,7 @@
+ # Bug#21288 mysqldump segmentation fault when using --where
+ #
+ create table t1 (a int);
+-mysqldump: Couldn't execute 'SELECT /*!40001 SQL_NO_CACHE */ * FROM `t1` WHERE xx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx': You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 (1064)
++mysqldump: Couldn't execute 'SELECT /*!40001 SQL_NO_CACHE */ /*!50084 SQL_NO_FCACHE */ * FROM `t1` WHERE xx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx': You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 (1064)
+ mysqldump: Got error: 1064: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' at line 1 when retrieving data from server
+
+ /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+diff -ruN a/mysql-test/r/mysqlshow.result b/mysql-test/r/mysqlshow.result
+--- a/mysql-test/r/mysqlshow.result 2010-12-03 20:58:24.000000000 +0300
++++ b/mysql-test/r/mysqlshow.result 2011-01-11 16:41:03.000000000 +0300
+@@ -80,14 +80,17 @@
+ | Tables |
+ +---------------------------------------+
+ | CHARACTER_SETS |
++| CLIENT_STATISTICS |
+ | COLLATIONS |
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS |
+ | COLUMN_PRIVILEGES |
++| INDEX_STATISTICS |
+ | ENGINES |
+ | EVENTS |
+ | FILES |
+ | GLOBAL_STATUS |
++| GLOBAL_TEMPORARY_TABLES |
+ | GLOBAL_VARIABLES |
+ | KEY_COLUMN_USAGE |
+ | PARAMETERS |
+@@ -97,6 +100,7 @@
+ | PROFILING |
+ | REFERENTIAL_CONSTRAINTS |
+ | ROUTINES |
++| QUERY_RESPONSE_TIME |
+ | SCHEMATA |
+ | SCHEMA_PRIVILEGES |
+ | SESSION_STATUS |
+@@ -106,30 +110,52 @@
+ | TABLESPACES |
+ | TABLE_CONSTRAINTS |
+ | TABLE_PRIVILEGES |
++| TABLE_STATISTICS |
++| TEMPORARY_TABLES |
++| THREAD_STATISTICS |
+ | TRIGGERS |
+ | USER_PRIVILEGES |
++| USER_STATISTICS |
+ | VIEWS |
+-| INNODB_CMP_RESET |
++| INNODB_SYS_COLUMNS |
++| INNODB_RSEG |
++| INNODB_CMP |
+ | INNODB_TRX |
+-| INNODB_CMPMEM_RESET |
++| INNODB_SYS_TABLESTATS |
+ | INNODB_LOCK_WAITS |
+-| INNODB_CMPMEM |
+-| INNODB_CMP |
++| XTRADB_ADMIN_COMMAND |
+ | INNODB_LOCKS |
++| INNODB_SYS_FOREIGN_COLS |
++| INNODB_CMP_RESET |
++| INNODB_BUFFER_POOL_PAGES |
++| INNODB_SYS_TABLES |
++| INNODB_BUFFER_POOL_PAGES_INDEX |
++| INNODB_CMPMEM |
++| INNODB_BUFFER_POOL_PAGES_BLOB |
++| INNODB_CMPMEM_RESET |
++| INNODB_SYS_FIELDS |
++| INNODB_TABLE_STATS |
++| INNODB_SYS_STATS |
++| INNODB_SYS_FOREIGN |
++| INNODB_SYS_INDEXES |
++| INNODB_INDEX_STATS |
+ +---------------------------------------+
+ Database: INFORMATION_SCHEMA
+ +---------------------------------------+
+ | Tables |
+ +---------------------------------------+
+ | CHARACTER_SETS |
++| CLIENT_STATISTICS |
+ | COLLATIONS |
+ | COLLATION_CHARACTER_SET_APPLICABILITY |
+ | COLUMNS |
+ | COLUMN_PRIVILEGES |
++| INDEX_STATISTICS |
+ | ENGINES |
+ | EVENTS |
+ | FILES |
+ | GLOBAL_STATUS |
++| GLOBAL_TEMPORARY_TABLES |
+ | GLOBAL_VARIABLES |
+ | KEY_COLUMN_USAGE |
+ | PARAMETERS |
+@@ -139,6 +165,7 @@
+ | PROFILING |
+ | REFERENTIAL_CONSTRAINTS |
+ | ROUTINES |
++| QUERY_RESPONSE_TIME |
+ | SCHEMATA |
+ | SCHEMA_PRIVILEGES |
+ | SESSION_STATUS |
+@@ -148,16 +175,35 @@
+ | TABLESPACES |
+ | TABLE_CONSTRAINTS |
+ | TABLE_PRIVILEGES |
++| TABLE_STATISTICS |
++| TEMPORARY_TABLES |
++| THREAD_STATISTICS |
+ | TRIGGERS |
+ | USER_PRIVILEGES |
++| USER_STATISTICS |
+ | VIEWS |
+-| INNODB_CMP_RESET |
++| INNODB_SYS_COLUMNS |
++| INNODB_RSEG |
++| INNODB_CMP |
+ | INNODB_TRX |
+-| INNODB_CMPMEM_RESET |
++| INNODB_SYS_TABLESTATS |
+ | INNODB_LOCK_WAITS |
+-| INNODB_CMPMEM |
+-| INNODB_CMP |
++| XTRADB_ADMIN_COMMAND |
+ | INNODB_LOCKS |
++| INNODB_SYS_FOREIGN_COLS |
++| INNODB_CMP_RESET |
++| INNODB_BUFFER_POOL_PAGES |
++| INNODB_SYS_TABLES |
++| INNODB_BUFFER_POOL_PAGES_INDEX |
++| INNODB_CMPMEM |
++| INNODB_BUFFER_POOL_PAGES_BLOB |
++| INNODB_CMPMEM_RESET |
++| INNODB_SYS_FIELDS |
++| INNODB_TABLE_STATS |
++| INNODB_SYS_STATS |
++| INNODB_SYS_FOREIGN |
++| INNODB_SYS_INDEXES |
++| INNODB_INDEX_STATS |
+ +---------------------------------------+
+ Wildcard: inf_rmation_schema
+ +--------------------+
+diff -ruN a/mysql-test/r/select.result b/mysql-test/r/select.result
+--- a/mysql-test/r/select.result 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/r/select.result 2010-12-10 16:48:11.023052909 +0900
+@@ -2196,10 +2196,10 @@
+ select * from (t1 as t2 left join t1 as t3 using (a)) inner join t1 on t1.a>1;
+ a a
+ 1 2
+-2 2
+-3 2
+ 1 3
++2 2
+ 2 3
++3 2
+ 3 3
+ select * from t1 inner join (t1 as t2 left join t1 as t3 using (a)) on t1.a>1;
+ a a
+diff -ruN a/mysql-test/suite/innodb/r/innodb.result b/mysql-test/suite/innodb/r/innodb.result
+--- a/mysql-test/suite/innodb/r/innodb.result 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/suite/innodb/r/innodb.result 2010-12-10 16:48:11.026994635 +0900
+@@ -1661,7 +1661,7 @@
+ drop table t1;
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_buffer_pool_pages_total';
+ variable_value
+-511
++2047
+ SELECT variable_value FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_page_size';
+ variable_value
+ 16384
+diff -ruN a/mysql-test/suite/sys_vars/r/all_vars.result b/mysql-test/suite/sys_vars/r/all_vars.result
+--- a/mysql-test/suite/sys_vars/r/all_vars.result 2010-12-03 20:58:25.000000000 +0300
++++ b/mysql-test/suite/sys_vars/r/all_vars.result 2011-01-10 23:09:13.000000000 +0300
+@@ -11,7 +11,99 @@
+ select variable_name as `There should be *no* variables listed below:` from t2
+ left join t1 on variable_name=test_name where test_name is null;
+ There should be *no* variables listed below:
++INNODB_BUFFER_POOL_SHM_KEY
++LOG_SLOW_SP_STATEMENTS
++INNODB_DOUBLEWRITE_FILE
++INNODB_THREAD_CONCURRENCY_TIMER_BASED
++LOG_SLOW_TIMESTAMP_EVERY
++INNODB_READ_AHEAD
++INNODB_PASS_CORRUPT_TABLE
++INNODB_RECOVERY_STATS
++INNODB_FLUSH_NEIGHBOR_PAGES
++INNODB_BUFFER_POOL_SHM_CHECKSUM
++INNODB_FLUSH_LOG_AT_TRX_COMMIT_SESSION
++LOG_SLOW_VERBOSITY
++ENABLE_QUERY_RESPONSE_TIME_STATS
++OPTIMIZER_FIX
++INNODB_ADAPTIVE_CHECKPOINT
++INNODB_SHOW_LOCKS_HELD
++INNODB_IBUF_ACCEL_RATE
++INNODB_EXPAND_IMPORT
++QUERY_CACHE_STRIP_COMMENTS
++INNODB_CHECKPOINT_AGE_TARGET
++INNODB_STATS_METHOD
++LOG_SLOW_RATE_LIMIT
++INNODB_PAGE_SIZE
++USERSTAT_RUNNING
++FAST_INDEX_CREATION
++USE_GLOBAL_LOG_SLOW_CONTROL
++INNODB_ENABLE_UNSAFE_GROUP_COMMIT
++INNODB_SHOW_VERBOSE_LOCKS
++INNODB_ADAPTIVE_HASH_INDEX_PARTITIONS
++SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP
++INNODB_LOG_BLOCK_SIZE
++INNODB_USE_SYS_STATS_TABLE
++INNODB_STATS_UPDATE_NEED_LOCK
++QUERY_RESPONSE_TIME_RANGE_BASE
++INNODB_STATS_AUTO_UPDATE
++INNODB_IBUF_ACTIVE_CONTRACT
+ INNODB_FILE_FORMAT_MAX
++INNODB_AUTO_LRU_DUMP
++LOG_WARNINGS_SILENCE
++LOG_SLOW_FILTER
++INNODB_DICT_SIZE_LIMIT
++INNODB_IBUF_MAX_SIZE
++INNODB_FAST_CHECKSUM
++LOG_SLOW_SLAVE_STATEMENTS
++INNODB_OVERWRITE_RELAY_LOG_INFO
++INNODB_EXTRA_RSEGMENTS
++THREAD_STATISTICS
++INNODB_BUFFER_POOL_SHM_KEY
++LOG_SLOW_SP_STATEMENTS
++INNODB_DOUBLEWRITE_FILE
++INNODB_THREAD_CONCURRENCY_TIMER_BASED
++LOG_SLOW_TIMESTAMP_EVERY
++INNODB_READ_AHEAD
++INNODB_PASS_CORRUPT_TABLE
++INNODB_RECOVERY_STATS
++INNODB_FLUSH_NEIGHBOR_PAGES
++INNODB_BUFFER_POOL_SHM_CHECKSUM
++INNODB_FLUSH_LOG_AT_TRX_COMMIT_SESSION
++LOG_SLOW_VERBOSITY
++ENABLE_QUERY_RESPONSE_TIME_STATS
++OPTIMIZER_FIX
++INNODB_ADAPTIVE_CHECKPOINT
++INNODB_SHOW_LOCKS_HELD
++INNODB_IBUF_ACCEL_RATE
++INNODB_EXPAND_IMPORT
++QUERY_CACHE_STRIP_COMMENTS
++INNODB_CHECKPOINT_AGE_TARGET
++INNODB_STATS_METHOD
++LOG_SLOW_RATE_LIMIT
++INNODB_PAGE_SIZE
++USERSTAT_RUNNING
++FAST_INDEX_CREATION
++USE_GLOBAL_LOG_SLOW_CONTROL
++INNODB_ENABLE_UNSAFE_GROUP_COMMIT
++INNODB_SHOW_VERBOSE_LOCKS
++INNODB_ADAPTIVE_HASH_INDEX_PARTITIONS
++SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP
++INNODB_LOG_BLOCK_SIZE
++INNODB_USE_SYS_STATS_TABLE
++INNODB_STATS_UPDATE_NEED_LOCK
++QUERY_RESPONSE_TIME_RANGE_BASE
++INNODB_STATS_AUTO_UPDATE
++INNODB_IBUF_ACTIVE_CONTRACT
+ INNODB_FILE_FORMAT_MAX
++INNODB_AUTO_LRU_DUMP
++LOG_WARNINGS_SILENCE
++LOG_SLOW_FILTER
++INNODB_DICT_SIZE_LIMIT
++INNODB_IBUF_MAX_SIZE
++INNODB_FAST_CHECKSUM
++LOG_SLOW_SLAVE_STATEMENTS
++INNODB_OVERWRITE_RELAY_LOG_INFO
++INNODB_EXTRA_RSEGMENTS
++THREAD_STATISTICS
+ drop table t1;
+ drop table t2;
+diff -ruN a/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result b/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result
+--- a/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result 2010-11-03 07:01:13.000000000 +0900
++++ b/mysql-test/suite/sys_vars/r/innodb_adaptive_flushing_basic.result 2010-12-10 16:48:11.031065741 +0900
+@@ -1,28 +1,28 @@
+ SET @start_global_value = @@global.innodb_adaptive_flushing;
+ SELECT @start_global_value;
+ @start_global_value
+-1
++0
+ Valid values are 'ON' and 'OFF'
+ select @@global.innodb_adaptive_flushing in (0, 1);
+ @@global.innodb_adaptive_flushing in (0, 1)
+ 1
+ select @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+-1
++0
+ select @@session.innodb_adaptive_flushing;
+ ERROR HY000: Variable 'innodb_adaptive_flushing' is a GLOBAL variable
+ show global variables like 'innodb_adaptive_flushing';
+ Variable_name Value
+-innodb_adaptive_flushing ON
++innodb_adaptive_flushing OFF
+ show session variables like 'innodb_adaptive_flushing';
+ Variable_name Value
+-innodb_adaptive_flushing ON
++innodb_adaptive_flushing OFF
+ select * from information_schema.global_variables where variable_name='innodb_adaptive_flushing';
+ VARIABLE_NAME VARIABLE_VALUE
+-INNODB_ADAPTIVE_FLUSHING ON
++INNODB_ADAPTIVE_FLUSHING OFF
+ select * from information_schema.session_variables where variable_name='innodb_adaptive_flushing';
+ VARIABLE_NAME VARIABLE_VALUE
+-INNODB_ADAPTIVE_FLUSHING ON
++INNODB_ADAPTIVE_FLUSHING OFF
+ set global innodb_adaptive_flushing='OFF';
+ select @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+@@ -89,4 +89,4 @@
+ SET @@global.innodb_adaptive_flushing = @start_global_value;
+ SELECT @@global.innodb_adaptive_flushing;
+ @@global.innodb_adaptive_flushing
+-1
++0
+diff -ruN a/mysql-test/suite/sys_vars/r/plugin_dir_basic.result b/mysql-test/suite/sys_vars/r/plugin_dir_basic.result
+--- a/mysql-test/suite/sys_vars/r/plugin_dir_basic.result 2010-11-03 07:01:13.000000000 +0900
++++ b/mysql-test/suite/sys_vars/r/plugin_dir_basic.result 2010-12-10 16:48:11.033057415 +0900
+@@ -1,20 +1,20 @@
+ select @@global.plugin_dir;
+ @@global.plugin_dir
+-MYSQL_LIBDIR/plugin
++MYSQL_LIBDIR64/plugin
+ select @@session.plugin_dir;
+ ERROR HY000: Variable 'plugin_dir' is a GLOBAL variable
+ show global variables like 'plugin_dir';
+ Variable_name Value
+-plugin_dir MYSQL_LIBDIR/plugin
++plugin_dir MYSQL_LIBDIR64/plugin
+ show session variables like 'plugin_dir';
+ Variable_name Value
+-plugin_dir MYSQL_LIBDIR/plugin
++plugin_dir MYSQL_LIBDIR64/plugin
+ select * from information_schema.global_variables where variable_name='plugin_dir';
+ VARIABLE_NAME VARIABLE_VALUE
+-PLUGIN_DIR MYSQL_LIBDIR/plugin
++PLUGIN_DIR MYSQL_LIBDIR64/plugin
+ select * from information_schema.session_variables where variable_name='plugin_dir';
+ VARIABLE_NAME VARIABLE_VALUE
+-PLUGIN_DIR MYSQL_LIBDIR/plugin
++PLUGIN_DIR MYSQL_LIBDIR64/plugin
+ set global plugin_dir=1;
+ ERROR HY000: Variable 'plugin_dir' is a read only variable
+ set session plugin_dir=1;
+diff -ruN a/mysql-test/t/connect.test b/mysql-test/t/connect.test
+--- a/mysql-test/t/connect.test 2010-11-03 07:01:12.000000000 +0900
++++ b/mysql-test/t/connect.test 2010-12-10 16:48:11.034065111 +0900
+@@ -1,3 +1,5 @@
++set global log_warnings=0;
++
+ # This test is to check various cases of connections
+ # with right and wrong password, with and without database
+ # Unfortunately the check is incomplete as we can't connect without database
+@@ -300,3 +302,4 @@
+ # Wait till all disconnects are completed
+ --source include/wait_until_count_sessions.inc
+
++set global log_warnings=1;
Summary(zh_CN.UTF-8): MySQL数据库服务器
Name: mysql
Version: 5.5.8
-Release: 0.2
+Release: 0.5
License: GPL + MySQL FLOSS Exception
Group: Applications/Databases
# Source0Download: http://dev.mysql.com/downloads/mysql/5.5.html#downloads
Source12: %{name}-ndb-cpc.sysconfig
Source13: %{name}-client.conf
Source14: my.cnf
-Patch0: %{name}-libs.patch
-Patch1: %{name}-libwrap.patch
Patch2: %{name}-c++.patch
Patch3: %{name}-info.patch
Patch4: %{name}-sql-cxx-pic.patch
-Patch5: %{name}-noproc.patch
Patch6: %{name}-system-users.patch
Patch7: %{name}-bug-34192.patch
Patch8: %{name}-client-config.patch
Patch11: %{name}-upgrade.patch
Patch12: %{name}-config.patch
Patch14: %{name}-bug-43594.patch
-Patch15: plugin-avoid-version.patch
-Patch16: %{name}-fix-dummy-thread-race-condition.patch
Patch18: %{name}-sphinx.patch
-# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.7/files>
-Patch100: %{name}-userstat.patch
-Patch101: %{name}-innodb_extend_slow.patch
-Patch102: %{name}-microsec_process.patch
-Patch103: %{name}-innodb_split_buf_pool_mutex.patch
+# <percona patches, http://bazaar.launchpad.net/~percona-dev/percona-server/5.5.8/files>
+# series file shows the order of patches
+Patch100: microsec_process.patch
+Patch101: optimizer_fix.patch
+Patch102: mysql_dump_ignore_ct.patch
+Patch103: control_online_alter_index.patch
+Patch104: show_temp.patch
+Patch105: innodb_show_status.patch
+Patch106: innodb_io_patches.patch
+Patch107: innodb_opt_lru_count.patch
+Patch108: innodb_extra_rseg.patch
+Patch109: innodb_overwrite_relay_log_info.patch
+Patch110: innodb_thread_concurrency_timer_based.patch
+Patch111: innodb_dict_size_limit.patch
+Patch112: innodb_split_buf_pool_mutex.patch
+Patch113: innodb_expand_import.patch
+Patch114: innodb_show_sys_tables.patch
+Patch115: innodb_stats.patch
+Patch116: innodb_recovery_patches.patch
+Patch117: innodb_admin_command_base.patch
+Patch118: innodb_show_lock_name.patch
+Patch119: innodb_extend_slow.patch
+Patch120: innodb_lru_dump_restore.patch
+Patch121: innodb_separate_doublewrite.patch
+Patch122: innodb_pass_corrupt_table.patch
+Patch123: innodb_fast_checksum.patch
+Patch124: innodb_files_extend.patch
+Patch125: innodb_fix_misc.patch
+Patch126: innodb_deadlock_count.patch
+Patch127: innodb_adaptive_hash_index_partitions.patch
+Patch128: innodb_buffer_pool_pages_i_s.patch
+Patch129: innodb_buffer_pool_shm.patch
+Patch130: innodb_show_status_extend.patch
+Patch131: slow_extended.patch
+Patch132: percona_support.patch
+Patch133: query_cache_enhance.patch
+Patch134: log_connection_error.patch
+Patch135: mysql_syslog.patch
+Patch136: response_time_distribution.patch
+Patch137: error_pad.patch
+Patch138: remove_fcntl_excessive_calls.patch
+Patch139: sql_no_fcache.patch
+Patch140: show_slave_status_nolock.patch
+Patch141: log_warnings_silence.patch
+Patch142: userstat.patch
+Patch143: bug580324.patch
+Patch144: mysql_remove_eol_carret.patch
+Patch145: mysql-test.diff
# </percona>
URL: http://www.mysql.com/products/community/
BuildRequires: bison
mv sphinx-*/mysqlse storage/sphinx
%patch18 -p1
%endif
-# CHECK ME, seems obsolete
-#%patch0 -p1
-#%{?with_tcpd:%patch1 -p1} # WHATS PURPOSE OF THIS PATCH?
#%patch2 -p1 # NEEDS CHECK, which exact program needs -lc++
%patch3 -p1
%ifarch alpha
# gcc 3.3.x ICE
%patch10 -p1
%endif
-# CHECK ME, obsolete
-#%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch11 -p1
%patch12 -p1
%patch14 -p0
-# CHECK ME, obsolete
-#%patch15 -p1
-# OBSOLETE, YES
-#%patch16 -p1
# <percona %patches>
-# CHECK ME
-#%patch100 -p1
-# CHECK ME
-#%patch101 -p1
-# CHECK ME
+%patch100 -p1
+%patch101 -p1
#%patch102 -p1
-# CHECK ME
-#%patch103 -p1
+%patch103 -p1
+%patch104 -p1
+%patch105 -p1
+%patch106 -p1
+%patch107 -p1
+%patch108 -p1
+%patch109 -p1
+%patch110 -p1
+%patch111 -p1
+%patch112 -p1
+%patch113 -p1
+%patch114 -p1
+%patch115 -p1
+%patch116 -p1
+%patch117 -p1
+%patch118 -p1
+%patch119 -p1
+%patch120 -p1
+%patch121 -p1
+%patch122 -p1
+%patch123 -p1
+%patch124 -p1
+%patch125 -p1
+%patch126 -p1
+%patch127 -p1
+%patch128 -p1
+%patch129 -p1
+%patch130 -p1
+%patch131 -p1
+%patch132 -p1
+%patch133 -p1
+%patch134 -p1
+%patch135 -p1
+%patch136 -p1
+%patch137 -p1
+%patch138 -p1
+%patch139 -p1
+%patch140 -p1
+%patch141 -p1
+%patch142 -p1
+%patch143 -p1
+%patch144 -p1
+%patch145 -p1
+# remove these files so that they get rebuilt
+rm sql/sql_yacc.cc
+rm sql/sql_yacc.h
# </percona>
%build
--- /dev/null
+# name : mysql_dump_ignore_ct.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h 2010-11-03 07:01:14.000000000 +0900
++++ b/client/client_priv.h 2010-12-03 13:39:32.317046060 +0900
+@@ -58,6 +58,7 @@
+ OPT_MYSQL_LOCK_DIRECTORY,
+ OPT_USE_THREADS,
+ OPT_IMPORT_USE_THREADS,
++ OPT_IGNORE_CREATE_ERROR,
+ OPT_MYSQL_NUMBER_OF_QUERY,
+ OPT_IGNORE_TABLE,OPT_INSERT_IGNORE,OPT_SHOW_WARNINGS,OPT_DROP_DATABASE,
+ OPT_TZ_UTC, OPT_CREATE_SLAP_SCHEMA,
+diff -ruN a/client/mysqldump.c b/client/mysqldump.c
+--- a/client/mysqldump.c 2010-11-03 07:01:14.000000000 +0900
++++ b/client/mysqldump.c 2010-12-03 13:44:55.000069761 +0900
+@@ -101,7 +101,7 @@
+ opt_dump_triggers= 0, opt_routines=0, opt_tz_utc=1,
+ opt_slave_apply= 0,
+ opt_include_master_host_port= 0,
+- opt_events= 0,
++ opt_events= 0, opt_ignore_show_create_table_error=0,
+ opt_alltspcs=0, opt_notspcs= 0;
+ static my_bool insert_pat_inited= 0, debug_info_flag= 0, debug_check_flag= 0;
+ static ulong opt_max_allowed_packet, opt_net_buffer_length;
+@@ -349,6 +349,9 @@
+ {"insert-ignore", OPT_INSERT_IGNORE, "Insert rows with INSERT IGNORE.",
+ &opt_ignore, &opt_ignore, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0,
+ 0, 0},
++ {"ignore-create-error", OPT_IGNORE_CREATE_ERROR, "Don't exit on show create table errors.",
++ (uchar**) &opt_ignore_show_create_table_error, (uchar**) &opt_ignore_show_create_table_error, 0, GET_BOOL,
++ NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"lines-terminated-by", OPT_LTB,
+ "Lines in the output file are terminated by the given string.",
+ &lines_terminated, &lines_terminated, 0, GET_STR,
+@@ -2278,13 +2281,21 @@
+ /* Make an sql-file, if path was given iow. option -T was given */
+ char buff[20+FN_REFLEN];
+ MYSQL_FIELD *field;
++
++ my_bool old_ignore_errors=ignore_errors;
++ //fprintf(stderr, "ignore create table %d\n", opt_ignore_show_create_table_error);
++ if (opt_ignore_show_create_table_error)
++ ignore_errors=1;
+
+ my_snprintf(buff, sizeof(buff), "show create table %s", result_table);
+
+ if (switch_character_set_results(mysql, "binary") ||
+ mysql_query_with_error_report(mysql, &result, buff) ||
+ switch_character_set_results(mysql, default_charset))
++ {
++ ignore_errors=old_ignore_errors;
+ DBUG_RETURN(0);
++ }
+
+ if (path)
+ {
--- /dev/null
+# name : mysql_remove_eol_carret.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h 2010-07-06 15:06:50.000000000 -0700
++++ b/client/client_priv.h 2010-07-06 15:07:18.000000000 -0700
+@@ -89,6 +89,7 @@
+ OPT_SYSLOG,
+ #endif
+ OPT_PLUGIN_DIR,
++ OPT_NO_REMOVE_EOL_CARRET,
+ OPT_DEFAULT_PLUGIN,
+ OPT_MAX_CLIENT_OPTION
+ };
+diff -ruN a/client/mysql.cc b/client/mysql.cc
+--- a/client/mysql.cc 2010-06-03 08:50:02.000000000 -0700
++++ b/client/mysql.cc 2010-07-06 15:07:18.000000000 -0700
+@@ -133,6 +133,8 @@
+ enum enum_info_type { INFO_INFO,INFO_ERROR,INFO_RESULT};
+ typedef enum enum_info_type INFO_TYPE;
+
++my_bool opt_no_remove_eol_carret=0;
++
+ static MYSQL mysql; /* The connection */
+ static my_bool ignore_errors=0,wait_flag=0,quick=0,
+ connected=0,opt_raw_data=0,unbuffered=0,output_tables=0,
+@@ -1450,6 +1452,10 @@
+ NO_ARG, 1, 0, 0, 0, 0, 0},
+ {"skip-line-numbers", 'L', "Don't write line number for errors.", 0, 0, 0, GET_NO_ARG,
+ NO_ARG, 0, 0, 0, 0, 0, 0},
++ {"no-remove-eol-carret", OPT_NO_REMOVE_EOL_CARRET, "Do not remove \\r before \\n in batch mode",
++ (uchar**)&opt_no_remove_eol_carret , (uchar**)&opt_no_remove_eol_carret, 0,
++ GET_BOOL,
++ NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"unbuffered", 'n', "Flush buffer after each query.", &unbuffered,
+ &unbuffered, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"column-names", OPT_COLUMN_NAMES, "Write column names in results.",
+diff -ruN a/client/readline.cc b/client/readline.cc
+--- a/client/readline.cc 2010-06-03 08:50:23.000000000 -0700
++++ b/client/readline.cc 2010-07-06 15:07:18.000000000 -0700
+@@ -20,6 +20,8 @@
+ #include <m_string.h>
+ #include "my_readline.h"
+
++extern my_bool opt_no_remove_eol_carret;
++
+ static bool init_line_buffer(LINE_BUFFER *buffer,File file,ulong size,
+ ulong max_size);
+ static bool init_line_buffer_from_string(LINE_BUFFER *buffer,char * str);
+@@ -51,7 +53,7 @@
+ if (!(pos=intern_read_line(line_buff,&out_length, truncated)))
+ return 0;
+ if (out_length && pos[out_length-1] == '\n')
+- if (--out_length && pos[out_length-1] == '\r') /* Remove '\n' */
++ if (--out_length && !opt_no_remove_eol_carret && pos[out_length-1] == '\r') /* Remove '\n' */
+ out_length--; /* Remove '\r' */
+ line_buff->read_length=out_length;
+ pos[out_length]=0;
+diff -ruN a/patch_info/mysql_remove_eol_carret.patch b/patch_info/mysql_remove_eol_carret.patch
+--- a/patch_info/mysql_remove_eol_carret.patch 1969-12-31 16:00:00.000000000 -0800
++++ b/patch_info/mysql_remove_eol_carret.patch 2010-07-06 15:10:10.000000000 -0700
+@@ -0,0 +1,7 @@
++File=mysql_remove_eol_carret.patch
++Name=
++Version=1.1
++Author=Percona <info@percona.com>
++License=GPL
++Comment=Do not remove carret before eol if --no-remove-eol-carret is enabled in MySQL client.
++Changelog
--- /dev/null
+# name : mysql-syslog.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/client_priv.h b/client/client_priv.h
+--- a/client/client_priv.h 2011-01-13 18:35:59.000000000 +0300
++++ b/client/client_priv.h 2011-01-13 18:38:21.000000000 +0300
+@@ -85,6 +85,9 @@
+ OPT_DEBUG_INFO, OPT_DEBUG_CHECK, OPT_COLUMN_TYPES, OPT_ERROR_LOG_FILE,
+ OPT_WRITE_BINLOG, OPT_DUMP_DATE,
+ OPT_INIT_COMMAND,
++#ifndef __WIN__
++ OPT_SYSLOG,
++#endif
+ OPT_PLUGIN_DIR,
+ OPT_DEFAULT_PLUGIN,
+ OPT_MAX_CLIENT_OPTION
+diff -ruN a/client/mysql.cc b/client/mysql.cc
+--- a/client/mysql.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/client/mysql.cc 2011-01-13 18:38:21.000000000 +0300
+@@ -38,6 +38,11 @@
+ #include "my_readline.h"
+ #include <signal.h>
+ #include <violite.h>
++#ifndef __WIN__
++#include "syslog.h"
++#endif
++
++#define MAX_SYSLOG_MESSAGE 900
+
+ #if defined(USE_LIBEDIT_INTERFACE) && defined(HAVE_LOCALE_H)
+ #include <locale.h>
+@@ -140,7 +145,7 @@
+ default_pager_set= 0, opt_sigint_ignore= 0,
+ auto_vertical_output= 0,
+ show_warnings= 0, executing_query= 0, interrupted_query= 0,
+- ignore_spaces= 0;
++ ignore_spaces= 0, opt_syslog= 0;
+ static my_bool debug_info_flag, debug_check_flag;
+ static my_bool column_types_flag;
+ static my_bool preserve_comments= 0;
+@@ -198,6 +203,7 @@
+ void tee_fputs(const char *s, FILE *file);
+ void tee_puts(const char *s, FILE *file);
+ void tee_putc(int c, FILE *file);
++void write_syslog(String *buffer);
+ static void tee_print_sized_data(const char *, unsigned int, unsigned int, bool);
+ /* The names of functions that actually do the manipulation. */
+ static int get_options(int argc,char **argv);
+@@ -1561,6 +1567,10 @@
+ {"show-warnings", OPT_SHOW_WARNINGS, "Show warnings after every statement.",
+ &show_warnings, &show_warnings, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
++#ifndef __WIN__
++ {"syslog", OPT_SYSLOG, "Logs all queries to syslog", 0, 0, 0, GET_NO_ARG,
++ NO_ARG, 0, 0, 0, 0, 0, 0},
++#endif
+ {"plugin_dir", OPT_PLUGIN_DIR, "Directory for client-side plugins.",
+ (uchar**) &opt_plugin_dir, (uchar**) &opt_plugin_dir, 0,
+ GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+@@ -1665,6 +1675,11 @@
+ opt->name);
+ #endif
+ break;
++#ifndef __WIN__
++ case OPT_SYSLOG:
++ opt_syslog = 1;
++ break;
++#endif
+ case OPT_SERVER_ARG:
+ #ifdef EMBEDDED_LIBRARY
+ /*
+@@ -2014,6 +2029,40 @@
+ DBUG_RETURN((COMMANDS *) 0);
+ }
+
+void write_syslog(String *line){ /* Send the text in `line` to syslog(LOG_INFO) in chunks of at most MAX_SYSLOG_MESSAGE bytes; the body is empty when __WIN__ is defined. */
+#ifndef __WIN__
+ uint length= line->length(); // bytes still to be logged
+ uint chunk_len= min(MAX_SYSLOG_MESSAGE, length); // size of the chunk logged by the current iteration
+ char *ptr= line->c_ptr_safe(); // NOTE(review): the last-chunk shortcut below presumably relies on this being NUL-terminated - confirm
+ char buff[MAX_SYSLOG_MESSAGE + 1]; // scratch copy for non-final chunks, +1 for the terminating '\0'
+ /* Each iteration logs one chunk, then advances ptr and recomputes chunk_len from what is left. */
+ for (;
+ length;
+ length-= chunk_len, ptr+= chunk_len, chunk_len= min(MAX_SYSLOG_MESSAGE,
+ length))
+ {
+ char *str; // text passed to the QUERY:'%s' field of this syslog record
+ if (length == chunk_len)
+ str= ptr; // last chunk => use the source buffer directly, skip copy
+ else
+ {
+ memcpy(buff, ptr, chunk_len);
+ buff[chunk_len]= '\0'; // terminate the copied chunk so it can be printed with %s
+ str= buff;
+ }
+ syslog(LOG_INFO,
+ "SYSTEM_USER:'%s', MYSQL_USER:'%s', CONNECTION_ID:%lu, "
+ "DB_SERVER:'%s', DB:'%s', QUERY:'%s'",
+ getenv("SUDO_USER") ? getenv("SUDO_USER") : // system user: $SUDO_USER if set, else $USER, else "--"
+ getenv("USER") ? getenv("USER") : "--",
+ current_user ? current_user : "--", // every unset field is logged as "--"
+ mysql_thread_id(&mysql),
+ current_host ? current_host : "--",
+ current_db ? current_db : "--",
+ str);
+ }
+#endif
+}
+
+ static bool add_line(String &buffer,char *line,char *in_string,
+ bool *ml_comment, bool truncated)
+@@ -2986,6 +3035,11 @@
+ fix_history(buffer);
+ }
+ #endif
++#ifndef __WIN__
++ if (opt_syslog && buffer->length() && connect_flag == CLIENT_INTERACTIVE){
++ write_syslog(buffer);
++ }
++#endif
+
+ buffer->length(0);
+
--- /dev/null
+# name : optimizer_fix.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN /dev/null b/patch_info/optimizer_fix.info
+--- /dev/null 1970-01-01 09:00:00.000000000 +0900
++++ b/patch_info/optimizer_fix.info 2010-12-02 20:47:55.781968475 +0900
+@@ -0,0 +1,8 @@
++File=optimizer_fix.patch
++Name=Unofficial optimizer fixes
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++2010-01
++Ported to 5.1.42
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-12-02 19:22:40.027024953 +0900
++++ b/sql/mysqld.cc 2010-12-02 20:51:50.811356434 +0900
+@@ -427,6 +427,7 @@
+ uint opt_debug_sync_timeout= 0;
+ #endif /* defined(ENABLED_DEBUG_SYNC) */
+ my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
++my_bool opt_optimizer_fix= 0;
+ /*
+ True if there is at least one per-hour limit for some user, so we should
+ check them before each query (and possibly reset counters when hour is
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/mysqld.h 2010-12-02 20:51:10.392070356 +0900
+@@ -109,6 +109,7 @@
+ extern ulonglong slave_type_conversions_options;
+ extern my_bool read_only, opt_readonly;
+ extern my_bool lower_case_file_system;
++extern my_bool opt_optimizer_fix;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+ extern char* opt_secure_file_priv;
+diff -ruN a/sql/opt_range.cc b/sql/opt_range.cc
+--- a/sql/opt_range.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/opt_range.cc 2010-12-02 20:47:55.795969853 +0900
+@@ -727,7 +727,7 @@
+ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+ bool index_read_must_be_used,
+ bool update_tbl_stats,
+- double read_time);
++ double read_time, ha_rows *estimated_records);
+ static
+ TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
+ double read_time,
+@@ -2151,6 +2151,7 @@
+ ha_rows limit, bool force_quick_range)
+ {
+ uint idx;
++ ha_rows estimated_records=0;
+ double scan_time;
+ DBUG_ENTER("SQL_SELECT::test_quick_select");
+ DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu",
+@@ -2319,12 +2320,17 @@
+
+ /* Get best 'range' plan and prepare data for making other plans */
+ if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE,
+- best_read_time)))
++ best_read_time, &estimated_records)))
+ {
+ best_trp= range_trp;
+ best_read_time= best_trp->read_cost;
+ }
+
++ if (opt_optimizer_fix && estimated_records)
++ {
++ records = estimated_records;
++ }
++
+ /*
+ Simultaneous key scans and row deletes on several handler
+ objects are not allowed so don't use ROR-intersection for
+@@ -3820,7 +3826,7 @@
+ {
+ DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
+ "tree in SEL_IMERGE"););
+- if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time)))
++ if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time, NULL)))
+ {
+ /*
+ One of index scans in this index_merge is more expensive than entire
+@@ -4923,11 +4929,12 @@
+ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
+ bool index_read_must_be_used,
+ bool update_tbl_stats,
+- double read_time)
++ double read_time, ha_rows *estimated_records)
+ {
+ int idx;
+ SEL_ARG **key,**end, **key_to_read= NULL;
+ ha_rows UNINIT_VAR(best_records); /* protected by key_to_read */
++ ha_rows min_records= HA_POS_ERROR;
+ TRP_RANGE* read_plan= NULL;
+ bool pk_is_clustered= param->table->file->primary_key_is_clustered();
+ DBUG_ENTER("get_key_scans_params");
+@@ -4998,6 +5005,11 @@
+ key_to_read= key;
+ }
+
++ if (estimated_records && found_records
++ && min_records > found_records)
++ {
++ min_records = found_records;
++ }
+ }
+ }
+
+@@ -5020,6 +5032,12 @@
+ else
+ DBUG_PRINT("info", ("No 'range' table read plan found"));
+
+ /* Report the smallest non-zero record count found as the estimated number of records. */
++ if (estimated_records && min_records != HA_POS_ERROR)
++ {
++ *estimated_records = min_records;
++ }
++
+ DBUG_RETURN(read_plan);
+ }
+
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_select.cc 2010-12-02 20:47:55.813953789 +0900
+@@ -2610,6 +2610,11 @@
+ table->reginfo.impossible_range=1;
+ DBUG_RETURN(0);
+ }
++ if (opt_optimizer_fix && error == 0)
++ {
+ /* Quick select is not effective, but its estimated record count is still used. */
++ DBUG_RETURN(select->records);
++ }
+ DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
+ }
+ DBUG_RETURN(HA_POS_ERROR); /* This shouldn't happend */
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2010-12-02 20:31:56.208023606 +0900
++++ b/sql/sys_vars.cc 2010-12-02 21:17:44.618120277 +0900
+@@ -2118,6 +2118,12 @@
+ VALID_RANGE(1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)),
+ DEFAULT(NET_WAIT_TIMEOUT), BLOCK_SIZE(1));
+
+static Sys_var_mybool Sys_optimizer_fix( /* boolean system variable "optimizer_fix" */
+ "optimizer_fix",
+ "Enable unofficial optimizer fixes.",
+ GLOBAL_VAR(opt_optimizer_fix), /* stored in the global flag opt_optimizer_fix */
+ NO_CMD_LINE, DEFAULT(TRUE)); /* NOTE(review): presumably no command-line option; DEFAULT(TRUE) differs from the `= 0` initializer of opt_optimizer_fix in mysqld.cc - confirm which applies at startup */
++
+ /** propagates changes to the relevant flag of @@optimizer_switch */
+ static bool fix_engine_condition_pushdown(sys_var *self, THD *thd,
+ enum_var_type type)
+++ /dev/null
-#!/bin/sh
-# updates percona patches
-# http://www.percona.com/docs/wiki/release:start
-
-version=release-5.1.53-11
-bzr_branch=lp:percona-server/$version
-branch=MYSQL_5_1
-
-filter_names() {
- # mysql_dump_ignore_ct.patch is broken, therefore we skip
- grep -v 'mysql_dump_ignore_ct.patch' | \
- grep -v 'percona-support.patch' | \
- grep -v 'mysqld_safe_syslog.patch' | \
- grep -v 'mysql-test.diff'
-}
-
-filter_files() {
- filterdiff -x '*/configure'
-}
-
-if [ -d $version ]; then
- cd $version
- bzr up
- cd ..
-else
- bzr branch $bzr_branch $version
-fi
-
-> .percona.spec
-> .patch.spec
-i=100
-for patch in $(cat $version/series | filter_names); do
- file=mysql-$patch
- cat $version/$patch | filter_files > $file
-
- if [ -z "$(awk -vfile=$file -F/ '$2 == file{print}' CVS/Entries)" ]; then
- cvs add $file
- ${branch:+cvs up -r $branch $file}
- fi
-
- printf "Patch%d:\t%s\n" $i %{name}-$patch >> .percona.spec
- printf "%%patch%d -p1\n" $i >> .patch.spec
- i=$((i+1))
-done
-
-# update PatchX section
-sed -i -e '
-/^# <percona patches/,/^# <\/percona>/ {
- /^ <\/percona>/b
- /^# <percona patches/ {
- p # print header
- r .percona.spec
- a# </percona>
- }
- d
-}
-' mysql.spec
-
-# update %patchX section
-sed -i -e '
-/^# <percona %patches/,/^# <\/percona>/ {
- /^ <\/percona>/b
- /^# <percona %patches/ {
- p # print header
- r .patch.spec
- a# </percona>
- }
- d
-}
-' mysql.spec
--- /dev/null
+# name : percona-support.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/scripts/mysql_install_db.sh b/scripts/mysql_install_db.sh
+--- a/scripts/mysql_install_db.sh 2009-08-08 09:20:07.000000000 +0000
++++ b/scripts/mysql_install_db.sh 2009-08-08 09:29:23.000000000 +0000
+@@ -475,6 +475,8 @@
+ echo
+ echo "Please report any problems with the $scriptdir/mysqlbug script!"
+ echo
++ echo "For commercial support please contact Percona at http://www.percona.com/support/"
++ echo
+ fi
+
+ exit 0
+++ /dev/null
-diff -ur mysql-5.1.50.org/plugin/daemon_example/Makefile.am mysql-5.1.50/plugin/daemon_example/Makefile.am
---- mysql-5.1.50.org/plugin/daemon_example/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/plugin/daemon_example/Makefile.am 2010-09-23 20:52:58.514057048 +0200
-@@ -26,7 +26,7 @@
-
- EXTRA_LTLIBRARIES = libdaemon_example.la
- pkgplugin_LTLIBRARIES = @plugin_daemon_example_shared_target@
--libdaemon_example_la_LDFLAGS = -module -rpath $(pkgplugindir)
-+libdaemon_example_la_LDFLAGS = -module -avoid-version -rpath $(pkgplugindir)
- libdaemon_example_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- libdaemon_example_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- libdaemon_example_la_SOURCES = daemon_example.cc
-diff -ur mysql-5.1.50.org/storage/archive/Makefile.am mysql-5.1.50/storage/archive/Makefile.am
---- mysql-5.1.50.org/storage/archive/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/archive/Makefile.am 2010-09-23 20:53:04.487687594 +0200
-@@ -35,7 +35,7 @@
-
- EXTRA_LTLIBRARIES = ha_archive.la
- pkgplugin_LTLIBRARIES = @plugin_archive_shared_target@
--ha_archive_la_LDFLAGS = -module -rpath $(pkgplugindir)
-+ha_archive_la_LDFLAGS = -module -avoid-version -rpath $(pkgplugindir)
- ha_archive_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_archive_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_archive_la_SOURCES = ha_archive.cc azio.c
-diff -ur mysql-5.1.50.org/storage/blackhole/Makefile.am mysql-5.1.50/storage/blackhole/Makefile.am
---- mysql-5.1.50.org/storage/blackhole/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/blackhole/Makefile.am 2010-09-23 20:53:04.487687594 +0200
-@@ -34,7 +34,7 @@
-
- EXTRA_LTLIBRARIES = ha_blackhole.la
- pkgplugin_LTLIBRARIES = @plugin_blackhole_shared_target@
--ha_blackhole_la_LDFLAGS=-module -rpath $(pkgplugindir)
-+ha_blackhole_la_LDFLAGS=-module -avoid-version -rpath $(pkgplugindir)
- ha_blackhole_la_CXXFLAGS=$(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_blackhole_la_CFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_blackhole_la_SOURCES=ha_blackhole.cc
-diff -ur mysql-5.1.50.org/storage/csv/Makefile.am mysql-5.1.50/storage/csv/Makefile.am
---- mysql-5.1.50.org/storage/csv/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/csv/Makefile.am 2010-09-23 20:53:04.487687596 +0200
-@@ -31,7 +31,7 @@
-
- EXTRA_LTLIBRARIES = ha_csv.la
- pkglib_LTLIBRARIES = @plugin_csv_shared_target@
--ha_csv_la_LDFLAGS = -module -rpath $(MYSQLLIBdir)
-+ha_csv_la_LDFLAGS = -module -avoid-version -rpath $(MYSQLLIBdir)
- ha_csv_la_CXXFLAGS = $(AM_CXXFLAGS) -DMYSQL_PLUGIN
- ha_csv_la_SOURCES = transparent_file.cc ha_tina.cc
-
-diff -ur mysql-5.1.50.org/storage/example/Makefile.am mysql-5.1.50/storage/example/Makefile.am
---- mysql-5.1.50.org/storage/example/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/example/Makefile.am 2010-09-23 20:53:04.487687596 +0200
-@@ -34,7 +34,7 @@
-
- EXTRA_LTLIBRARIES = ha_example.la
- pkgplugin_LTLIBRARIES = @plugin_example_shared_target@
--ha_example_la_LDFLAGS = -module -rpath $(pkgplugindir)
-+ha_example_la_LDFLAGS = -module -avoid-version -rpath $(pkgplugindir)
- ha_example_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_example_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_example_la_SOURCES = ha_example.cc
-diff -ur mysql-5.1.50.org/storage/federated/Makefile.am mysql-5.1.50/storage/federated/Makefile.am
---- mysql-5.1.50.org/storage/federated/Makefile.am 2010-08-03 19:24:29.000000000 +0200
-+++ mysql-5.1.50/storage/federated/Makefile.am 2010-09-23 20:53:04.487687596 +0200
-@@ -34,7 +34,7 @@
-
- EXTRA_LTLIBRARIES = ha_federated.la
- pkgplugin_LTLIBRARIES = @plugin_federated_shared_target@
--ha_federated_la_LDFLAGS = -module -rpath $(pkgplugindir)
-+ha_federated_la_LDFLAGS = -module -avoid-version -rpath $(pkgplugindir)
- ha_federated_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_federated_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_federated_la_SOURCES = ha_federated.cc
-diff -ur mysql-5.1.50.org/storage/ibmdb2i/Makefile.am mysql-5.1.50/storage/ibmdb2i/Makefile.am
---- mysql-5.1.50.org/storage/ibmdb2i/Makefile.am 2010-08-03 19:24:22.000000000 +0200
-+++ mysql-5.1.50/storage/ibmdb2i/Makefile.am 2010-09-23 20:53:04.491021090 +0200
-@@ -33,7 +33,7 @@
- EXTRA_LTLIBRARIES = ha_ibmdb2i.la
- pkgplugin_LTLIBRARIES = @plugin_ibmdb2i_shared_target@
- ha_ibmdb2i_la_LIBADD = -liconv
--ha_ibmdb2i_la_LDFLAGS = -module -rpath $(MYSQLLIBdir)
-+ha_ibmdb2i_la_LDFLAGS = -module -avoid-version -rpath $(MYSQLLIBdir)
- ha_ibmdb2i_la_CXXFLAGS= $(AM_CXXFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_ibmdb2i_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_ibmdb2i_la_SOURCES = ha_ibmdb2i.cc db2i_ileBridge.cc db2i_conversion.cc \
-diff -ur mysql-5.1.50.org/storage/innobase/Makefile.am mysql-5.1.50/storage/innobase/Makefile.am
---- mysql-5.1.50.org/storage/innobase/Makefile.am 2010-08-03 19:24:20.000000000 +0200
-+++ mysql-5.1.50/storage/innobase/Makefile.am 2010-09-23 20:53:04.494354584 +0200
-@@ -162,7 +162,7 @@
- EXTRA_LTLIBRARIES= ha_innodb.la
- pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@
-
--ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir)
-+ha_innodb_la_LDFLAGS= -module -avoid-version -rpath $(pkgplugindir)
- ha_innodb_la_CXXFLAGS= $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES)
-diff -ur mysql-5.1.50.org/storage/innodb_plugin/Makefile.am mysql-5.1.50/storage/innodb_plugin/Makefile.am
---- mysql-5.1.50.org/storage/innodb_plugin/Makefile.am 2010-08-03 19:24:19.000000000 +0200
-+++ mysql-5.1.50/storage/innodb_plugin/Makefile.am 2010-09-23 20:53:04.494354584 +0200
-@@ -331,7 +331,7 @@
- EXTRA_LTLIBRARIES= ha_innodb_plugin.la
- pkgplugin_LTLIBRARIES= @plugin_innodb_plugin_shared_target@
-
--ha_innodb_plugin_la_LDFLAGS= -module -rpath $(pkgplugindir)
-+ha_innodb_plugin_la_LDFLAGS= -module -avoid-version -rpath $(pkgplugindir)
- ha_innodb_plugin_la_CXXFLAGS= $(AM_CXXFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_plugin_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
- ha_innodb_plugin_la_SOURCES= $(libinnobase_a_SOURCES)
-diff -ur mysql-5.1.50.org/storage/sphinx/Makefile.am mysql-5.1.50/storage/sphinx/Makefile.am
---- mysql-5.1.50.org/storage/sphinx/Makefile.am 2010-09-23 20:51:48.660581537 +0200
-+++ mysql-5.1.50/storage/sphinx/Makefile.am 2010-09-23 20:53:05.261059401 +0200
-@@ -38,12 +38,12 @@
- EXTRA_LTLIBRARIES = ha_sphinx.la
- pkgplugin_LTLIBRARIES = @plugin_sphinx_shared_target@ sphinx.la
-
--ha_sphinx_la_LDFLAGS = -module -avoid-version -rpath $(MYSQLLIBdir)
-+ha_sphinx_la_LDFLAGS = -module -avoid-version -avoid-version -rpath $(MYSQLLIBdir)
- ha_sphinx_la_CXXFLAGS= $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_sphinx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- ha_sphinx_la_SOURCES = ha_sphinx.cc
-
--sphinx_la_LDFLAGS = -module -avoid-version -rpath $(MYSQLLIBdir)
-+sphinx_la_LDFLAGS = -module -avoid-version -avoid-version -rpath $(MYSQLLIBdir)
- sphinx_la_CXXFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- sphinx_la_CFLAGS = $(AM_CFLAGS) -DMYSQL_DYNAMIC_PLUGIN
- sphinx_la_SOURCES = snippets_udf.cc
--- /dev/null
+# name : query_cache_with_comments.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/query_cache_enhance.patch b/patch_info/query_cache_enhance.patch
+--- a/patch_info/query_cache_enhance.patch 1970-01-01 05:00:00.000000000 +0500
++++ b/patch_info/query_cache_enhance.patch 2010-11-12 17:24:47.000000000 +0500
+@@ -0,0 +1,15 @@
++File=query_cache_enhance.patch
++Name= query cache Percona's cumulative patch
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment= 1) Add new status - Waiting on query cache mutex (status_wait_query_cache_mutex.patch)
++ 2) Remove comments from query (need for cache hit) (query_cache_with_comments.patch)
++ 3) Totally disable query cache (query_cache_totally_disable.info)
++2010-05 - First version available (query_cache_with_comments.patch)
++2010-07 - First version available (status_wait_query_cache_mutex.patch)
++2010-07 - First version available (query_cache_totally_disable.info)
++2010-07 - Fix crash (query_cache_with_comments.patch)
++2010-07 - Fix incorrect behavior diff (query_cache_with_comments.patch)
++2010-09 - Merge patches to one
++2010-11 - Ported to 5.5
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/mysqld.cc 2010-11-13 15:34:40.000000000 +0500
+@@ -893,6 +893,7 @@
+ #endif
+ #ifdef HAVE_QUERY_CACHE
+ ulong query_cache_min_res_unit= QUERY_CACHE_MIN_RESULT_DATA_SIZE;
++my_bool opt_query_cache_strip_comments= FALSE;
+ Query_cache query_cache;
+ #endif
+ #ifdef HAVE_SMEM
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/mysqld.h 2010-11-13 15:34:36.000000000 +0500
+@@ -91,6 +91,7 @@
+ extern my_bool opt_log, opt_slow_log;
+ extern my_bool opt_backup_history_log;
+ extern my_bool opt_backup_progress_log;
++extern my_bool opt_query_cache_strip_comments;
+ extern ulonglong log_output_options;
+ extern ulong log_backup_output_options;
+ extern my_bool opt_log_queries_not_using_indexes;
+diff -ruN a/sql/query_strip_comments.h b/sql/query_strip_comments.h
+--- a/sql/query_strip_comments.h 1970-01-01 05:00:00.000000000 +0500
++++ b/sql/query_strip_comments.h 2010-11-12 17:24:47.000000000 +0500
+@@ -0,0 +1,37 @@
++#ifndef _SQL_QUERY_STRIPC_COMMENTS_H_
++#define _SQL_QUERY_STRIPC_COMMENTS_H_
++#ifdef HAVE_QUERY_CACHE
++
++// implemented in sql_cache.cc
++class QueryStripComments
++{
++private:
++ QueryStripComments(const QueryStripComments&);
++ QueryStripComments& operator=(const QueryStripComments&);
++public:
++ QueryStripComments();
++ ~QueryStripComments();
++ void set(const char* a_query, uint a_query_length, uint a_additional_length);
++
++ char* query() { return buffer; }
++ uint query_length() { return length; }
++private:
++ void cleanup();
++private:
++ char* buffer;
++ uint length /*query length, not buffer length*/;
++ uint buffer_length;
++};
++class QueryStripComments_Backup
++{
++public:
++ QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc);
++ ~QueryStripComments_Backup();
++private:
++ THD* thd;
++ char* query;
++ uint length;
++};
++
++#endif // HAVE_QUERY_CACHE
++#endif // _SQL_QUERY_STRIPC_COMMENTS_H_
+diff -ruN a/sql/sql_cache.cc b/sql/sql_cache.cc
+--- a/sql/sql_cache.cc 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sql_cache.cc 2010-11-12 17:24:47.000000000 +0500
+@@ -344,6 +344,181 @@
+ #include "probes_mysql.h"
+ #include "transaction.h"
+
++#include "query_strip_comments.h"
++
++QueryStripComments::QueryStripComments()
++{
++ buffer = 0;
++ length = 0;
++ buffer_length = 0;
++}
++QueryStripComments::~QueryStripComments()
++{
++ cleanup();
++}
++
++inline bool query_strip_comments_is_white_space(char c)
++{
++ return ((' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' ==c ));
++}
++void QueryStripComments::set(const char* query, uint query_length, uint additional_length) // store a comment-stripped, whitespace-normalised copy of query in buffer
++{
++ uint new_buffer_length = query_length + additional_length; // capacity needed: the query plus additional_length extra bytes requested by the caller
++ if(new_buffer_length > buffer_length) // grow only when the current buffer is too small
++ {
++ cleanup();
++ buffer = (char*)my_malloc(new_buffer_length,MYF(0)); buffer_length = new_buffer_length; // record the capacity; without this the buffer was freed and reallocated on every call
++ }
++ uint query_position = 0; // read index into query
++ uint position = 0; // write index into buffer
++ // Skip whitespaces from begin
++ while((query_position < query_length) && query_strip_comments_is_white_space(query[query_position]))
++ {
++ ++query_position;
++ }
++ long int last_space = -1; // buffer index of the last space written; used to collapse runs of spaces
++ while(query_position < query_length)
++ {
++ char current = query[query_position];
++ bool insert_space = false; // insert space to buffer, (IMPORTANT) don't update query_position
++ switch(current)
++ {
++ case '\'':
++ case '"':
++ { // quoted text is copied verbatim up to the matching quote character
++ buffer[position++] = query[query_position++]; // copy current symbol
++ while(query_position < query_length)
++ {
++ if(current == query[query_position]) // found pair quote
++ {
++ break; // the closing quote itself is copied by the common code after the switch
++ }
++ buffer[position++] = query[query_position++]; // copy current symbol
++ }
++ break;
++ }
++ case '/':
++ { // "/* ... */" is dropped, but "/*! ... */" is kept
++ if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
++ {
++ query_position += 2; // skip "/*"
++ do
++ {
++ if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/"
++ {
++ query_position += 2; // skip "*/"
++ insert_space = true;
++ break;
++ }
++ else
++ {
++ ++query_position;
++ }
++ }
++ while(query_position < query_length);
++ if(!insert_space) // comment never closed: input is exhausted, let the outer loop terminate
++ {
++ continue;
++ }
++ }
++ break;
++ }
++ case '-':
++ { // "--" starts a comment that runs to the end of the line
++ if(query[query_position+1] == '-')
++ {
++ ++query_position; // skip "-", and go to search of "\n"
++ }
++ else
++ {
++ break;
++ }
++ } // intentionally falls through into the '#' case
++ case '#':
++ {
++ do
++ {
++ ++query_position; // skip current symbol (# or -)
++ if('\n' == query[query_position]) // check for '\n'
++ {
++ ++query_position; // skip '\n'
++ insert_space = true;
++ break;
++ }
++ }
++ while(query_position < query_length);
++ if(insert_space)
++ {
++ break;
++ }
++ else
++ {
++ continue; // no newline found: input is exhausted
++ }
++ }
++ default:
++ if(query_strip_comments_is_white_space(current))
++ {
++ insert_space = true;
++ ++query_position;
++ }
++ break; // make gcc happy
++ }
++ if(insert_space)
++ {
++ if((last_space + 1) != position) // emit a space only if the previous output character was not one we just wrote
++ {
++ last_space = position;
++ buffer[position++] = ' ';
++ }
++ }
++ else
++ {
++ buffer[position++] = query[query_position++];
++ }
++ }
++ while((0 < position) && query_strip_comments_is_white_space(buffer[position - 1])) // trim trailing whitespace
++ {
++ --position;
++ }
++ buffer[position] = 0; // NUL-terminate the stripped query
++ length = position; // length of the stripped query, excluding the terminator
++}
++void QueryStripComments::cleanup()
++{
++ if(buffer)
++ {
++ my_free(buffer);
++ }
++ buffer = 0;
++ length = 0;
++ buffer_length = 0;
++}
++QueryStripComments_Backup::QueryStripComments_Backup(THD* a_thd,QueryStripComments* qsc)
++{
++ if(opt_query_cache_strip_comments)
++ {
++ thd = a_thd;
++ query = thd->query();
++ length = thd->query_length();
++ qsc->set(query,length,thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
++ thd->set_query(qsc->query(),qsc->query_length());
++ }
++ else
++ {
++ thd = 0;
++ query = 0;
++ length = 0;
++ }
++}
++QueryStripComments_Backup::~QueryStripComments_Backup()
++{
++ if(thd)
++ {
++ thd->set_query(query,length);
++ }
++}
++
+ #ifdef EMBEDDED_LIBRARY
+ #include "emb_qcache.h"
+ #endif
+@@ -454,7 +629,12 @@
+ Query_cache_wait_state wait_state(thd, __func__, __FILE__, __LINE__);
+ DBUG_ENTER("Query_cache::try_lock");
+
++ const char* old_proc_info= thd->proc_info;
++ thd_proc_info(thd,"Waiting on query cache mutex");
+ mysql_mutex_lock(&structure_guard_mutex);
++ DBUG_EXECUTE_IF("status_wait_query_cache_mutex_sleep", {
++ sleep(5);
++ });
+ while (1)
+ {
+ if (m_cache_lock_status == Query_cache::UNLOCKED)
+@@ -501,6 +681,7 @@
+ }
+ }
+ mysql_mutex_unlock(&structure_guard_mutex);
++ thd->proc_info = old_proc_info;
+
+ DBUG_RETURN(interrupt);
+ }
+@@ -1274,6 +1455,8 @@
+ unlock();
+ DBUG_VOID_RETURN;
+ }
++ QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
++ QueryStripComments_Backup backup(thd,query_strip_comments);
+
+ /* Key is query + database + flag */
+ if (thd->db_length)
+@@ -1451,6 +1634,9 @@
+ Query_cache_block_table *block_table, *block_table_end;
+ ulong tot_length;
+ Query_cache_query_flags flags;
++ QueryStripComments *query_strip_comments = &(thd->query_strip_comments);
++ char *sql_backup = sql;
++ uint query_length_backup = query_length;
+ DBUG_ENTER("Query_cache::send_result_to_client");
+
+ /*
+@@ -1472,21 +1658,103 @@
+
+ {
+ uint i= 0;
+- /*
+- Skip '(' characters in queries like following:
+- (select a from t1) union (select a from t1);
+- */
+- while (sql[i]=='(')
+- i++;
++ if(opt_query_cache_strip_comments)
++ {
++ /* Skip all comments and non-letter symbols */
++ uint& query_position = i;
++ char* query = sql;
++ while(query_position < query_length)
++ {
++ bool check = false;
++ char current = query[query_position];
++ switch(current)
++ {
++ case '/':
++ if(((query_position + 2) < query_length) && ('*' == query[query_position+1]) && ('!' != query[query_position+2]))
++ {
++ query_position += 2; // skip "/*"
++ do
++ {
++ if('*' == query[query_position] && '/' == query[query_position+1]) // check for "*/" (without space)
++ {
++ query_position += 2; // skip "*/" (without space)
++ break;
++ }
++ else
++ {
++ ++query_position;
++ }
++ }
++ while(query_position < query_length);
++ continue; // analyze current symbol
++ }
++ break;
++ case '-':
++ if(query[query_position+1] == '-')
++ {
++ ++query_position; // skip "-"
++ }
++ else
++ {
++ break;
++ }
++ case '#':
++ do
++ {
++ ++query_position; // skip current symbol
++ if('\n' == query[query_position]) // check for '\n'
++ {
++ ++query_position; // skip '\n'
++ break;
++ }
++ }
++ while(query_position < query_length);
++ continue; // analyze current symbol
++ case '\r':
++ case '\n':
++ case '\t':
++ case ' ':
++ case '(':
++ case ')':
++ break;
++ default:
++ check = true;
++ break; // make gcc happy
++ } // switch(current)
++ if(check)
++ {
++ if(query_position + 2 < query_length)
++ {
++ // cacheable
++ break;
++ }
++ else
++ {
++ DBUG_PRINT("qcache", ("The statement is not a SELECT; Not cached"));
++ goto err;
++ }
++ } // if(check)
++ ++query_position;
++ } // while(query_position < query_length)
++ }
++ else // if(opt_query_cache_strip_comments)
++ {
++ /*
++ Skip '(' characters in queries like following:
++ (select a from t1) union (select a from t1);
++ */
++ while (sql[i]=='(')
++ i++;
+
+- /*
+- Test if the query is a SELECT
+- (pre-space is removed in dispatch_command).
++ } // if(opt_query_cache_strip_comments)
++ /*
++ Test if the query is a SELECT
++ (pre-space is removed in dispatch_command).
+
+- First '/' looks like comment before command it is not
+- frequently appeared in real life, consequently we can
+- check all such queries, too.
+- */
++ First '/' looks like comment before command it is not
++ frequently appeared in real life, consequently we can
++ check all such queries, too.
++ */
+ if ((my_toupper(system_charset_info, sql[i]) != 'S' ||
+ my_toupper(system_charset_info, sql[i + 1]) != 'E' ||
+ my_toupper(system_charset_info, sql[i + 2]) != 'L') &&
+@@ -1521,6 +1789,12 @@
+ goto err_unlock;
+
+ Query_cache_block *query_block;
++ if(opt_query_cache_strip_comments)
++ {
++ query_strip_comments->set(sql, query_length, thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE);
++ sql = query_strip_comments->query();
++ query_length = query_strip_comments->query_length();
++ }
+
+ tot_length= query_length + thd->db_length + 1 + QUERY_CACHE_FLAGS_SIZE;
+ if (thd->db_length)
+@@ -1587,6 +1861,8 @@
+ (uchar*) &flags, QUERY_CACHE_FLAGS_SIZE);
+ query_block = (Query_cache_block *) my_hash_search(&queries, (uchar*) sql,
+ tot_length);
++ sql = sql_backup;
++ query_length = query_length_backup;
+ /* Quick abort on unlocked data */
+ if (query_block == 0 ||
+ query_block->query()->result() == 0 ||
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sql_class.h 2010-11-13 15:34:25.000000000 +0500
+@@ -40,6 +40,9 @@
+ #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA,
+ THR_LOCK_INFO */
+
++#ifdef HAVE_QUERY_CACHE
++#include "query_strip_comments.h"
++#endif // HAVE_QUERY_CACHE
+
+ class Reprepare_observer;
+ class Relay_log_info;
+@@ -758,6 +761,9 @@
+ statement lifetime. FIXME: must be const
+ */
+ ulong id;
++#ifdef HAVE_QUERY_CACHE
++ QueryStripComments query_strip_comments; // see sql_cache.cc
++#endif //HAVE_QUERY_CACHE
+
+ /*
+ MARK_COLUMNS_NONE: Means mark_used_colums is not set and no indicator to
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2010-11-03 03:01:14.000000000 +0500
++++ b/sql/sys_vars.cc 2010-11-13 15:34:59.000000000 +0500
+@@ -1724,6 +1724,11 @@
+ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(fix_query_cache_size));
+
++static Sys_var_mybool Sys_query_cache_strip_comments(
++ "query_cache_strip_comments", "Enable and disable optimisation \"strip comment for query cache\" - optimisation strip all comments from query while search query result in query cache",
++ GLOBAL_VAR(opt_query_cache_strip_comments), CMD_LINE(OPT_ARG),
++ DEFAULT(FALSE));
++
+ static Sys_var_ulong Sys_query_cache_limit(
+ "query_cache_limit",
+ "Don't cache results that are bigger than this",
--- /dev/null
+# name : remove_fcntl_excessive_calls.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/remove_fcntl_excessive_calls.info b/patch_info/remove_fcntl_excessive_calls.info
+--- a/patch_info/remove_fcntl_excessive_calls.info 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/remove_fcntl_excessive_calls.info 2010-07-22 21:42:08.560424001 +0400
+@@ -0,0 +1,6 @@
++File=remove_fcntl_excessive_calls.patch
++Name=remove fcntl excessive calls
++Version=1.0
++Author=This is a port of the official fix.
++License=GPL
++Comment=
+diff -ruN a/sql/net_serv.cc b/sql/net_serv.cc
+--- a/sql/net_serv.cc 2010-06-03 19:50:27.000000000 +0400
++++ b/sql/net_serv.cc 2010-07-22 21:40:30.680424001 +0400
+@@ -133,7 +133,7 @@
+ if (vio != 0) /* If real connection */
+ {
+ net->fd = vio_fd(vio); /* For perl DBI/DBD */
+-#if defined(MYSQL_SERVER) && !defined(__WIN__)
++#if defined(MYSQL_SERVER) && !defined(__WIN__) && !defined(NO_ALARM)
+ if (!(test_flags & TEST_BLOCKING))
+ {
+ my_bool old_mode;
+@@ -642,7 +642,7 @@
+ if ((long) (length= vio_write(net->vio,pos,(size_t) (end-pos))) <= 0)
+ {
+ my_bool interrupted = vio_should_retry(net->vio);
+-#if !defined(__WIN__)
++#if !defined(NO_ALARM) && !defined(__WIN__)
+ if ((interrupted || length == 0) && !thr_alarm_in_use(&alarmed))
+ {
+ if (!thr_alarm(&alarmed, net->write_timeout, &alarm_buff))
+@@ -680,7 +680,7 @@
+ my_progname);
+ #endif /* EXTRA_DEBUG */
+ }
+-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
++#if defined(THREAD_SAFE_CLIENT) && defined(NO_ALARM)
+ if (vio_errno(net->vio) == SOCKET_EINTR)
+ {
+ DBUG_PRINT("warning",("Interrupted write. Retrying..."));
+@@ -698,7 +698,7 @@
+ pos+=length;
+ update_statistics(thd_increment_bytes_sent(length));
+ }
+-#ifndef __WIN__
++#if !defined(NO_ALARM) && !defined(__WIN__)
+ end:
+ #endif
+ #ifdef HAVE_COMPRESS
+@@ -830,6 +830,7 @@
+ thr_alarm(&alarmed,net->read_timeout,&alarm_buff);
+ #else
+ /* Read timeout is set in my_net_set_read_timeout */
++ DBUG_ASSERT(net_blocking);
+ #endif /* NO_ALARM */
+
+ pos = net->buff + net->where_b; /* net->packet -4 */
+@@ -844,7 +845,7 @@
+
+ DBUG_PRINT("info",("vio_read returned %ld errno: %d",
+ (long) length, vio_errno(net->vio)));
+-#if !defined(__WIN__) || defined(MYSQL_SERVER)
++#if !defined(NO_ALARM) && (!defined(__WIN__) || defined(MYSQL_SERVER))
+ /*
+ We got an error that there was no data on the socket. We now set up
+ an alarm to not 'read forever', change the socket to non blocking
+@@ -891,7 +892,7 @@
+ my_progname,vio_errno(net->vio));
+ #endif /* EXTRA_DEBUG */
+ }
+-#if defined(THREAD_SAFE_CLIENT) && !defined(MYSQL_SERVER)
++#if defined(THREAD_SAFE_CLIENT) && defined(NO_ALARM)
+ if (vio_errno(net->vio) == SOCKET_EINTR)
+ {
+ DBUG_PRINT("warning",("Interrupted read. Retrying..."));
--- /dev/null
+# name : response-time-distribution.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/CMakeLists.txt b/CMakeLists.txt
+--- a/CMakeLists.txt 2010-12-03 20:58:24.000000000 +0300
++++ b/CMakeLists.txt 2011-01-16 20:11:28.000000000 +0300
+@@ -171,7 +171,12 @@
+ OPTION (WITH_UNIT_TESTS "Compile MySQL with unit tests" ON)
+ MARK_AS_ADVANCED(CYBOZU BACKUP_TEST WITHOUT_SERVER DISABLE_SHARED)
+
+-
++OPTION(WITHOUT_RESPONSE_TIME_DISTRIBUTION "If we want to have response_time_distribution" OFF)
++IF(WITHOUT_RESPONSE_TIME_DISTRIBUTION)
++ELSE()
++ADD_DEFINITIONS(-DHAVE_RESPONSE_TIME_DISTRIBUTION)
++ENDIF()
++
+ OPTION(ENABLE_DEBUG_SYNC "Enable debug sync (debug builds only)" ON)
+ IF(ENABLE_DEBUG_SYNC)
+ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLED_DEBUG_SYNC")
+diff -ruN a/include/mysql_com.h b/include/mysql_com.h
+--- a/include/mysql_com.h 2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql_com.h 2011-01-16 18:53:58.000000000 +0300
+@@ -137,10 +137,11 @@
+ #define REFRESH_FAST 32768 /* Intern flag */
+
+ /* RESET (remove all queries) from query cache */
+-#define REFRESH_QUERY_CACHE 65536
+-#define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */
+-#define REFRESH_DES_KEY_FILE 0x40000L
+-#define REFRESH_USER_RESOURCES 0x80000L
++#define REFRESH_QUERY_CACHE 65536
++#define REFRESH_QUERY_CACHE_FREE 0x20000L /* pack query cache */
++#define REFRESH_DES_KEY_FILE 0x40000L
++#define REFRESH_USER_RESOURCES 0x80000L
++#define REFRESH_QUERY_RESPONSE_TIME 0x100000L /* response time distribution */
+
+ #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */
+ #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */
+diff -ruN a/patch_info/response-time-distribution.info b/patch_info/response-time-distribution.info
+--- a/patch_info/response-time-distribution.info 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/response-time-distribution.info 2011-01-16 18:53:59.000000000 +0300
+@@ -0,0 +1,9 @@
++File=response-time-distribution.patch
++Name=Response time distribution
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2010-07-02 first version available
++2010-09-15 add column 'total'
+diff -ruN a/sql/CMakeLists.txt b/sql/CMakeLists.txt
+--- a/sql/CMakeLists.txt 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/CMakeLists.txt 2011-01-16 18:53:59.000000000 +0300
+@@ -51,7 +51,7 @@
+ message.h mf_iocache.cc my_decimal.cc ../sql-common/my_time.c
+ mysqld.cc net_serv.cc keycaches.cc
+ ../sql-common/client_plugin.c
+- opt_range.cc opt_range.h opt_sum.cc
++ opt_range.cc opt_range.h query_response_time.h opt_sum.cc
+ ../sql-common/pack.c parse_file.cc password.c procedure.cc
+ protocol.cc records.cc repl_failsafe.cc rpl_filter.cc set_var.cc
+ slave.cc sp.cc sp_cache.cc sp_head.cc sp_pcontext.cc
+@@ -59,7 +59,7 @@
+ sql_cache.cc sql_class.cc sql_client.cc sql_crypt.cc sql_crypt.h
+ sql_cursor.cc sql_db.cc sql_delete.cc sql_derived.cc sql_do.cc
+ sql_error.cc sql_handler.cc sql_help.cc sql_insert.cc sql_lex.cc
+- sql_list.cc sql_load.cc sql_manager.cc sql_parse.cc
++ sql_list.cc sql_load.cc sql_manager.cc sql_parse.cc query_response_time.cc
+ sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc
+ debug_sync.cc debug_sync.h
+ sql_repl.cc sql_select.cc sql_show.cc sql_state.c sql_string.cc
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h 2011-01-16 18:53:33.000000000 +0300
++++ b/sql/handler.h 2011-01-16 18:54:00.000000000 +0300
+@@ -580,6 +580,7 @@
+ SCH_PROFILES,
+ SCH_REFERENTIAL_CONSTRAINTS,
+ SCH_PROCEDURES,
++ SCH_QUERY_RESPONSE_TIME,
+ SCH_SCHEMATA,
+ SCH_SCHEMA_PRIVILEGES,
+ SCH_SESSION_STATUS,
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/lex.h 2011-01-16 18:54:01.000000000 +0300
+@@ -426,6 +426,7 @@
+ { "PURGE", SYM(PURGE)},
+ { "QUARTER", SYM(QUARTER_SYM)},
+ { "QUERY", SYM(QUERY_SYM)},
++ { "QUERY_RESPONSE_TIME", SYM(QUERY_RESPONSE_TIME_SYM)},
+ { "QUICK", SYM(QUICK)},
+ { "RANGE", SYM(RANGE_SYM)},
+ { "READ", SYM(READ_SYM)},
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/mysqld.cc 2011-01-17 02:22:27.000000000 +0300
+@@ -69,6 +69,8 @@
+ #include "debug_sync.h"
+ #include "sql_callback.h"
+
++#include "query_response_time.h"
++
+ #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
+ #include "../storage/perfschema/pfs_server.h"
+ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
+@@ -600,7 +602,7 @@
+ MY_LOCALE *my_default_lc_messages;
+ MY_LOCALE *my_default_lc_time_names;
+
+-SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen, have_query_cache;
++SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen, have_query_cache, have_response_time_distribution;
+ SHOW_COMP_OPTION have_geometry, have_rtree_keys;
+ SHOW_COMP_OPTION have_crypt, have_compress;
+ SHOW_COMP_OPTION have_profiling;
+@@ -901,6 +903,10 @@
+ my_bool opt_enable_shared_memory;
+ HANDLE smem_event_connect_request= 0;
+ #endif
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ulong opt_query_response_time_range_base = QRT_DEFAULT_BASE;
++my_bool opt_enable_query_response_time_stats= 0;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+
+ my_bool opt_use_ssl = 0;
+ char *opt_ssl_ca= NULL, *opt_ssl_capath= NULL, *opt_ssl_cert= NULL,
+@@ -1469,6 +1475,9 @@
+ my_free(opt_bin_logname);
+ bitmap_free(&temp_pool);
+ free_max_user_conn();
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ query_response_time_free();
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ #ifdef HAVE_REPLICATION
+ end_slave_list();
+ #endif
+@@ -3930,6 +3939,9 @@
+ if (!DEFAULT_ERRMSGS[0][0])
+ unireg_abort(1);
+
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ query_response_time_init();
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ /* We have to initialize the storage engines before CSV logging */
+ if (ha_init())
+ {
+@@ -6800,6 +6812,11 @@
+ #else
+ have_query_cache=SHOW_OPTION_NO;
+ #endif
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ have_response_time_distribution= SHOW_OPTION_YES;
++#else // HAVE_RESPONSE_TIME_DISTRIBUTION
++ have_response_time_distribution= SHOW_OPTION_NO;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ #ifdef HAVE_SPATIAL
+ have_geometry=SHOW_OPTION_YES;
+ #else
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/mysqld.h 2011-01-17 02:08:20.000000000 +0300
+@@ -98,6 +98,10 @@
+ extern bool opt_disable_networking, opt_skip_show_db;
+ extern bool opt_skip_name_resolve;
+ extern bool opt_ignore_builtin_innodb;
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++extern ulong opt_query_response_time_range_base;
++extern my_bool opt_enable_query_response_time_stats;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ extern my_bool opt_character_set_client_handshake;
+ extern bool volatile abort_loop;
+ extern bool in_bootstrap;
+diff -ruN a/sql/query_response_time.cc b/sql/query_response_time.cc
+--- a/sql/query_response_time.cc 1970-01-01 03:00:00.000000000 +0300
++++ b/sql/query_response_time.cc 2011-01-17 02:13:08.000000000 +0300
+@@ -0,0 +1,372 @@
++#include "mysql_version.h"
++#ifdef __FreeBSD__
++#include <sys/types.h>
++#include <machine/atomic.h>
++#endif // __FreeBSD__
++#include "my_global.h"
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++#include "mysql_com.h"
++#include "rpl_tblmap.h"
++#include "table.h"
++#include "field.h"
++#include "sql_show.h"
++#include "query_response_time.h"
++
++#define TIME_STRING_POSITIVE_POWER_LENGTH QRT_TIME_STRING_POSITIVE_POWER_LENGTH
++#define TIME_STRING_NEGATIVE_POWER_LENGTH 6
++#define TOTAL_STRING_POSITIVE_POWER_LENGTH QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH
++#define TOTAL_STRING_NEGATIVE_POWER_LENGTH 6
++#define MINIMUM_BASE 2
++#define MAXIMUM_BASE QRT_MAXIMUM_BASE
++#define POSITIVE_POWER_FILLER QRT_POSITIVE_POWER_FILLER
++#define NEGATIVE_POWER_FILLER QRT_NEGATIVE_POWER_FILLER
++#define STRING_OVERFLOW QRT_STRING_OVERFLOW
++#define TIME_OVERFLOW QRT_TIME_OVERFLOW
++#define DEFAULT_BASE QRT_DEFAULT_BASE
++
++#define do_xstr(s) do_str(s)
++#define do_str(s) #s
++#define do_format(filler,width) "%" filler width "lld"
++/*
++ Format strings for snprintf. Generate from:
++ POSITIVE_POWER_FILLER and TIME_STRING_POSITIVE_POWER_LENGTH
++ NEGATIVE_POWER_FILLER and TIME_STRING_NEGATIVE_POWER_LENGTH
++*/
++#define TIME_STRING_POSITIVE_POWER_FORMAT do_format(POSITIVE_POWER_FILLER,do_xstr(TIME_STRING_POSITIVE_POWER_LENGTH))
++#define TIME_STRING_NEGATIVE_POWER_FORMAT do_format(NEGATIVE_POWER_FILLER,do_xstr(TIME_STRING_NEGATIVE_POWER_LENGTH))
++#define TIME_STRING_FORMAT TIME_STRING_POSITIVE_POWER_FORMAT "." TIME_STRING_NEGATIVE_POWER_FORMAT
++
++#define TOTAL_STRING_POSITIVE_POWER_FORMAT do_format(POSITIVE_POWER_FILLER,do_xstr(TOTAL_STRING_POSITIVE_POWER_LENGTH))
++#define TOTAL_STRING_NEGATIVE_POWER_FORMAT do_format(NEGATIVE_POWER_FILLER,do_xstr(TOTAL_STRING_NEGATIVE_POWER_LENGTH))
++#define TOTAL_STRING_FORMAT TOTAL_STRING_POSITIVE_POWER_FORMAT "." TOTAL_STRING_NEGATIVE_POWER_FORMAT
++
++#define TIME_STRING_LENGTH QRT_TIME_STRING_LENGTH
++#define TIME_STRING_BUFFER_LENGTH (TIME_STRING_LENGTH + 1 /* '\0' */)
++
++#define TOTAL_STRING_LENGTH QRT_TOTAL_STRING_LENGTH
++#define TOTAL_STRING_BUFFER_LENGTH (TOTAL_STRING_LENGTH + 1 /* '\0' */)
++
++/*
++ Calculate length of "log linear"
++ 1)
++ (MINIMUM_BASE ^ result) <= (10 ^ STRING_POWER_LENGTH) < (MINIMUM_BASE ^ (result + 1))
++
++ 2)
++ (MINIMUM_BASE ^ result) <= (10 ^ STRING_POWER_LENGTH)
++ and
++ (MINIMUM_BASE ^ (result + 1)) > (10 ^ STRING_POWER_LENGTH)
++
++ 3)
++ result <= LOG(MINIMUM_BASE, 10 ^ STRING_POWER_LENGTH)= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++ result + 1 > LOG(MINIMUM_BASE, 10 ^ STRING_POWER_LENGTH)= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++
++ 4) STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10) - 1 < result <= STRING_POWER_LENGTH * LOG(MINIMUM_BASE,10)
++
++ MINIMUM_BASE= 2 always, LOG(MINIMUM_BASE,10)= 3.3219280948873626, result= (int)3.3219280948873626 * STRING_POWER_LENGTH
++
++ Last counter always use for time overflow
++*/
++#define POSITIVE_POWER_COUNT ((int)(3.32192809 * TIME_STRING_POSITIVE_POWER_LENGTH))
++#define NEGATIVE_POWER_COUNT ((int)(3.32192809 * TIME_STRING_NEGATIVE_POWER_LENGTH))
++#define OVERALL_POWER_COUNT (NEGATIVE_POWER_COUNT + 1 + POSITIVE_POWER_COUNT)
++
++#define MILLION ((unsigned long)1000 * 1000)
++
++namespace query_response_time
++{
++
++class utility // computes and stores the bucket boundaries (powers of the base around one million) used for the distribution
++{
++public:
++ utility() : m_base(0) // m_base starts at 0 so the setup(DEFAULT_BASE) call below always recomputes
++ {
++ m_max_dec_value= MILLION;
++ for(int i= 0; TIME_STRING_POSITIVE_POWER_LENGTH > i; ++i) // m_max_dec_value= MILLION * 10^TIME_STRING_POSITIVE_POWER_LENGTH
++ m_max_dec_value *= 10;
++ setup(DEFAULT_BASE);
++ }
++public:
++ uint base() const { return m_base; }
++ uint negative_count() const { return m_negative_count; } // number of bounds below one million
++ uint positive_count() const { return m_positive_count; } // number of bounds from one million upwards
++ uint bound_count() const { return m_bound_count; } // negative_count + positive_count
++ ulonglong max_dec_value() const { return m_max_dec_value; }
++ ulonglong bound(uint index) const { return m_bound[ index ]; } // no range check on index
++public:
++ void setup(uint base) // recompute all bounds for a new base; does nothing when base is unchanged
++ { // NOTE(review): base 0 divides by zero and base 1 never terminates below - presumably the caller validates the range; confirm
++ if(base != m_base)
++ {
++ m_base= base;
++
++ const ulonglong million= 1000 * 1000;
++ ulonglong value= million;
++ m_negative_count= 0;
++ while(value > 0) // count how many integer divisions by base it takes to reach 0
++ {
++ m_negative_count += 1;
++ value /= m_base;
++ }
++ m_negative_count -= 1; // the last division produced 0, which is not used as a bound
++
++ value= million;
++ m_positive_count= 0;
++ while(value < m_max_dec_value) // count the powers million * base^k that stay below m_max_dec_value
++ {
++ m_positive_count += 1;
++ value *= m_base;
++ }
++ m_bound_count= m_negative_count + m_positive_count;
++
++ value= million;
++ for(uint i= 0; i < m_negative_count; ++i) // fill the lower bounds back to front so m_bound is ascending
++ {
++ value /= m_base;
++ m_bound[m_negative_count - i - 1]= value;
++ }
++ value= million;
++ for(uint i= 0; i < m_positive_count; ++i) // m_bound[m_negative_count] is exactly one million
++ {
++ m_bound[m_negative_count + i]= value;
++ value *= m_base;
++ }
++ }
++ }
++private:
++ uint m_base;
++ uint m_negative_count;
++ uint m_positive_count;
++ uint m_bound_count;
++ ulonglong m_max_dec_value; /* MILLION * 10^TIME_STRING_POSITIVE_POWER_LENGTH; for length 7 this is 10^13 (presumably 10^7 seconds in microseconds - confirm) */
++ ulonglong m_bound[OVERALL_POWER_COUNT]; // ascending bounds; only the first m_bound_count entries are set
++};
++
++void print_time(char* buffer, std::size_t buffer_size, std::size_t string_positive_power_length, const char* format, uint64 value) // format value as "<seconds>.<microseconds>" into buffer
++{
++ memset(buffer,'X',buffer_size);
++ buffer[string_positive_power_length]= '.';
++ ulonglong second= (value / MILLION); // whole part
++ ulonglong microsecond= (value % MILLION); // remainder below one million
++ int result_length= snprintf(buffer,buffer_size,format,second,microsecond); // keep the signed result: negative means error, >= buffer_size means truncated
++ if(result_length < 0 || (std::size_t)result_length >= buffer_size) // was an always-false unsigned comparison; truncation then wrote the terminator out of bounds
++ {
++ assert(sizeof(STRING_OVERFLOW) <= buffer_size);
++ memcpy(buffer, STRING_OVERFLOW, sizeof(STRING_OVERFLOW)); // sizeof includes the terminating '\0'
++ return;
++ }
++ buffer[result_length]= 0; // in range here: result_length < buffer_size
++}
++#ifdef __x86_64__
++typedef uint64 TimeCounter;
++void add_time_atomic(TimeCounter* counter, uint64 time)
++{
++ __sync_fetch_and_add(counter,time);
++}
++#endif // __x86_64__
++#ifdef __i386__
++inline uint32 get_high(uint64 value) // upper 32 bits of value
++{
++ return static_cast<uint32>(value >> 32); // the old ((value >> 32) << 32) put the bits back in the upper word, so the uint32 result was always 0
++}
++inline uint32 get_low(uint64 value)
++{
++ return ((value << 32) >> 32);
++}
++#ifdef __FreeBSD__
++inline bool compare_and_swap(volatile uint32 *target, uint32 old, uint32 new_value)
++{
++ return atomic_cmpset_32(target,old,new_value);
++}
++#else // __FreeBSD__
++inline bool compare_and_swap(volatile uint32* target, uint32 old, uint32 new_value)
++{
++ return __sync_bool_compare_and_swap(target,old,new_value);
++}
++#endif // __FreeBSD__
++class TimeCounter
++{
++public:
++ TimeCounter& operator=(uint64 time)
++ {
++ this->m_high= get_high(time);
++ this->m_low= get_low(time);
++ return *this;
++ }
++ operator uint64() const
++ {
++ return ((static_cast<uint64>(m_high) << 32) + static_cast<uint64>(m_low));
++ }
++ void add(uint64 time)
++ {
++ uint32 time_high = get_high(time);
++ uint32 time_low = get_low(time);
++ uint64 time_low64= time_low;
++ while(true)
++ {
++ uint32 old_low= this->m_low;
++ uint64 old_low64= old_low;
++
++ uint64 new_low64= old_low64 + time_low64;
++ uint32 new_low= (get_low(new_low64));
++ bool add_high= (get_high(new_low64) != 0);
++
++ if(!compare_and_swap(&m_low,old_low,new_low))
++ {
++ continue;
++ }
++ if(add_high)
++ {
++ ++time_high;
++ }
++ if(time_high > 0)
++ {
++ __sync_fetch_and_add(&m_high,time_high);
++ }
++ break;
++ }
++ }
++private:
++ uint32 m_low;
++ uint32 m_high;
++};
++void add_time_atomic(TimeCounter* counter, uint64 time)
++{
++ counter->add(time);
++}
++#endif // __i386__
++
++class time_collector
++{
++public:
++ time_collector(utility& u) : m_utility(&u)
++ {
++ }
++ uint32 count(uint index) const { return m_count[index]; }
++ uint64 total(uint index) const { return m_total[index]; }
++public:
++ void flush()
++ {
++ memset(&m_count,0,sizeof(m_count));
++ memset((void*)&m_total,0,sizeof(m_total));
++ }
++ void collect(uint64 time)
++ {
++ bool no_collect= false;
++ DBUG_EXECUTE_IF("response_time_distribution_log_only_more_300_milliseconds", { \
++ no_collect= time < 300 * 1000; \
++ });
++ if(no_collect) return;
++ int i= 0;
++ for(int count= m_utility->bound_count(); count > i; ++i)
++ {
++ if(m_utility->bound(i) > time)
++ {
++ __sync_fetch_and_add(&(m_count[i]),(uint32)1);
++ add_time_atomic(&(m_total[i]),time);
++ break;
++ }
++ }
++ }
++private:
++ utility* m_utility;
++ uint32 m_count[OVERALL_POWER_COUNT + 1];
++ TimeCounter m_total[OVERALL_POWER_COUNT + 1];
++};
++
++class collector
++{
++public:
++ collector() : m_time(m_utility)
++ {
++ m_utility.setup(DEFAULT_BASE);
++ m_time.flush();
++ }
++public:
++ void flush()
++ {
++ m_utility.setup(opt_query_response_time_range_base);
++ m_time.flush();
++ }
++ int fill(THD* thd, TABLE_LIST *tables, COND *cond)
++ {
++ DBUG_ENTER("fill_schema_query_response_time");
++ TABLE *table= static_cast<TABLE*>(tables->table);
++ Field **fields= table->field;
++ for(uint i= 0, count= bound_count() + 1 /* with overflow */; count > i; ++i)
++ {
++ char time[TIME_STRING_BUFFER_LENGTH];
++ char total[TOTAL_STRING_BUFFER_LENGTH];
++ if(i == bound_count())
++ {
++ assert(sizeof(TIME_OVERFLOW) <= TIME_STRING_BUFFER_LENGTH);
++ assert(sizeof(TIME_OVERFLOW) <= TOTAL_STRING_BUFFER_LENGTH);
++ memcpy(time,TIME_OVERFLOW,sizeof(TIME_OVERFLOW));
++ memcpy(total,TIME_OVERFLOW,sizeof(TIME_OVERFLOW));
++ }
++ else
++ {
++ print_time(time,sizeof(time),TIME_STRING_POSITIVE_POWER_LENGTH,TIME_STRING_FORMAT,this->bound(i));
++ print_time(total,sizeof(total),TOTAL_STRING_POSITIVE_POWER_LENGTH,TOTAL_STRING_FORMAT,this->total(i));
++ }
++ fields[0]->store(time,strlen(time),system_charset_info);
++ fields[1]->store(this->count(i));
++ fields[2]->store(total,strlen(total),system_charset_info);
++ if (schema_table_store_record(thd, table))
++ {
++ DBUG_RETURN(1);
++ }
++ }
++ DBUG_RETURN(0);
++ }
++ void collect(ulonglong time)
++ {
++ m_time.collect(time);
++ }
++ uint bound_count() const
++ {
++ return m_utility.bound_count();
++ }
++ ulonglong bound(uint index)
++ {
++ return m_utility.bound(index);
++ }
++ ulonglong count(uint index)
++ {
++ return m_time.count(index);
++ }
++ ulonglong total(uint index)
++ {
++ return m_time.total(index);
++ }
++private:
++ utility m_utility;
++ time_collector m_time;
++};
++
++static collector g_collector;
++
++} // namespace query_response_time
++
++void query_response_time_init()
++{
++}
++
++void query_response_time_free()
++{
++ query_response_time::g_collector.flush();
++}
++
++void query_response_time_flush()
++{
++ query_response_time::g_collector.flush();
++}
++void query_response_time_collect(ulonglong query_time)
++{
++ query_response_time::g_collector.collect(query_time);
++}
++
++int query_response_time_fill(THD* thd, TABLE_LIST *tables, COND *cond)
++{
++ return query_response_time::g_collector.fill(thd,tables,cond);
++}
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+diff -ruN a/sql/query_response_time.h b/sql/query_response_time.h
+--- a/sql/query_response_time.h 1970-01-01 03:00:00.000000000 +0300
++++ b/sql/query_response_time.h 2011-01-17 02:13:34.000000000 +0300
+@@ -0,0 +1,71 @@
++#ifndef QUERY_RESPONSE_TIME_H
++#define QUERY_RESPONSE_TIME_H
++
++/*
++ Settings for query response time
++*/
++
++/*
++ Maximum string length for (10 ^ (-1 * QRT_STRING_NEGATIVE_POWER_LENGTH)) in text representation.
++ Example: for 6 is 0.000001
++ Always 2
++
++ Maximum string length for (10 ^ (QRT_STRING_POSITIVE_POWER_LENGTH + 1) - 1) in text representation.
++ Example: for 7 is 9999999.0
++*/
++#define QRT_TIME_STRING_POSITIVE_POWER_LENGTH 7
++#define QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH 7
++
++/*
++ Minimum base for log - ALWAYS 2
++ Maximum base for log:
++*/
++#define QRT_MAXIMUM_BASE 1000
++
++/*
++ Filler for whole number (positive power)
++ Example: for
++ QRT_POSITIVE_POWER_FILLER ' '
++ QRT_POSITIVE_POWER_LENGTH 7
++ and number 7234 result is:
++ ' 7234'
++*/
++#define QRT_POSITIVE_POWER_FILLER " "
++/*
++ Filler for fractional number. Similar to the whole-number filler
++*/
++#define QRT_NEGATIVE_POWER_FILLER "0"
++
++/*
++ Message on string overflow (string overflow is an internal error; seeing this string indicates a bug in QRT)
++*/
++#define QRT_STRING_OVERFLOW "TOO BIG STRING"
++
++/*
++ Message if time too big for statistic collecting (very long query)
++*/
++#define QRT_TIME_OVERFLOW "TOO LONG"
++
++#define QRT_DEFAULT_BASE 10
++
++#define QRT_TIME_STRING_LENGTH \
++ max( (QRT_TIME_STRING_POSITIVE_POWER_LENGTH + 1 /* '.' */ + 6 /*QRT_TIME_STRING_NEGATIVE_POWER_LENGTH*/), \
++ max( (sizeof(QRT_TIME_OVERFLOW) - 1), \
++ (sizeof(QRT_STRING_OVERFLOW) - 1) ) )
++
++#define QRT_TOTAL_STRING_LENGTH \
++ max( (QRT_TOTAL_STRING_POSITIVE_POWER_LENGTH + 1 /* '.' */ + 6 /*QRT_TOTAL_STRING_NEGATIVE_POWER_LENGTH*/), \
++ max( (sizeof(QRT_TIME_OVERFLOW) - 1), \
++ (sizeof(QRT_STRING_OVERFLOW) - 1) ) )
++
++extern ST_SCHEMA_TABLE query_response_time_table;
++
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++extern void query_response_time_init ();
++extern void query_response_time_free ();
++extern void query_response_time_flush ();
++extern void query_response_time_collect(ulonglong query_time);
++extern int query_response_time_fill (THD* thd, TABLE_LIST *tables, COND *cond);
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++
++#endif // QUERY_RESPONSE_TIME_H
+diff -ruN a/sql/set_var.h b/sql/set_var.h
+--- a/sql/set_var.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/set_var.h 2011-01-17 02:20:59.000000000 +0300
+@@ -293,6 +293,7 @@
+
+ extern SHOW_COMP_OPTION have_ssl, have_symlink, have_dlopen;
+ extern SHOW_COMP_OPTION have_query_cache;
++extern SHOW_COMP_OPTION have_response_time_distribution;
+ extern SHOW_COMP_OPTION have_geometry, have_rtree_keys;
+ extern SHOW_COMP_OPTION have_crypt;
+ extern SHOW_COMP_OPTION have_compress;
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sql_parse.cc 2011-01-17 02:15:09.000000000 +0300
+@@ -88,6 +88,7 @@
+ #include "sp_cache.h"
+ #include "events.h"
+ #include "sql_trigger.h"
++#include "query_response_time.h"
+ #include "transaction.h"
+ #include "sql_audit.h"
+ #include "sql_prepare.h"
+@@ -1476,22 +1477,36 @@
+ Do not log administrative statements unless the appropriate option is
+ set.
+ */
++ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ if (opt_enable_query_response_time_stats || thd->enable_slow_log)
++#else // HAVE_RESPONSE_TIME_DISTRIBUTION
+ if (thd->enable_slow_log)
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ {
+- ulonglong end_utime_of_query= thd->current_utime();
+- thd_proc_info(thd, "logging slow query");
+-
+- if (((thd->server_status & SERVER_QUERY_WAS_SLOW) ||
+- ((thd->server_status &
+- (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
+- opt_log_queries_not_using_indexes &&
+- !(sql_command_flags[thd->lex->sql_command] & CF_STATUS_COMMAND))) &&
+- thd->examined_row_count >= thd->variables.min_examined_row_limit)
++ ulonglong end_utime_of_query = thd->current_utime();
++ ulonglong query_execution_time = end_utime_of_query - thd->utime_after_lock;
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ if(opt_enable_query_response_time_stats)
++ {
++ query_response_time_collect(query_execution_time);
++ }
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++ if (thd->enable_slow_log)
+ {
+ thd_proc_info(thd, "logging slow query");
+- thd->status_var.long_query_count++;
+- slow_log_print(thd, thd->query(), thd->query_length(),
+- end_utime_of_query);
++
++ if (((thd->server_status & SERVER_QUERY_WAS_SLOW) ||
++ ((thd->server_status &
++ (SERVER_QUERY_NO_INDEX_USED | SERVER_QUERY_NO_GOOD_INDEX_USED)) &&
++ opt_log_queries_not_using_indexes &&
++ !(sql_command_flags[thd->lex->sql_command] & CF_STATUS_COMMAND))) &&
++ thd->examined_row_count >= thd->variables.min_examined_row_limit)
++ {
++ thd_proc_info(thd, "logging slow query");
++ thd->status_var.long_query_count++;
++ slow_log_print(thd, thd->query(), thd->query_length(),
++ end_utime_of_query);
++ }
+ }
+ }
+ DBUG_VOID_RETURN;
+@@ -1610,6 +1625,7 @@
+ case SCH_CHARSETS:
+ case SCH_ENGINES:
+ case SCH_COLLATIONS:
++ case SCH_QUERY_RESPONSE_TIME:
+ case SCH_COLLATION_CHARACTER_SET_APPLICABILITY:
+ case SCH_USER_PRIVILEGES:
+ case SCH_SCHEMA_PRIVILEGES:
+diff -ruN a/sql/sql_reload.cc b/sql/sql_reload.cc
+--- a/sql/sql_reload.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_reload.cc 2011-01-17 02:16:19.000000000 +0300
+@@ -25,7 +25,7 @@
+ #include "hostname.h" // hostname_cache_refresh
+ #include "sql_repl.h" // reset_master, reset_slave
+ #include "debug_sync.h"
+-
++#include "query_response_time.h"
+
+ /**
+ Reload/resets privileges and the different caches.
+@@ -274,6 +274,12 @@
+ #endif
+ if (options & REFRESH_USER_RESOURCES)
+ reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ if (options & REFRESH_QUERY_RESPONSE_TIME)
++ {
++ query_response_time_flush();
++ }
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ *write_to_binlog= tmp_write_to_binlog;
+ /*
+ If the query was killed then this function must fail.
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sql_show.cc 2011-01-17 02:17:44.000000000 +0300
+@@ -50,6 +50,7 @@
+ #include "event_data_objects.h"
+ #endif
+ #include <my_dir.h>
++#include "query_response_time.h"
+ #include "lock.h" // MYSQL_OPEN_IGNORE_FLUSH
+ #include "debug_sync.h"
+ #include "datadict.h" // dd_frm_type()
+@@ -7641,6 +7642,14 @@
+
+ */
+
++ST_FIELD_INFO query_response_time_fields_info[] = /* columns of the QUERY_RESPONSE_TIME schema table */
++ {
++ {"time", QRT_TIME_STRING_LENGTH, MYSQL_TYPE_STRING, 0, 0, "", SKIP_OPEN_TABLE },
++ {"count", MY_INT32_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONG, 0, MY_I_S_UNSIGNED, "", SKIP_OPEN_TABLE },
++ {"total", QRT_TOTAL_STRING_LENGTH, MYSQL_TYPE_STRING, 0, 0, "", SKIP_OPEN_TABLE }, /* sized by the TOTAL constant, not the TIME one */
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE }
++ };
++
+ ST_SCHEMA_TABLE schema_tables[]=
+ {
+ {"CHARACTER_SETS", charsets_fields_info, create_schema_table,
+@@ -7694,6 +7703,13 @@
+ 1, 9, 0, OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
+ {"ROUTINES", proc_fields_info, create_schema_table,
+ fill_schema_proc, make_proc_old_format, 0, -1, -1, 0, 0},
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ {"QUERY_RESPONSE_TIME", query_response_time_fields_info, create_schema_table,
++ query_response_time_fill, make_old_format, 0, -1, -1, 0, 0},
++#else
++ {"QUERY_RESPONSE_TIME", query_response_time_fields_info, create_schema_table,
++ 0, make_old_format, 0, -1, -1, 0, 0},
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ {"SCHEMATA", schema_fields_info, create_schema_table,
+ fill_schema_schemata, make_schemata_old_format, 0, 1, -1, 0, 0},
+ {"SCHEMA_PRIVILEGES", schema_privileges_fields_info, create_schema_table,
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy 2011-01-16 18:53:34.000000000 +0300
++++ b/sql/sql_yacc.yy 2011-01-17 02:19:03.000000000 +0300
+@@ -1193,6 +1193,7 @@
+ %token PURGE
+ %token QUARTER_SYM
+ %token QUERY_SYM
++%token QUERY_RESPONSE_TIME_SYM
+ %token QUICK
+ %token RANGE_SYM /* SQL-2003-R */
+ %token READS_SYM /* SQL-2003-R */
+@@ -11089,6 +11090,15 @@
+ {
+ Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+ }
++ | QUERY_RESPONSE_TIME_SYM wild_and_where
++ {
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ LEX *lex= Lex;
++ lex->sql_command= SQLCOM_SELECT;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_QUERY_RESPONSE_TIME))
++ MYSQL_YYABORT;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++ }
+ | CREATE PROCEDURE_SYM sp_name
+ {
+ LEX *lex= Lex;
+@@ -11325,6 +11335,12 @@
+ { Lex->type|= REFRESH_STATUS; }
+ | SLAVE
+ { Lex->type|= REFRESH_SLAVE; }
++ | QUERY_RESPONSE_TIME_SYM
++ {
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++ Lex->type|= REFRESH_QUERY_RESPONSE_TIME;
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++ }
+ | MASTER_SYM
+ { Lex->type|= REFRESH_MASTER; }
+ | DES_KEY_FILE
+@@ -12610,6 +12626,7 @@
+ | PROXY_SYM {}
+ | QUARTER_SYM {}
+ | QUERY_SYM {}
++ | QUERY_RESPONSE_TIME_SYM {}
+ | QUICK {}
+ | READ_ONLY_SYM {}
+ | REBUILD_SYM {}
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2011-01-16 18:53:35.000000000 +0300
++++ b/sql/sys_vars.cc 2011-01-17 02:19:53.000000000 +0300
+@@ -49,6 +49,7 @@
+ #include "../storage/perfschema/pfs_server.h"
+ #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
+
++#include "query_response_time.h"
+ /*
+ This forward declaration is needed because including sql_base.h
+ causes further includes. [TODO] Eliminate this forward declaration
+@@ -1775,6 +1776,26 @@
+ DEFAULT(FALSE));
+ #endif /* HAVE_QUERY_CACHE */
+
++
++static Sys_var_have Sys_have_response_time_distribution(
++ "have_response_time_distribution", "have_response_time_distribution",
++ READ_ONLY GLOBAL_VAR(have_response_time_distribution), NO_CMD_LINE);
++
++#ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
++static Sys_var_mybool Sys_enable_query_response_time_stats(
++ "enable_query_response_time_stats", "Enable or disable query response time statisics collecting",
++ GLOBAL_VAR(opt_enable_query_response_time_stats), CMD_LINE(OPT_ARG),
++ DEFAULT(FALSE));
++
++static Sys_var_ulong Sys_query_response_time_range_base(
++ "query_response_time_range_base",
++ "Select base of log for query_response_time ranges. WARNING: variable change affect only after flush",
++ GLOBAL_VAR(opt_query_response_time_range_base),
++ CMD_LINE(REQUIRED_ARG), VALID_RANGE(2, QRT_MAXIMUM_BASE),
++ DEFAULT(QRT_DEFAULT_BASE),
++ BLOCK_SIZE(1));
++#endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++
+ static Sys_var_mybool Sys_secure_auth(
+ "secure_auth",
+ "Disallow authentication for accounts that have old (pre-4.1) "
--- /dev/null
+# name : show_slave_status_nolock.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/patch_info/show_slave_status_nolock.patch b/patch_info/show_slave_status_nolock.patch
+--- a/patch_info/show_slave_status_nolock.patch 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/show_slave_status_nolock.patch 2010-12-29 20:38:13.000000000 +0300
+@@ -0,0 +1,6 @@
++File=show_slave_status_nolock.patch
++Name= SHOW SLAVE STATUS NOLOCK
++Version=1.0
++Author=Percona <info@percona.com>
++License=GPL
++Comment= Implement SHOW SLAVE STATUS without lock (STOP SLAVE lock the same mutex what lock SHOW SLAVE STATUS)
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/lex.h 2010-12-29 20:28:57.000000000 +0300
+@@ -378,6 +378,7 @@
+ { "NONE", SYM(NONE_SYM)},
+ { "NOT", SYM(NOT_SYM)},
+ { "NO_WRITE_TO_BINLOG", SYM(NO_WRITE_TO_BINLOG)},
++ { "NOLOCK", SYM(NOLOCK_SYM)},
+ { "NULL", SYM(NULL_SYM)},
+ { "NUMERIC", SYM(NUMERIC_SYM)},
+ { "NVARCHAR", SYM(NVARCHAR_SYM)},
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/mysqld.cc 2010-12-29 20:29:19.000000000 +0300
+@@ -3058,6 +3058,7 @@
+ {"show_relaylog_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_RELAYLOG_EVENTS]), SHOW_LONG_STATUS},
+ {"show_slave_hosts", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_HOSTS]), SHOW_LONG_STATUS},
+ {"show_slave_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
++ {"show_slave_status_nolock", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_NOLOCK_STAT]), SHOW_LONG_STATUS},
+ {"show_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
+ {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+ {"show_table_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_lex.h 2010-12-29 20:32:26.000000000 +0300
+@@ -190,6 +190,8 @@
+ SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
+ SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
+ SQLCOM_SHOW_RELAYLOG_EVENTS,
++ /* SHOW SLAVE STATUS NOLOCK */
++ SQLCOM_SHOW_SLAVE_NOLOCK_STAT,
+ /*
+ When a command is added here, be sure it's also added in mysqld.cc
+ in "struct show_var_st status_vars[]= {" ...
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-29 20:34:50.000000000 +0300
+@@ -336,6 +336,7 @@
+ sql_command_flags[SQLCOM_SHOW_CREATE]= CF_STATUS_COMMAND;
+ sql_command_flags[SQLCOM_SHOW_MASTER_STAT]= CF_STATUS_COMMAND;
+ sql_command_flags[SQLCOM_SHOW_SLAVE_STAT]= CF_STATUS_COMMAND;
++ sql_command_flags[SQLCOM_SHOW_SLAVE_NOLOCK_STAT]= CF_STATUS_COMMAND;
+ sql_command_flags[SQLCOM_SHOW_CREATE_PROC]= CF_STATUS_COMMAND;
+ sql_command_flags[SQLCOM_SHOW_CREATE_FUNC]= CF_STATUS_COMMAND;
+ sql_command_flags[SQLCOM_SHOW_CREATE_TRIGGER]= CF_STATUS_COMMAND;
+@@ -2262,12 +2263,16 @@
+ mysql_mutex_unlock(&LOCK_active_mi);
+ break;
+ }
++ case SQLCOM_SHOW_SLAVE_NOLOCK_STAT:
+ case SQLCOM_SHOW_SLAVE_STAT:
+ {
+ /* Accept one of two privileges */
+ if (check_global_access(thd, SUPER_ACL | REPL_CLIENT_ACL))
+ goto error;
+- mysql_mutex_lock(&LOCK_active_mi);
++ if(SQLCOM_SHOW_SLAVE_NOLOCK_STAT != lex->sql_command)
++ {
++ mysql_mutex_lock(&LOCK_active_mi);
++ }
+ if (active_mi != NULL)
+ {
+ res = show_master_info(thd, active_mi);
+@@ -2278,7 +2283,10 @@
+ WARN_NO_MASTER_INFO, ER(WARN_NO_MASTER_INFO));
+ my_ok(thd);
+ }
+- mysql_mutex_unlock(&LOCK_active_mi);
++ if(SQLCOM_SHOW_SLAVE_NOLOCK_STAT != lex->sql_command)
++ {
++ mysql_mutex_unlock(&LOCK_active_mi);
++ }
+ break;
+ }
+ case SQLCOM_SHOW_MASTER_STAT:
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy 2010-12-29 20:27:19.000000000 +0300
++++ b/sql/sql_yacc.yy 2010-12-29 20:36:40.000000000 +0300
+@@ -1292,6 +1292,7 @@
+ %token STARTS_SYM
+ %token START_SYM /* SQL-2003-R */
+ %token STATUS_SYM
++%token NOLOCK_SYM /* SHOW SLAVE STATUS NOLOCK */
+ %token STDDEV_SAMP_SYM /* SQL-2003-N */
+ %token STD_SYM
+ %token STOP_SYM
+@@ -11095,6 +11096,10 @@
+ {
+ Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
+ }
++ | SLAVE STATUS_SYM NOLOCK_SYM
++ {
++ Lex->sql_command = SQLCOM_SHOW_SLAVE_NOLOCK_STAT; //SQLCOM_SHOW_SLAVE_NOLOCK_STAT;
++ }
+ | QUERY_RESPONSE_TIME_SYM wild_and_where
+ {
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
--- /dev/null
+# name : show_temp.patch
+# introduced : 11 or before
+# maintainer : Yasufumi
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/sql/handler.h b/sql/handler.h
+--- a/sql/handler.h 2010-12-03 14:09:14.406955791 +0900
++++ b/sql/handler.h 2010-12-03 14:29:16.533356953 +0900
+@@ -569,6 +569,7 @@
+ SCH_EVENTS,
+ SCH_FILES,
+ SCH_GLOBAL_STATUS,
++ SCH_GLOBAL_TEMPORARY_TABLES,
+ SCH_GLOBAL_VARIABLES,
+ SCH_KEY_COLUMN_USAGE,
+ SCH_OPEN_TABLES,
+@@ -590,6 +591,7 @@
+ SCH_TABLE_CONSTRAINTS,
+ SCH_TABLE_NAMES,
+ SCH_TABLE_PRIVILEGES,
++ SCH_TEMPORARY_TABLES,
+ SCH_TRIGGERS,
+ SCH_USER_PRIVILEGES,
+ SCH_VARIABLES,
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-12-02 21:23:05.495293844 +0900
++++ b/sql/mysqld.cc 2010-12-03 14:25:40.317039327 +0900
+@@ -3038,6 +3038,7 @@
+ {"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+ {"show_table_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
+ {"show_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
++ {"show_temporary_tables",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TEMPORARY_TABLES]), SHOW_LONG_STATUS},
+ {"show_triggers", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
+ {"show_variables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
+ {"show_warnings", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h 2010-12-02 19:22:40.040023288 +0900
++++ b/sql/sql_lex.h 2010-12-03 14:09:53.465292483 +0900
+@@ -186,7 +186,7 @@
+ SQLCOM_CREATE_EVENT, SQLCOM_ALTER_EVENT, SQLCOM_DROP_EVENT,
+ SQLCOM_SHOW_CREATE_EVENT, SQLCOM_SHOW_EVENTS,
+ SQLCOM_SHOW_CREATE_TRIGGER,
+- SQLCOM_ALTER_DB_UPGRADE,
++ SQLCOM_ALTER_DB_UPGRADE, SQLCOM_SHOW_TEMPORARY_TABLES,
+ SQLCOM_SHOW_PROFILE, SQLCOM_SHOW_PROFILES,
+ SQLCOM_SIGNAL, SQLCOM_RESIGNAL,
+ SQLCOM_SHOW_RELAYLOG_EVENTS,
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-02 19:22:40.046023936 +0900
++++ b/sql/sql_parse.cc 2010-12-03 14:09:53.471950455 +0900
+@@ -349,6 +349,9 @@
+ sql_command_flags[SQLCOM_SHOW_TABLES]= (CF_STATUS_COMMAND |
+ CF_SHOW_TABLE_COMMAND |
+ CF_REEXECUTION_FRAGILE);
++ sql_command_flags[SQLCOM_SHOW_TEMPORARY_TABLES]= (CF_STATUS_COMMAND |
++ CF_SHOW_TABLE_COMMAND |
++ CF_REEXECUTION_FRAGILE);
+ sql_command_flags[SQLCOM_SHOW_TABLE_STATUS]= (CF_STATUS_COMMAND |
+ CF_SHOW_TABLE_COMMAND |
+ CF_REEXECUTION_FRAGILE);
+@@ -1504,6 +1507,8 @@
+
+ case SCH_TABLE_NAMES:
+ case SCH_TABLES:
++ case SCH_TEMPORARY_TABLES:
++ case SCH_GLOBAL_TEMPORARY_TABLES:
+ case SCH_VIEWS:
+ case SCH_TRIGGERS:
+ case SCH_EVENTS:
+@@ -2011,6 +2016,7 @@
+ }
+ case SQLCOM_SHOW_DATABASES:
+ case SQLCOM_SHOW_TABLES:
++ case SQLCOM_SHOW_TEMPORARY_TABLES:
+ case SQLCOM_SHOW_TRIGGERS:
+ case SQLCOM_SHOW_TABLE_STATUS:
+ case SQLCOM_SHOW_OPEN_TABLES:
+@@ -4787,6 +4793,8 @@
+
+ case SCH_TABLE_NAMES:
+ case SCH_TABLES:
++ case SCH_TEMPORARY_TABLES:
++ case SCH_GLOBAL_TEMPORARY_TABLES:
+ case SCH_VIEWS:
+ case SCH_TRIGGERS:
+ case SCH_EVENTS:
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc 2010-12-03 13:38:47.493070606 +0900
++++ b/sql/sql_show.cc 2010-12-03 14:27:04.590939717 +0900
+@@ -2685,6 +2685,7 @@
+ break;
+ case SQLCOM_SHOW_TABLES:
+ case SQLCOM_SHOW_TABLE_STATUS:
++ case SQLCOM_SHOW_TEMPORARY_TABLES:
+ case SQLCOM_SHOW_TRIGGERS:
+ case SQLCOM_SHOW_EVENTS:
+ thd->make_lex_string(&lookup_field_values->db_value,
+@@ -3173,6 +3174,228 @@
+ return (uint) OPEN_FULL_TABLE;
+ }
+
++/**
++ @brief Change I_S table item list for SHOW [GLOBAL] TEMPORARY TABLES [FROM/IN db]
++
++ @param[in] thd thread handler
++ @param[in] schema_table I_S table
++
++ @return Operation status
++ @retval 0 success
++ @retval 1 error
++*/
++int make_temporary_tables_old_format(THD *thd, ST_SCHEMA_TABLE *schema_table)
++{
++ char tmp[128];
++ String buffer(tmp,sizeof(tmp), thd->charset());
++ LEX *lex= thd->lex;
++ Name_resolution_context *context= &lex->select_lex.context;
++
++ if (thd->lex->option_type == OPT_GLOBAL) {
++ ST_FIELD_INFO *field_info= &schema_table->fields_info[0];
++ Item_field *field= new Item_field(context, NullS, NullS, field_info->field_name);
++ if (add_item_to_list(thd, field))
++ return 1;
++ field->set_name(field_info->old_name, strlen(field_info->old_name), system_charset_info);
++ }
++
++ ST_FIELD_INFO *field_info= &schema_table->fields_info[2];
++ buffer.length(0);
++ buffer.append(field_info->old_name);
++ buffer.append(lex->select_lex.db);
++
++ if (lex->wild && lex->wild->ptr())
++ {
++ buffer.append(STRING_WITH_LEN(" ("));
++ buffer.append(lex->wild->ptr());
++ buffer.append(')');
++ }
++
++ Item_field *field= new Item_field(context, NullS, NullS, field_info->field_name);
++ if (add_item_to_list(thd, field))
++ return 1;
++
++ field->set_name(buffer.ptr(), buffer.length(), system_charset_info);
++ return 0;
++}
++
++/**
++ @brief Fill records for temporary tables by reading info from table object
++
++ @param[in] thd session whose thread_id is stored in the SESSION_ID column
++ @param[in] table I_S table
++ @param[in] tmp_table temporary table
++ @param[in] db database name; when non-NULL, tables of other databases are skipped
++ @param[in] table_name_only store only session id, database and table name
++ @return Operation status
++ @retval 0 success
++ @retval 1 error
++*/
++
++static int store_temporary_table_record(THD *thd, TABLE *table, TABLE *tmp_table, const char *db, bool table_name_only)
++{
++ CHARSET_INFO *cs= system_charset_info;
++ DBUG_ENTER("store_temporary_table_record");
++
++ if (db && my_strcasecmp(cs, db, tmp_table->s->db.str))
++ DBUG_RETURN(0);
++
++ restore_record(table, s->default_values);
++
++ //session_id
++ table->field[0]->store((longlong) thd->thread_id, TRUE);
++
++ //database
++ table->field[1]->store(tmp_table->s->db.str, tmp_table->s->db.length, cs);
++
++ //table
++ table->field[2]->store(tmp_table->s->table_name.str, tmp_table->s->table_name.length, cs);
++
++ if (table_name_only)
++ DBUG_RETURN(schema_table_store_record(thd, table));
++
++ //engine
++ handler *handle= tmp_table->file;
++ char *engineType = (char *)(handle ? handle->table_type() : "UNKNOWN");
++ table->field[3]->store(engineType, strlen(engineType), cs);
++
++ //name
++ if (tmp_table->s->path.str) {
++ char *p=strstr(tmp_table->s->path.str, "#sql"); // NULL when the path has no "#sql" part
++ if (p) // otherwise keep the column's default instead of dereferencing NULL
++ table->field[4]->store(p, min(FN_REFLEN, (int) (tmp_table->s->path.length-(p-tmp_table->s->path.str))), cs);
++ }
++
++ // file stats
++ handler *file= tmp_table->file;
++
++ if (file) {
++
++ MYSQL_TIME time;
++
++ /**
++ TODO: InnoDB stat(file) checks file on short names within data dictionary
++ rather than using full path, because of that, temp files created in
++ TMPDIR will not have access/create time as it will not find the file
++
++ The fix is to patch InnoDB to use full path
++ */
++ file->info(HA_STATUS_VARIABLE | HA_STATUS_TIME | HA_STATUS_NO_LOCK);
++
++ table->field[5]->store((longlong) file->stats.records, TRUE);
++ table->field[5]->set_notnull();
++
++ table->field[6]->store((longlong) file->stats.mean_rec_length, TRUE);
++ table->field[7]->store((longlong) file->stats.data_file_length, TRUE);
++ table->field[8]->store((longlong) file->stats.index_file_length, TRUE);
++ if (file->stats.create_time)
++ {
++ thd->variables.time_zone->gmt_sec_to_TIME(&time,
++ (my_time_t) file->stats.create_time);
++ table->field[9]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
++ table->field[9]->set_notnull();
++ }
++ if (file->stats.update_time)
++ {
++ thd->variables.time_zone->gmt_sec_to_TIME(&time,
++ (my_time_t) file->stats.update_time);
++ table->field[10]->store_time(&time, MYSQL_TIMESTAMP_DATETIME);
++ table->field[10]->set_notnull();
++ }
++ }
++
++ DBUG_RETURN(schema_table_store_record(thd, table));
++}
++
++/**
++ @brief Fill I_S tables with global temporary tables
++
++ @param[in] thd thread handler
++ @param[in] tables I_S table
++ @param[in] cond 'WHERE' condition
++
++ @return Operation status
++ @retval 0 success
++ @retval 1 error
++*/
++
++static int fill_global_temporary_tables(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++ DBUG_ENTER("fill_global_temporary_tables");
++
++ mysql_mutex_lock(&LOCK_thread_count);
++
++ bool table_names_only= (thd->lex->sql_command == SQLCOM_SHOW_TEMPORARY_TABLES) ? 1 : 0;
++ I_List_iterator<THD> it(threads);
++ THD *thd_item;
++ TABLE *tmp;
++
++#ifndef NO_EMBEDDED_ACCESS_CHECKS
++ Security_context *sctx= thd->security_ctx;
++ uint db_access;
++#endif
++
++ while ((thd_item=it++)) {
++ for (tmp=thd_item->temporary_tables; tmp; tmp=tmp->next) {
++
++#ifndef NO_EMBEDDED_ACCESS_CHECKS
++ if (test_all_bits(sctx->master_access, DB_ACLS))
++ db_access=DB_ACLS;
++ else
++ db_access= (acl_get(sctx->host, sctx->ip, sctx->priv_user, tmp->s->db.str, 0) | sctx->master_access);
++
++ if (!(db_access & DB_ACLS) && check_grant_db(thd,tmp->s->db.str)) {
++ //no access for temp tables within this db for user
++ continue;
++ }
++#endif
++
++ THD *t= tmp->in_use;
++ tmp->in_use= thd;
++
++ if (store_temporary_table_record(thd_item, tables->table, tmp, thd->lex->select_lex.db, table_names_only)) {
++ tmp->in_use= t;
++ mysql_mutex_unlock(&LOCK_thread_count);
++ DBUG_RETURN(1);
++ }
++
++ tmp->in_use= t;
++ }
++ }
++
++ mysql_mutex_unlock(&LOCK_thread_count);
++ DBUG_RETURN(0);
++}
++
++/**
++ @brief Fill I_S tables with session temporary tables
++
++ @param[in] thd thread handler
++ @param[in] tables I_S table
++ @param[in] cond 'WHERE' condition
++
++ @return Operation status
++ @retval 0 success
++ @retval 1 error
++*/
++
++int fill_temporary_tables(THD *thd, TABLE_LIST *tables, COND *cond)
++{
++ DBUG_ENTER("fill_temporary_tables");
++
++ if (thd->lex->option_type == OPT_GLOBAL)
++ DBUG_RETURN(fill_global_temporary_tables(thd, tables, cond));
++
++ bool table_names_only= (thd->lex->sql_command == SQLCOM_SHOW_TEMPORARY_TABLES) ? 1 : 0;
++ TABLE *tmp;
++
++ for (tmp=thd->temporary_tables; tmp; tmp=tmp->next) {
++ if (store_temporary_table_record(thd, tables->table, tmp, thd->lex->select_lex.db, table_names_only)) {
++ DBUG_RETURN(1);
++ }
++ }
++ DBUG_RETURN(0);
++}
+
+ /**
+ Try acquire high priority share metadata lock on a table (with
+@@ -6802,6 +7025,25 @@
+ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
+ };
+
++ST_FIELD_INFO temporary_table_fields_info[]=
++{
++ {"SESSION_ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Session", SKIP_OPEN_TABLE},
++ {"TABLE_SCHEMA", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Db", SKIP_OPEN_TABLE},
++ {"TABLE_NAME", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Temp_tables_in_", SKIP_OPEN_TABLE},
++ {"ENGINE", NAME_CHAR_LEN, MYSQL_TYPE_STRING, 0, 0, "Engine", OPEN_FRM_ONLY},
++ {"NAME", FN_REFLEN, MYSQL_TYPE_STRING, 0, 0, "Name", SKIP_OPEN_TABLE},
++ {"TABLE_ROWS", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
++ MY_I_S_UNSIGNED, "Rows", OPEN_FULL_TABLE},
++ {"AVG_ROW_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
++ MY_I_S_UNSIGNED, "Avg Row", OPEN_FULL_TABLE},
++ {"DATA_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
++ MY_I_S_UNSIGNED, "Data Length", OPEN_FULL_TABLE},
++ {"INDEX_LENGTH", MY_INT64_NUM_DECIMAL_DIGITS, MYSQL_TYPE_LONGLONG, 0,
++ MY_I_S_UNSIGNED, "Index Size", OPEN_FULL_TABLE},
++ {"CREATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Create Time", OPEN_FULL_TABLE},
++ {"UPDATE_TIME", 0, MYSQL_TYPE_DATETIME, 0, 1, "Update Time", OPEN_FULL_TABLE},
++ {0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
++};
+
+ ST_FIELD_INFO columns_fields_info[]=
+ {
+@@ -7416,6 +7658,9 @@
+ hton_fill_schema_table, 0, 0, -1, -1, 0, 0},
+ {"GLOBAL_STATUS", variables_fields_info, create_schema_table,
+ fill_status, make_old_format, 0, 0, -1, 0, 0},
++ {"GLOBAL_TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table,
++ fill_global_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
++ OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
+ {"GLOBAL_VARIABLES", variables_fields_info, create_schema_table,
+ fill_variables, make_old_format, 0, 0, -1, 0, 0},
+ {"KEY_COLUMN_USAGE", key_column_usage_fields_info, create_schema_table,
+@@ -7465,6 +7710,9 @@
+ get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
+ {"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
+ fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
++ {"TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table,
++ fill_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
++ OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
+ {"TRIGGERS", triggers_fields_info, create_schema_table,
+ get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
+ OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy 2010-12-02 19:22:40.077024170 +0900
++++ b/sql/sql_yacc.yy 2010-12-03 14:09:53.496023791 +0900
+@@ -10869,6 +10869,15 @@
+ if (prepare_schema_table(YYTHD, lex, 0, SCH_TABLE_NAMES))
+ MYSQL_YYABORT;
+ }
++ | opt_var_type TEMPORARY TABLES opt_db
++ {
++ LEX *lex= Lex;
++ lex->sql_command= SQLCOM_SHOW_TEMPORARY_TABLES;
++ lex->option_type= $1;
++ lex->select_lex.db= $4;
++ if (prepare_schema_table(YYTHD, lex, 0, SCH_TEMPORARY_TABLES))
++ MYSQL_YYABORT;
++ }
+ | opt_full TRIGGERS_SYM opt_db wild_and_where
+ {
+ LEX *lex= Lex;
--- /dev/null
+# name : slow_extended.patch
+# introduced : 11 or before
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp
+--- a/include/mysql/plugin_audit.h.pp 2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_audit.h.pp 2010-12-16 04:27:46.000000000 +0300
+@@ -178,6 +178,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+ unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++ unsigned long long trx_id,
++ long io_reads,
++ long long io_read,
++ long io_reads_wait_timer,
++ long lock_que_wait_timer,
++ long que_wait_timer,
++ long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp
+--- a/include/mysql/plugin_auth.h.pp 2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_auth.h.pp 2010-12-16 04:27:46.000000000 +0300
+@@ -178,6 +178,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+ unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++ unsigned long long trx_id,
++ long io_reads,
++ long long io_read,
++ long io_reads_wait_timer,
++ long lock_que_wait_timer,
++ long que_wait_timer,
++ long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp
+--- a/include/mysql/plugin_ftparser.h.pp 2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin_ftparser.h.pp 2010-12-16 04:27:46.000000000 +0300
+@@ -131,6 +131,16 @@
+ char *thd_security_context(void* thd, char *buffer, unsigned int length,
+ unsigned int max_query_len);
+ void thd_inc_row_count(void* thd);
++void increment_thd_innodb_stats(void* thd,
++ unsigned long long trx_id,
++ long io_reads,
++ long long io_read,
++ long io_reads_wait_timer,
++ long lock_que_wait_timer,
++ long que_wait_timer,
++ long page_access);
++unsigned long thd_log_slow_verbosity(const void* thd);
++int thd_opt_slow_log();
+ int mysql_tmpfile(const char *prefix);
+ int thd_killed(const void* thd);
+ unsigned long thd_get_thread_id(const void* thd);
+diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
+--- a/include/mysql/plugin.h 2010-11-03 01:01:11.000000000 +0300
++++ b/include/mysql/plugin.h 2010-12-16 04:27:46.000000000 +0300
+@@ -536,6 +536,17 @@
+ /* Increments the row counter, see THD::row_count */
+ void thd_inc_row_count(MYSQL_THD thd);
+
++void increment_thd_innodb_stats(MYSQL_THD thd, /* records InnoDB statistics on the session for the extended slow log */
++ unsigned long long trx_id, /* stored as-is, replacing any earlier value */
++ long io_reads, /* added to the session counter */
++ long long io_read, /* added to the session counter */
++ long io_reads_wait_timer, /* added to the session counter */
++ long lock_que_wait_timer, /* added to the session counter */
++ long que_wait_timer, /* added to the session counter */
++ long page_access); /* added to the session counter */
++unsigned long thd_log_slow_verbosity(const MYSQL_THD thd); /* returns the session's log_slow_verbosity value */
++int thd_opt_slow_log(); /* returns the server's opt_slow_log flag as an int */
++#define EXTENDED_SLOWLOG /* NOTE(review): presumably a feature-test macro for plugins; no use is visible in this patch section */
+ /**
+ Create a temporary file.
+
+diff -ruN a/patch_info/slow_extended.info b/patch_info/slow_extended.info
+--- a/patch_info/slow_extended.info 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/slow_extended.info 2010-12-16 04:27:46.000000000 +0300
+@@ -0,0 +1,25 @@
++File=slow_extended.patch
++Name=Extended statistics in slow.log (not InnoDB part)
++Version=1.3
++Author=Percona <info@percona.com>
++License=GPL
++Comment=
++Changelog
++2008-11-26
++YK: Fix inefficient determination of trx. Avoid calling gettimeofday needlessly when the slow log is not in use. Make log_slow_queries dynamic (bool).
++
++2008-11-07
++VT: Moved log_slow_rate_limit in SHOW VARIABLE into right place
++
++2008-11
++Arjen Lentz: Fixups (backward compatibility) by Arjen Lentz <arjen@openquery.com.au>
++
++2010-07
++1) Fix overflow of query time and lock time (Bug 600360) (slow_extended_fix_overflow.patch merged)
++2) Control global slow feature merged (control_global_slow.patch merged)
++3) Microseconds in slow query log merged (microseconds_in_slow_query_log.patch merged)
++4) Now use_global_long_query_time and use_global_log_slow_control are synonyms. Add value "all" for use_global_log_slow_control (contol-global_slow-2.patch merged)
++5) Fix innodb_stats on replication (Bug 600684)
++6) Change variable types (system/command-line)
++2010-01
++Patch profiling_slow.patch was merged
+diff -ruN a/scripts/mysqldumpslow.sh b/scripts/mysqldumpslow.sh
+--- a/scripts/mysqldumpslow.sh 2010-11-03 01:01:13.000000000 +0300
++++ b/scripts/mysqldumpslow.sh 2010-12-16 04:27:46.000000000 +0300
+@@ -83,8 +83,8 @@
+ s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//;
+ my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('','');
+
+- s/^# Query_time: ([0-9.]+)\s+Lock_time: ([0-9.]+)\s+Rows_sent: ([0-9.]+).*\n//;
+- my ($t, $l, $r) = ($1, $2, $3);
++ s/^# Query_time: (\d+(\.\d+)?) Lock_time: (\d+(\.\d+)?) Rows_sent: (\d+(\.\d+)?).*\n//;
++ my ($t, $l, $r) = ($1, $3, $5);
+ $t -= $l unless $opt{l};
+
+ # remove fluff that mysqld writes to log when it (re)starts:
+diff -ruN a/sql/event_scheduler.cc b/sql/event_scheduler.cc
+--- a/sql/event_scheduler.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/event_scheduler.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -195,6 +195,7 @@
+ thd->client_capabilities|= CLIENT_MULTI_RESULTS;
+ mysql_mutex_lock(&LOCK_thread_count);
+ thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
++ thd->write_to_slow_log = TRUE;
+ mysql_mutex_unlock(&LOCK_thread_count);
+
+ /*
+diff -ruN a/sql/filesort.cc b/sql/filesort.cc
+--- a/sql/filesort.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/filesort.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -197,6 +197,7 @@
+ {
+ status_var_increment(thd->status_var.filesort_scan_count);
+ }
++ thd->query_plan_flags|= QPLAN_FILESORT;
+ #ifdef CAN_TRUST_RANGE
+ if (select && select->quick && select->quick->records > 0L)
+ {
+@@ -262,6 +263,7 @@
+ }
+ else
+ {
++ thd->query_plan_flags|= QPLAN_FILESORT_DISK;
+ if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
+ {
+ my_free(table_sort.buffpek);
+@@ -1201,6 +1203,7 @@
+ DBUG_ENTER("merge_buffers");
+
+ status_var_increment(current_thd->status_var.filesort_merge_passes);
++ current_thd->query_plan_fsort_passes++;
+ if (param->not_killable)
+ {
+ killed= ¬_killable;
+diff -ruN a/sql/log.cc b/sql/log.cc
+--- a/sql/log.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/log.cc 2010-12-02 19:28:31.337989417 +0900
+@@ -630,11 +630,13 @@
+ */
+
+ bool Log_to_csv_event_handler::
+- log_slow(THD *thd, time_t current_time, time_t query_start_arg,
++ log_slow(THD *thd, ulonglong current_utime, time_t query_start_arg,
+ const char *user_host, uint user_host_len,
+ ulonglong query_utime, ulonglong lock_utime, bool is_command,
+ const char *sql_text, uint sql_text_len)
+ {
++ time_t current_time= my_time_possible_from_micro(current_utime);
++
+ TABLE_LIST table_list;
+ TABLE *table;
+ bool result= TRUE;
+@@ -850,14 +852,14 @@
+ /** Wrapper around MYSQL_LOG::write() for slow log. */
+
+ bool Log_to_file_event_handler::
+- log_slow(THD *thd, time_t current_time, time_t query_start_arg,
++ log_slow(THD *thd, ulonglong current_utime, time_t query_start_arg,
+ const char *user_host, uint user_host_len,
+ ulonglong query_utime, ulonglong lock_utime, bool is_command,
+ const char *sql_text, uint sql_text_len)
+ {
+ Silence_log_table_errors error_handler;
+ thd->push_internal_handler(&error_handler);
+- bool retval= mysql_slow_log.write(thd, current_time, query_start_arg,
++ bool retval= mysql_slow_log.write(thd, current_utime, query_start_arg,
+ user_host, user_host_len,
+ query_utime, lock_utime, is_command,
+ sql_text, sql_text_len);
+@@ -1131,7 +1133,7 @@
+ /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
+ user_host_len= (strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
+ sctx->priv_user ? sctx->priv_user : "", "[",
+- sctx->user ? sctx->user : "", "] @ ",
++ sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
+ sctx->host ? sctx->host : "", " [",
+ sctx->ip ? sctx->ip : "", "]", NullS) -
+ user_host_buff);
+@@ -1139,8 +1141,22 @@
+ current_time= my_time_possible_from_micro(current_utime);
+ if (thd->start_utime)
+ {
+- query_utime= (current_utime - thd->start_utime);
+- lock_utime= (thd->utime_after_lock - thd->start_utime);
++ if(current_utime < thd->start_utime)
++ {
++ query_utime= 0;
++ }
++ else
++ {
++ query_utime= (current_utime - thd->start_utime);
++ }
++ if(thd->utime_after_lock < thd->start_utime)
++ {
++ lock_utime= 0;
++ }
++ else
++ {
++ lock_utime= (thd->utime_after_lock - thd->start_utime);
++ }
+ }
+ else
+ {
+@@ -1154,8 +1170,20 @@
+ query_length= command_name[thd->command].length;
+ }
+
++ if (!query_length)
++ {
++ thd->sent_row_count= thd->examined_row_count= 0;
++ thd->sent_row_count= 0;
++ thd->bytes_sent_old= thd->status_var.bytes_sent;
++ thd->tmp_tables_used= thd->tmp_tables_disk_used= 0;
++ thd->tmp_tables_size= 0;
++ thd->innodb_was_used= FALSE;
++ thd->query_plan_flags= QPLAN_NONE;
++ thd->query_plan_fsort_passes= 0;
++ }
++
+ for (current_handler= slow_log_handler_list; *current_handler ;)
+- error= (*current_handler++)->log_slow(thd, current_time, thd->start_time,
++ error= (*current_handler++)->log_slow(thd, current_utime, thd->start_time,
+ user_host_buff, user_host_len,
+ query_utime, lock_utime, is_command,
+ query, query_length) || error;
+@@ -2539,12 +2567,13 @@
+ TRUE - error occured
+ */
+
+-bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
++bool MYSQL_QUERY_LOG::write(THD *thd, ulonglong current_utime,
+ time_t query_start_arg, const char *user_host,
+ uint user_host_len, ulonglong query_utime,
+ ulonglong lock_utime, bool is_command,
+ const char *sql_text, uint sql_text_len)
+ {
++ time_t current_time= my_time_possible_from_micro(current_utime);
+ bool error= 0;
+ DBUG_ENTER("MYSQL_QUERY_LOG::write");
+
+@@ -2566,17 +2595,28 @@
+
+ if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
+ {
+- if (current_time != last_time)
++ if (opt_log_slow_timestamp_every || current_time != last_time)
+ {
+ last_time= current_time;
+ struct tm start;
+ localtime_r(¤t_time, &start);
+-
+- buff_len= my_snprintf(buff, sizeof buff,
+- "# Time: %02d%02d%02d %2d:%02d:%02d\n",
+- start.tm_year % 100, start.tm_mon + 1,
+- start.tm_mday, start.tm_hour,
+- start.tm_min, start.tm_sec);
++ if(opt_slow_query_log_microseconds_timestamp)
++ {
++ ulonglong microsecond = current_utime % (1000 * 1000); /* sub-second part, always in 0..999999 */
++ buff_len= snprintf(buff, sizeof buff,
++ "# Time: %02d%02d%02d %2d:%02d:%02d.%06llu\n", /* six zero-padded digits so the fraction reads as microseconds; unsigned conversion matches ulonglong */
++ start.tm_year % 100, start.tm_mon + 1,
++ start.tm_mday, start.tm_hour,
++ start.tm_min, start.tm_sec,microsecond);
++ }
++ else
++ {
++ buff_len= my_snprintf(buff, sizeof buff,
++ "# Time: %02d%02d%02d %2d:%02d:%02d\n",
++ start.tm_year % 100, start.tm_mon + 1,
++ start.tm_mday, start.tm_hour,
++ start.tm_min, start.tm_sec);
++ }
+
+ /* Note that my_b_write() assumes it knows the length for this */
+ if (my_b_write(&log_file, (uchar*) buff, buff_len))
+@@ -2594,12 +2634,69 @@
+ sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
+ sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
+ if (my_b_printf(&log_file,
+- "# Query_time: %s Lock_time: %s"
+- " Rows_sent: %lu Rows_examined: %lu\n",
++ "# Thread_id: %lu Schema: %s Last_errno: %u Killed: %u\n" \
++ "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu Rows_affected: %lu Rows_read: %lu\n"
++ "# Bytes_sent: %lu Tmp_tables: %lu Tmp_disk_tables: %lu Tmp_table_sizes: %lu\n",
++ (ulong) thd->thread_id, (thd->db ? thd->db : ""),
++ thd->last_errno, (uint) thd->killed,
+ query_time_buff, lock_time_buff,
+ (ulong) thd->sent_row_count,
+- (ulong) thd->examined_row_count) == (uint) -1)
++ (ulong) thd->examined_row_count,
++ ((long) thd->get_row_count_func() > 0 ) ? (ulong) thd->get_row_count_func() : 0,
++ (ulong) thd->sent_row_count,
++ (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old),
++ (ulong) thd->tmp_tables_used,
++ (ulong) thd->tmp_tables_disk_used,
++ (ulong) thd->tmp_tables_size) == (uint) -1)
+ tmp_errno= errno;
++
++#if defined(ENABLED_PROFILING)
++ thd->profiling.print_current(&log_file);
++#endif
++ if (thd->innodb_was_used)
++ {
++ char buf[20];
++ snprintf(buf, 20, "%llX", thd->innodb_trx_id);
++ if (my_b_printf(&log_file,
++ "# InnoDB_trx_id: %s\n", buf) == (uint) -1)
++ tmp_errno=errno;
++ }
++ if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_QUERY_PLAN)) &&
++ my_b_printf(&log_file,
++ "# QC_Hit: %s Full_scan: %s Full_join: %s Tmp_table: %s Tmp_table_on_disk: %s\n" \
++ "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu\n",
++ ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_TMP_TABLE) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_TMP_DISK) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
++ ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"),
++ thd->query_plan_fsort_passes) == (uint) -1)
++ tmp_errno=errno;
++ if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_INNODB)) && thd->innodb_was_used)
++ {
++ char buf[3][20];
++ snprintf(buf[0], 20, "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0);
++ snprintf(buf[1], 20, "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0);
++ snprintf(buf[2], 20, "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0);
++ if (my_b_printf(&log_file,
++ "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \
++ "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \
++ "# InnoDB_pages_distinct: %lu\n",
++ (ulong) thd->innodb_io_reads,
++ (ulong) thd->innodb_io_read,
++ buf[0], buf[1], buf[2],
++ (ulong) thd->innodb_page_access) == (uint) -1)
++ tmp_errno=errno;
++ }
++ else
++ {
++ if ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_INNODB)) &&
++ my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1)
++ tmp_errno=errno;
++ }
++
+ if (thd->db && strcmp(thd->db, db))
+ { // Database changed
+ if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
+diff -ruN a/sql/log.h b/sql/log.h
+--- a/sql/log.h 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/log.h 2010-12-16 04:27:46.000000000 +0300
+@@ -242,7 +242,7 @@
+ uint user_host_len, int thread_id,
+ const char *command_type, uint command_type_len,
+ const char *sql_text, uint sql_text_len);
+- bool write(THD *thd, time_t current_time, time_t query_start_arg,
++ bool write(THD *thd, ulonglong current_time, time_t query_start_arg,
+ const char *user_host, uint user_host_len,
+ ulonglong query_utime, ulonglong lock_utime, bool is_command,
+ const char *sql_text, uint sql_text_len);
+@@ -492,7 +492,7 @@
+ virtual bool init()= 0;
+ virtual void cleanup()= 0;
+
+- virtual bool log_slow(THD *thd, time_t current_time,
++ virtual bool log_slow(THD *thd, ulonglong current_time,
+ time_t query_start_arg, const char *user_host,
+ uint user_host_len, ulonglong query_utime,
+ ulonglong lock_utime, bool is_command,
+@@ -521,7 +521,7 @@
+ virtual bool init();
+ virtual void cleanup();
+
+- virtual bool log_slow(THD *thd, time_t current_time,
++ virtual bool log_slow(THD *thd, ulonglong current_utime,
+ time_t query_start_arg, const char *user_host,
+ uint user_host_len, ulonglong query_utime,
+ ulonglong lock_utime, bool is_command,
+@@ -553,7 +553,7 @@
+ virtual bool init();
+ virtual void cleanup();
+
+- virtual bool log_slow(THD *thd, time_t current_time,
++ virtual bool log_slow(THD *thd, ulonglong current_utime,
+ time_t query_start_arg, const char *user_host,
+ uint user_host_len, ulonglong query_utime,
+ ulonglong lock_utime, bool is_command,
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/mysqld.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -418,6 +418,10 @@
+ char* opt_secure_file_priv;
+ my_bool opt_log_slow_admin_statements= 0;
+ my_bool opt_log_slow_slave_statements= 0;
++my_bool opt_log_slow_sp_statements= 0;
++my_bool opt_log_slow_timestamp_every= 0;
++ulonglong opt_use_global_log_slow_control= 0;
++my_bool opt_slow_query_log_microseconds_timestamp= 0;
+ my_bool lower_case_file_system= 0;
+ my_bool opt_large_pages= 0;
+ my_bool opt_super_large_pages= 0;
+@@ -5789,10 +5793,10 @@
+ "Log slow OPTIMIZE, ANALYZE, ALTER and other administrative statements to "
+ "the slow log if it is open.", &opt_log_slow_admin_statements,
+ &opt_log_slow_admin_statements, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+- {"log-slow-slave-statements", 0,
++ /*{"log-slow-slave-statements", 0,
+ "Log slow statements executed by slave thread to the slow log if it is open.",
+ &opt_log_slow_slave_statements, &opt_log_slow_slave_statements,
+- 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
++ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},*/
+ {"log-slow-queries", OPT_SLOW_QUERY_LOG,
+ "Log slow queries to a table or log file. Defaults logging to table "
+ "mysql.slow_log or hostname-slow.log if --log-output=file is used. "
+@@ -7182,6 +7186,10 @@
+
+ C_MODE_END
+
++/* defined in sys_vars.cc */
++extern void init_log_slow_verbosity();
++extern void init_use_global_log_slow_control();
++
+ /**
+ Get server options from the command line,
+ and perform related server initializations.
+@@ -7321,6 +7329,8 @@
+ global_system_variables.long_query_time= (ulonglong)
+ (global_system_variables.long_query_time_double * 1e6);
+
++ init_log_slow_verbosity();
++ init_use_global_log_slow_control();
+ if (opt_short_log_format)
+ opt_specialflag|= SPECIAL_SHORT_LOG_FORMAT;
+
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/mysqld.h 2010-12-16 04:27:46.000000000 +0300
+@@ -116,6 +116,10 @@
+ extern char* opt_secure_backup_file_priv;
+ extern size_t opt_secure_backup_file_priv_len;
+ extern my_bool opt_log_slow_admin_statements, opt_log_slow_slave_statements;
++extern my_bool opt_log_slow_sp_statements;
++extern my_bool opt_log_slow_timestamp_every;
++extern ulonglong opt_use_global_log_slow_control;
++extern my_bool opt_slow_query_log_microseconds_timestamp;
+ extern my_bool sp_automatic_privileges, opt_noacl;
+ extern my_bool opt_old_style_user_limits, trust_function_creators;
+ extern uint opt_crash_binlog_innodb;
+diff -ruN a/sql/slave.cc b/sql/slave.cc
+--- a/sql/slave.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/slave.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -2037,6 +2037,7 @@
+ + MAX_LOG_EVENT_HEADER; /* note, incr over the global not session var */
+ thd->slave_thread = 1;
+ thd->enable_slow_log= opt_log_slow_slave_statements;
++ thd->write_to_slow_log= opt_log_slow_slave_statements;
+ set_slave_thread_options(thd);
+ thd->client_capabilities = CLIENT_LOCAL_FILES;
+ mysql_mutex_lock(&LOCK_thread_count);
+diff -ruN a/sql/sp_head.cc b/sql/sp_head.cc
+--- a/sql/sp_head.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sp_head.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -2151,7 +2151,7 @@
+ DBUG_PRINT("info",(" %.*s: eval args done", (int) m_name.length,
+ m_name.str));
+ }
+- if (!(m_flags & LOG_SLOW_STATEMENTS) && thd->enable_slow_log)
++ if (!(m_flags & LOG_SLOW_STATEMENTS || opt_log_slow_sp_statements) && thd->enable_slow_log)
+ {
+ DBUG_PRINT("info", ("Disabling slow log for the execution"));
+ save_enable_slow_log= true;
+diff -ruN a/sql/sql_cache.cc b/sql/sql_cache.cc
+--- a/sql/sql_cache.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_cache.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -1756,6 +1756,7 @@
+ response, we can't handle it anyway.
+ */
+ (void) trans_commit_stmt(thd);
++ thd->query_plan_flags|= QPLAN_QC;
+ if (!thd->stmt_da->is_set())
+ thd->stmt_da->disable_status();
+
+@@ -1766,6 +1767,7 @@
+ err_unlock:
+ unlock();
+ err:
++ thd->query_plan_flags|= QPLAN_QC_NO;
+ MYSQL_QUERY_CACHE_MISS(thd->query());
+ DBUG_RETURN(0); // Query was not cached
+ }
+diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
+--- a/sql/sql_class.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_class.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -367,6 +367,37 @@
+ thd->warning_info->inc_current_row_for_warning();
+ }
+
++extern "C" /* Records InnoDB statistics on the THD so the extended slow log can print them. */
++void increment_thd_innodb_stats(THD* thd,
++ unsigned long long trx_id, /* stored as-is, replacing any earlier value */
++ long io_reads, /* added to the counter logged as InnoDB_IO_r_ops */
++ long long io_read, /* added to the counter logged as InnoDB_IO_r_bytes */
++ long io_reads_wait_timer, /* added; logged as InnoDB_IO_r_wait after dividing by 1000000.0 */
++ long lock_que_wait_timer, /* added; logged as InnoDB_rec_lock_wait after dividing by 1000000.0 */
++ long que_wait_timer, /* added; logged as InnoDB_queue_wait after dividing by 1000000.0 */
++ long page_access) /* added to the counter logged as InnoDB_pages_distinct */
++{
++ thd->innodb_was_used = TRUE; /* the slow log prints its InnoDB lines only when this flag is set */
++ thd->innodb_trx_id = trx_id; /* assigned, not accumulated */
++ thd->innodb_io_reads += io_reads;
++ thd->innodb_io_read += io_read;
++ thd->innodb_io_reads_wait_timer += io_reads_wait_timer;
++ thd->innodb_lock_que_wait_timer += lock_que_wait_timer;
++ thd->innodb_innodb_que_wait_timer += que_wait_timer;
++ thd->innodb_page_access += page_access;
++}
++
++extern "C" /* Accessor exposing the session's log_slow_verbosity value through the C plugin interface. */
++unsigned long thd_log_slow_verbosity(const THD *thd)
++{
++ return (unsigned long) thd->variables.log_slow_verbosity; /* the field is declared ulonglong; the cast narrows it to unsigned long */
++}
++
++extern "C" /* Accessor exposing the server's opt_slow_log flag through the C plugin interface. */
++int thd_opt_slow_log()
++{
++ return (int) opt_slow_log; /* opt_slow_log is declared outside this patch section */
++}
+
+ /**
+ Dumps a text description of a thread, its security context
+@@ -661,6 +692,7 @@
+ *cond_hdl= NULL;
+ return FALSE;
+ }
++ last_errno= sql_errno;
+
+ for (Internal_error_handler *error_handler= m_internal_handler;
+ error_handler;
+@@ -3355,6 +3387,12 @@
+ first_successful_insert_id_in_prev_stmt;
+ backup->first_successful_insert_id_in_cur_stmt=
+ first_successful_insert_id_in_cur_stmt;
++ backup->innodb_io_reads= innodb_io_reads;
++ backup->innodb_io_read= innodb_io_read;
++ backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer;
++ backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer;
++ backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer;
++ backup->innodb_page_access= innodb_page_access;
+
+ if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) &&
+ !is_current_stmt_binlog_format_row())
+@@ -3375,6 +3413,14 @@
+ cuted_fields= 0;
+ transaction.savepoints= 0;
+ first_successful_insert_id_in_cur_stmt= 0;
++ last_errno= 0;
++ innodb_trx_id= 0;
++ innodb_io_reads= 0;
++ innodb_io_read= 0;
++ innodb_io_reads_wait_timer= 0;
++ innodb_lock_que_wait_timer= 0;
++ innodb_innodb_que_wait_timer= 0;
++ innodb_page_access= 0;
+ }
+
+
+@@ -3437,6 +3483,12 @@
+ */
+ examined_row_count+= backup->examined_row_count;
+ cuted_fields+= backup->cuted_fields;
++ innodb_io_reads+= backup->innodb_io_reads;
++ innodb_io_read+= backup->innodb_io_read;
++ innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer;
++ innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer;
++ innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer;
++ innodb_page_access+= backup->innodb_page_access;
+ DBUG_VOID_RETURN;
+ }
+
+diff -ruN a/sql/sql_class.h b/sql/sql_class.h
+--- a/sql/sql_class.h 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_class.h 2010-12-16 04:27:46.000000000 +0300
+@@ -60,6 +60,26 @@
+ enum enum_duplicates { DUP_ERROR, DUP_REPLACE, DUP_UPDATE };
+ enum enum_delay_key_write { DELAY_KEY_WRITE_NONE, DELAY_KEY_WRITE_ON,
+ DELAY_KEY_WRITE_ALL };
++enum enum_use_global_log_slow_control { SLOG_UG_LOG_SLOW_FILTER, SLOG_UG_LOG_SLOW_RATE_LIMIT, SLOG_UG_LOG_SLOW_VERBOSITY, SLOG_UG_LONG_QUERY_TIME, SLOG_UG_MIN_EXAMINED_ROW_LIMIT, SLOG_UG_ALL }; /* bit indexes: tested as (ULL(1) << value) against opt_use_global_log_slow_control */
++enum enum_log_slow_verbosity { /* bit indexes into log_slow_verbosity: tested as (ULL(1) << SLOG_V_x) */
++ SLOG_V_MICROTIME, SLOG_V_QUERY_PLAN, SLOG_V_INNODB,
++ SLOG_V_PROFILING, SLOG_V_PROFILING_USE_GETRUSAGE,
++ SLOG_V_MINIMAL, SLOG_V_STANDARD, SLOG_V_FULL
++};
++#define QPLAN_NONE 0 /* value query_plan_flags is reset to; shift values below are parenthesised so they stay intact inside larger expressions */
++#define QPLAN_QC (1 << 0) /* set on the query-cache success path; logged as QC_Hit */
++#define QPLAN_QC_NO (1 << 1) /* set on the query-cache path where the query was not cached */
++#define QPLAN_FULL_SCAN (1 << 2) /* logged as Full_scan */
++#define QPLAN_FULL_JOIN (1 << 3) /* logged as Full_join */
++#define QPLAN_TMP_TABLE (1 << 4) /* logged as Tmp_table */
++#define QPLAN_TMP_DISK (1 << 5) /* logged as Tmp_table_on_disk */
++#define QPLAN_FILESORT (1 << 6) /* logged as Filesort */
++#define QPLAN_FILESORT_DISK (1 << 7) /* logged as Filesort_on_disk */
++enum enum_log_slow_filter { /* same order as the names in log_slow_filter_name[] (sys_vars.cc) */
++ SLOG_F_QC_NO, SLOG_F_FULL_SCAN, SLOG_F_FULL_JOIN, /* NOTE(review): SLOG_F_QC_NO is 0, so using it directly as a bit mask always yields 0 - confirm users apply (1 << value) */
++ SLOG_F_TMP_TABLE, SLOG_F_TMP_DISK, SLOG_F_FILESORT,
++ SLOG_F_FILESORT_DISK
++};
+ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
+ SLAVE_EXEC_MODE_IDEMPOTENT,
+ SLAVE_EXEC_MODE_LAST_BIT};
+@@ -508,6 +528,17 @@
+
+ my_bool sysdate_is_now;
+
++ ulong log_slow_rate_limit;
++ ulonglong log_slow_filter;
++ ulonglong log_slow_verbosity;
++
++ ulong innodb_io_reads;
++ ulonglong innodb_io_read;
++ ulong innodb_io_reads_wait_timer;
++ ulong innodb_lock_que_wait_timer;
++ ulong innodb_innodb_que_wait_timer;
++ ulong innodb_page_access;
++
+ double long_query_time_double;
+
+ } SV;
+@@ -1140,6 +1171,14 @@
+ uint in_sub_stmt;
+ bool enable_slow_log;
+ bool last_insert_id_used;
++
++ ulong innodb_io_reads;
++ ulonglong innodb_io_read;
++ ulong innodb_io_reads_wait_timer;
++ ulong innodb_lock_que_wait_timer;
++ ulong innodb_innodb_que_wait_timer;
++ ulong innodb_page_access;
++
+ SAVEPOINT *savepoints;
+ enum enum_check_fields count_cuted_fields;
+ };
+@@ -1575,6 +1614,26 @@
+ thr_lock_type update_lock_default;
+ Delayed_insert *di;
+
++ bool write_to_slow_log;
++
++ ulonglong bytes_sent_old;
++ ulong tmp_tables_used;
++ ulong tmp_tables_disk_used;
++ ulonglong tmp_tables_size;
++ bool innodb_was_used;
++ ulonglong innodb_trx_id;
++ ulong innodb_io_reads;
++ ulonglong innodb_io_read;
++ ulong innodb_io_reads_wait_timer;
++ ulong innodb_lock_que_wait_timer;
++ ulong innodb_innodb_que_wait_timer;
++ ulong innodb_page_access;
++
++ ulong query_plan_flags;
++ ulong query_plan_fsort_passes;
++
++ uint last_errno;
++
+ /* <> 0 if we are inside of trigger or stored function. */
+ uint in_sub_stmt;
+
+diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
+--- a/sql/sql_connect.cc 2010-11-03 01:01:14.000000000 +0300
++++ b/sql/sql_connect.cc 2010-12-16 04:27:46.000000000 +0300
+@@ -738,6 +738,15 @@
+
+ prepare_new_connection_state(thd);
+
++ /*
++ If rate limiting of slow log writes is enabled, decide whether to log this
++ new thread's queries or not. Uses extremely simple algorithm. :)
++ */
++ thd->write_to_slow_log= FALSE;
++ if (thd->variables.log_slow_rate_limit <= 1 ||
++ (thd->thread_id % thd->variables.log_slow_rate_limit) == 0)
++ thd->write_to_slow_log= TRUE;
++
+ while (!net->error && net->vio != 0 &&
+ !(thd->killed == THD::KILL_CONNECTION))
+ {
+diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
+--- a/sql/sql_parse.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-16 04:47:41.000000000 +0300
+@@ -1424,7 +1424,6 @@
+ DBUG_RETURN(error);
+ }
+
+-
+ void log_slow_statement(THD *thd)
+ {
+ DBUG_ENTER("log_slow_statement");
+@@ -1437,6 +1436,42 @@
+ if (unlikely(thd->in_sub_stmt))
+ DBUG_VOID_RETURN; // Don't set time for sub stmt
+
++ /* Follow the slow log filter configuration. */
++ if (thd->variables.log_slow_filter != 0 &&
++ (!(thd->variables.log_slow_filter & thd->query_plan_flags) ||
++ ((thd->variables.log_slow_filter & SLOG_F_QC_NO) &&
++ (thd->query_plan_flags & QPLAN_QC))))
++ DBUG_VOID_RETURN;
++
++ /*
++ Low long_query_time value most likely means user is debugging stuff and even
++ though some thread's queries are not supposed to be logged b/c of the rate
++ limit, if one of them takes long enough (>= 1 second) it will be sensible
++ to make an exception and write to slow log anyway.
++ */
++
++ ulonglong end_utime_of_query= thd->current_utime();
++#define USE_GLOBAL_UPDATE(variable_name,enum_value_name) \
++ if (opt_use_global_log_slow_control & (ULL(1) << enum_value_name)) \
++ { \
++ thd->variables. variable_name= \
++ global_system_variables. variable_name; \
++ }
++ USE_GLOBAL_UPDATE(log_slow_filter,SLOG_UG_LOG_SLOW_FILTER);
++ USE_GLOBAL_UPDATE(log_slow_rate_limit,SLOG_UG_LOG_SLOW_RATE_LIMIT);
++ USE_GLOBAL_UPDATE(log_slow_verbosity,SLOG_UG_LOG_SLOW_VERBOSITY);
++ USE_GLOBAL_UPDATE(long_query_time,SLOG_UG_LONG_QUERY_TIME);
++ USE_GLOBAL_UPDATE(long_query_time_double,SLOG_UG_LONG_QUERY_TIME);
++ USE_GLOBAL_UPDATE(min_examined_row_limit,SLOG_UG_MIN_EXAMINED_ROW_LIMIT);
++#undef USE_GLOBAL_UPDATE
++
++ /* Do not log this thread's queries due to rate limiting. */
++ if (thd->write_to_slow_log != TRUE
++ && (thd->variables.long_query_time >= 1000000
++ || (ulong) (end_utime_of_query - thd->utime_after_lock) < 1000000))
++ DBUG_VOID_RETURN;
++
++
+ /*
+ Do not log administrative statements unless the appropriate option is
+ set.
+@@ -1812,6 +1847,9 @@
+ context.resolve_in_table_list_only(select_lex->
+ table_list.first);
+
++ /* Always reset the sent-row counter here so the extended slow query log starts from zero */
++ thd->sent_row_count= 0;
++
+ /*
+ Reset warning count for each query that uses tables
+ A better approach would be to reset this for any commands
+@@ -5238,6 +5276,21 @@
+ thd->rand_used= 0;
+ thd->sent_row_count= thd->examined_row_count= 0;
+
++ thd->bytes_sent_old= thd->status_var.bytes_sent;
++ thd->tmp_tables_used= thd->tmp_tables_disk_used= 0;
++ thd->tmp_tables_size= 0;
++ thd->innodb_was_used= FALSE;
++ thd->innodb_trx_id= 0;
++ thd->innodb_io_reads= 0;
++ thd->innodb_io_read= 0;
++ thd->innodb_io_reads_wait_timer= 0;
++ thd->innodb_lock_que_wait_timer= 0;
++ thd->innodb_innodb_que_wait_timer= 0;
++ thd->innodb_page_access= 0;
++ thd->query_plan_flags= QPLAN_NONE;
++ thd->query_plan_fsort_passes= 0;
++ thd->last_errno= 0;
++
+ thd->reset_current_stmt_binlog_format_row();
+ thd->binlog_unsafe_warning_flags= 0;
+
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_select.cc 2010-12-16 04:27:47.000000000 +0300
+@@ -6870,7 +6870,10 @@
+ {
+ join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ if (statistics)
++ {
+ status_var_increment(join->thd->status_var.select_scan_count);
++ join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
++ }
+ }
+ }
+ else
+@@ -6884,7 +6887,10 @@
+ {
+ join->thd->server_status|=SERVER_QUERY_NO_INDEX_USED;
+ if (statistics)
++ {
+ status_var_increment(join->thd->status_var.select_full_join_count);
++ join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
++ }
+ }
+ }
+ if (!table->no_keyread)
+@@ -10210,6 +10216,7 @@
+ (ulong) rows_limit,test(group)));
+
+ status_var_increment(thd->status_var.created_tmp_tables);
++ thd->query_plan_flags|= QPLAN_TMP_TABLE;
+
+ if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
+ temp_pool_slot = bitmap_lock_set_next(&temp_pool);
+@@ -11107,6 +11114,7 @@
+ goto err;
+ }
+ status_var_increment(table->in_use->status_var.created_tmp_disk_tables);
++ table->in_use->query_plan_flags|= QPLAN_TMP_DISK;
+ share->db_record_offset= 1;
+ DBUG_RETURN(0);
+ err:
+@@ -11125,6 +11133,14 @@
+ save_proc_info=thd->proc_info;
+ thd_proc_info(thd, "removing tmp table");
+
++ thd->tmp_tables_used++;
++ if (entry->file)
++ {
++ thd->tmp_tables_size += entry->file->stats.data_file_length;
++ if (entry->file->ht->db_type != DB_TYPE_HEAP)
++ thd->tmp_tables_disk_used++;
++ }
++
+ // Release latches since this can take a long time
+ ha_release_temporary_latches(thd);
+
+diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
+--- a/sql/sql_show.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sql_show.cc 2010-12-16 04:27:47.000000000 +0300
+@@ -1942,8 +1942,17 @@
+ table->field[4]->store(command_name[tmp->command].str,
+ command_name[tmp->command].length, cs);
+ /* MYSQL_TIME */
+- table->field[5]->store((longlong)(tmp->start_time ?
+- now - tmp->start_time : 0), FALSE);
++ longlong value_in_time_column= 0;
++ if(tmp->start_time)
++ {
++ value_in_time_column = (now - tmp->start_time);
++ if(value_in_time_column > now)
++ {
++ value_in_time_column= 0;
++ }
++ }
++ table->field[5]->store(value_in_time_column, FALSE);
++
+ /* STATE */
+ if ((val= thread_state_info(tmp)))
+ {
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2010-12-16 04:27:10.000000000 +0300
++++ b/sql/sys_vars.cc 2010-12-16 04:36:12.000000000 +0300
+@@ -2836,6 +2836,116 @@
+ DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
+ ON_UPDATE(fix_log_state));
+
++const char *log_slow_filter_name[]= { "qc_miss", "full_scan", "full_join",
++ "tmp_table", "tmp_table_on_disk", "filesort", "filesort_on_disk", 0};
++static Sys_var_set Sys_log_slow_filter(
++ "log_slow_filter",
++ "Log only the queries that followed certain execution plan. "
++ "Multiple flags allowed in a comma-separated string. "
++ "[qc_miss, full_scan, full_join, tmp_table, tmp_table_on_disk, "
++ "filesort, filesort_on_disk]",
++ SESSION_VAR(log_slow_filter), CMD_LINE(REQUIRED_ARG),
++ log_slow_filter_name, DEFAULT(0));
++static Sys_var_ulong sys_log_slow_rate_limit(
++ "log_slow_rate_limit","Rate limit statement writes to slow log to only those from every (1/log_slow_rate_limit) session.",
++ SESSION_VAR(log_slow_rate_limit), CMD_LINE(REQUIRED_ARG),
++ VALID_RANGE(1, ULONG_MAX), DEFAULT(1), BLOCK_SIZE(1));
++const char* log_slow_verbosity_name[] = { /* flag names accepted by log_slow_verbosity */
++ "microtime", "query_plan", "innodb",
++ "profiling", "profiling_use_getrusage", /* spelling matches the name advertised in the variable's help text */
++ "minimal", "standard", "full", 0
++};
++static ulonglong update_log_slow_verbosity_replace(ulonglong value, ulonglong what, ulonglong by)
++{
++ if((value & what) == what)
++ {
++ value = value & (~what);
++ value = value | by;
++ }
++ return value;
++}
++void update_log_slow_verbosity(ulonglong* value_ptr)
++{
++ ulonglong &value = *value_ptr;
++ ulonglong microtime= ULL(1) << SLOG_V_MICROTIME;
++ ulonglong query_plan= ULL(1) << SLOG_V_QUERY_PLAN;
++ ulonglong innodb= ULL(1) << SLOG_V_INNODB;
++ ulonglong minimal= ULL(1) << SLOG_V_MINIMAL;
++ ulonglong standard= ULL(1) << SLOG_V_STANDARD;
++ ulonglong full= ULL(1) << SLOG_V_FULL;
++ value= update_log_slow_verbosity_replace(value,minimal,microtime);
++ value= update_log_slow_verbosity_replace(value,standard,microtime | query_plan);
++ value= update_log_slow_verbosity_replace(value,full,microtime | query_plan | innodb);
++}
++static bool update_log_slow_verbosity_helper(sys_var */*self*/, THD *thd,
++ enum_var_type type)
++{
++ if(type == OPT_SESSION)
++ {
++ update_log_slow_verbosity(&(thd->variables.log_slow_verbosity));
++ }
++ else
++ {
++ update_log_slow_verbosity(&(global_system_variables.log_slow_verbosity));
++ }
++ return false;
++}
++void init_use_global_log_slow_control() /* NOTE(review): despite the name, this expands the minimal/standard/full aliases of the global log_slow_verbosity */
++{
++ update_log_slow_verbosity(&(global_system_variables.log_slow_verbosity));
++}
++static Sys_var_set Sys_log_slow_verbosity(
++ "log_slow_verbosity",
++ "Choose how verbose the messages to your slow log will be. "
++ "Multiple flags allowed in a comma-separated string. [microtime, query_plan, innodb, profiling, profiling_use_getrusage]",
++ SESSION_VAR(log_slow_verbosity), CMD_LINE(REQUIRED_ARG),
++ log_slow_verbosity_name, DEFAULT(SLOG_V_MICROTIME),
++ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
++ ON_UPDATE(update_log_slow_verbosity_helper));
++static Sys_var_mybool Sys_log_slow_slave_statements(
++ "log_slow_slave_statements",
++ "Log queries replayed by the slave SQL thread",
++ GLOBAL_VAR(opt_log_slow_slave_statements), CMD_LINE(OPT_ARG),
++ DEFAULT(FALSE));
++static Sys_var_mybool Sys_log_slow_sp_statements(
++ "log_slow_sp_statements",
++ "Log slow statements executed by stored procedure to the slow log if it is open.",
++ GLOBAL_VAR(opt_log_slow_sp_statements), CMD_LINE(OPT_ARG),
++ DEFAULT(TRUE));
++static Sys_var_mybool Sys_log_slow_timestamp_every(
++ "log_slow_timestamp_every",
++ "Timestamp is printed for all records of the slow log even if they are same time.",
++ GLOBAL_VAR(opt_log_slow_timestamp_every), CMD_LINE(OPT_ARG),
++ DEFAULT(FALSE));
++const char *use_global_log_slow_control_name[]= { "log_slow_filter", "log_slow_rate_limit", "log_slow_verbosity", "long_query_time", "min_examined_row_limit", "all", 0};
++static bool update_use_global_log_slow_control(sys_var */*self*/, THD */*thd*/,
++ enum_var_type /*type*/)
++{
++ if(opt_use_global_log_slow_control & (ULL(1) << SLOG_UG_ALL)) /* SLOG_UG_ALL bit is set: replace the value with the combination below */
++ { /* NOTE(review): SLOG_UG_ALL is used above as a bit index (1 << n), but the SLOG_UG_* values below are OR-ed unshifted - confirm they are masks */
++ opt_use_global_log_slow_control=
++ SLOG_UG_LOG_SLOW_FILTER | SLOG_UG_LOG_SLOW_RATE_LIMIT | SLOG_UG_LOG_SLOW_VERBOSITY |
++ SLOG_UG_LONG_QUERY_TIME | SLOG_UG_MIN_EXAMINED_ROW_LIMIT;
++ }
++ return false;
++}
++void init_log_slow_verbosity() /* NOTE(review): despite the name, this post-processes opt_use_global_log_slow_control, not log_slow_verbosity */
++{
++ update_use_global_log_slow_control(0,0,OPT_GLOBAL);
++}
++static Sys_var_set Sys_use_global_log_slow_control(
++ "use_global_log_slow_control",
++ "Choose flags, which always use the global variables. Multiple flags allowed in a comma-separated string. [none, log_slow_filter, log_slow_rate_limit, log_slow_verbosity, long_query_time, min_examined_row_limit, all]",
++ GLOBAL_VAR(opt_use_global_log_slow_control), CMD_LINE(REQUIRED_ARG),
++ use_global_log_slow_control_name, DEFAULT(0),
++ NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
++ ON_UPDATE(update_use_global_log_slow_control));
++static Sys_var_mybool Sys_slow_query_log_microseconds_timestamp(
++ "slow_query_log_microseconds_timestamp",
++ "Use microsecond precision for timestamps written to the slow query log.",
++ GLOBAL_VAR(opt_slow_query_log_microseconds_timestamp), CMD_LINE(OPT_ARG),
++ DEFAULT(FALSE));
++
+ /* Synonym of "slow_query_log" for consistency with SHOW VARIABLES output */
+ static Sys_var_mybool Sys_log_slow(
+ "log_slow_queries",
+diff -ruN a/sql/sql_profile.cc b/sql/sql_profile.cc
+--- a/sql/sql_profile.cc 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_profile.cc 2010-12-02 20:26:35.448357413 +0900
+@@ -243,7 +243,8 @@
+ {
+ time_usecs= (double) my_getsystime() / 10.0; /* 1 sec was 1e7, now is 1e6 */
+ #ifdef HAVE_GETRUSAGE
+- getrusage(RUSAGE_SELF, &rusage);
++ if ((profile->get_profiling())->enabled_getrusage())
++ getrusage(RUSAGE_SELF, &rusage);
+ #elif defined(_WIN32)
+ FILETIME ftDummy;
+ // NOTE: Get{Process|Thread}Times has a granularity of the clock interval,
+@@ -251,6 +252,19 @@
+ // measurable by this function.
+ GetProcessTimes(GetCurrentProcess(), &ftDummy, &ftDummy, &ftKernel, &ftUser);
+ #endif
++
++#ifdef HAVE_CLOCK_GETTIME
++ struct timespec tp;
++
++ if (!(clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ {
++ cpu_time_usecs= tp.tv_sec*1000000000.0 + tp.tv_nsec;
++ }
++ else
++#endif
++ {
++ cpu_time_usecs= 0;
++ }
+ }
+
+
+@@ -366,7 +380,8 @@
+ finish_current_query();
+ }
+
+- enabled= ((thd->variables.option_bits & OPTION_PROFILING) != 0);
++ enabled= ((thd->variables.option_bits & OPTION_PROFILING) != 0) ||
++ ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_PROFILING)) != 0);
+
+ if (! enabled) DBUG_VOID_RETURN;
+
+@@ -404,7 +419,8 @@
+ status_change("ending", NULL, NULL, 0);
+
+ if ((enabled) && /* ON at start? */
+- ((thd->variables.option_bits & OPTION_PROFILING) != 0) && /* and ON at end? */
++ (((thd->variables.option_bits & OPTION_PROFILING) != 0) ||
++ ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_PROFILING)) != 0)) && /* and ON at end? */
+ (current->query_source != NULL) &&
+ (! current->entries.is_empty()))
+ {
+@@ -505,6 +521,118 @@
+ DBUG_VOID_RETURN;
+ }
+
++bool PROFILING::enabled_getrusage()
++{
++ return ((thd->variables.log_slow_verbosity & (ULL(1) << SLOG_V_PROFILING_USE_GETRUSAGE)) != 0);
++}
++
++/**
++ For a given profile entry specified by a name and 2 time measurements,
++ print its normalized name (i.e. with all spaces replaced with underscores)
++ along with its wall clock and CPU time.
++*/
++
++static void my_b_print_status(IO_CACHE *log_file, const char *status,
++ PROF_MEASUREMENT *start, PROF_MEASUREMENT *stop)
++{
++ DBUG_ENTER("my_b_print_status");
++ DBUG_ASSERT(log_file != NULL && status != NULL);
++ char query_time_buff[22+7];
++ const char *tmp;
++
++ my_b_printf(log_file, "Profile_"); /* first field: "Profile_<status>: <elapsed>" */
++ for (tmp= status; *tmp; tmp++)
++ my_b_write_byte(log_file, *tmp == ' ' ? '_' : *tmp); /* spaces in the status become underscores */
++
++ snprintf(query_time_buff, sizeof(query_time_buff), "%.6f",
++ (stop->time_usecs - start->time_usecs) / (1000.0 * 1000)); /* difference of the two time_usecs readings, divided by 1e6 */
++ my_b_printf(log_file, ": %s ", query_time_buff);
++
++ my_b_printf(log_file, "Profile_"); /* second field: "Profile_<status>_cpu: <cpu time>" */
++ for (tmp= status; *tmp; tmp++)
++ my_b_write_byte(log_file, *tmp == ' ' ? '_' : *tmp);
++ my_b_printf(log_file, "_cpu: ");
++
++ snprintf(query_time_buff, sizeof(query_time_buff), "%.6f",
++ (stop->cpu_time_usecs - start->cpu_time_usecs) /
++ (1000.0 * 1000 * 1000)); /* 1e9, not 1e6: cpu_time_usecs is filled as tv_sec*1e9 + tv_nsec, i.e. nanoseconds despite its name */
++ my_b_printf(log_file, "%s ", query_time_buff);
++
++ DBUG_VOID_RETURN;
++}
++
++/**
++ Print output for current query to file
++*/
++
++int PROFILING::print_current(IO_CACHE *log_file)
++{
++ DBUG_ENTER("PROFILING::print_current");
++ ulonglong row_number= 0;
++
++ QUERY_PROFILE *query;
++ /* Get current query */
++ if (current == NULL)
++ {
++ DBUG_RETURN(0);
++ }
++
++ query= current;
++
++ my_b_printf(log_file, "# ");
++
++ void *entry_iterator;
++ PROF_MEASUREMENT *entry= NULL, *previous= NULL, *first= NULL;
++ /* ...and for each query, go through all its state-change steps. */
++ for (entry_iterator= query->entries.new_iterator();
++ entry_iterator != NULL;
++ entry_iterator= query->entries.iterator_next(entry_iterator),
++ previous=entry, row_number++)
++ {
++ entry= query->entries.iterator_value(entry_iterator);
++
++ /* Skip the first. We count spans of fence, not fence-posts. */
++ if (previous == NULL) {first= entry; continue;}
++
++ if (thd->lex->sql_command == SQLCOM_SHOW_PROFILE)
++ {
++ /*
++ We got here via a SHOW command. That means that we stored
++ information about the query we wish to show and that isn't
++ in a WHERE clause at a higher level to filter out rows we
++ wish to exclude.
++
++ Because that functionality isn't available in the server yet,
++ we must filter here, at the wrong level. Once one can con-
++ struct where and having conditions at the SQL layer, then this
++ condition should be ripped out.
++ */
++ if (thd->lex->profile_query_id == 0) /* 0 == show final query */
++ {
++ if (query != last)
++ continue;
++ }
++ else
++ {
++ if (thd->lex->profile_query_id != query->profiling_query_id)
++ continue;
++ }
++ }
++
++ my_b_print_status(log_file, previous->status, previous, entry);
++ }
++
++ my_b_write_byte(log_file, '\n');
++ if ((entry != NULL) && (first != NULL))
++ {
++ my_b_printf(log_file, "# ");
++ my_b_print_status(log_file, "total", first, entry);
++ my_b_write_byte(log_file, '\n');
++ }
++
++ DBUG_RETURN(0);
++}
++
+ /**
+ Fill the information schema table, "query_profile", as defined in show.cc .
+ There are two ways to get to this function: Selecting from the information
+diff -ruN a/sql/sql_profile.h b/sql/sql_profile.h
+--- a/sql/sql_profile.h 2010-11-03 07:01:14.000000000 +0900
++++ b/sql/sql_profile.h 2010-12-02 19:23:07.823955510 +0900
+@@ -164,11 +164,15 @@
+ */
+ class PROF_MEASUREMENT
+ {
+-private:
+- friend class QUERY_PROFILE;
+- friend class PROFILING;
+-
+ QUERY_PROFILE *profile;
++
++ char *allocated_status_memory;
++
++ void set_label(const char *status_arg, const char *function_arg,
++ const char *file_arg, unsigned int line_arg);
++ void clean_up();
++
++public:
+ char *status;
+ #ifdef HAVE_GETRUSAGE
+ struct rusage rusage;
+@@ -181,12 +185,7 @@
+ unsigned int line;
+
+ double time_usecs;
+- char *allocated_status_memory;
+-
+- void set_label(const char *status_arg, const char *function_arg,
+- const char *file_arg, unsigned int line_arg);
+- void clean_up();
+-
++ double cpu_time_usecs;
+ PROF_MEASUREMENT();
+ PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg);
+ PROF_MEASUREMENT(QUERY_PROFILE *profile_arg, const char *status_arg,
+@@ -231,6 +230,11 @@
+
+ /* Show this profile. This is called by PROFILING. */
+ bool show(uint options);
++
++public:
++
++ inline PROFILING * get_profiling() { return profiling; };
++
+ };
+
+
+@@ -276,9 +280,11 @@
+
+ /* SHOW PROFILES */
+ bool show_profiles();
++ bool enabled_getrusage();
+
+ /* ... from INFORMATION_SCHEMA.PROFILING ... */
+ int fill_statistics_info(THD *thd, TABLE_LIST *tables, Item *cond);
++ int print_current(IO_CACHE *log_file);
+ };
+
+ # endif /* HAVE_PROFILING */
--- /dev/null
+# name : sql_no_fcache.patch
+# introduced : 12
+# maintainer : Oleg
+#
+#!!! notice !!!
+# Any small change to this file in the main branch
+# should be done or reviewed by the maintainer!
+diff -ruN a/client/mysqldump.c b/client/mysqldump.c
+--- a/client/mysqldump.c 2010-07-28 16:47:58.264067653 +0400
++++ b/client/mysqldump.c 2010-07-28 16:47:59.604985656 +0400
+@@ -138,6 +138,8 @@
+ #endif
+ static uint opt_protocol= 0;
+
++static my_bool server_supports_sql_no_fcache= FALSE;
++
+ /*
+ Dynamic_string wrapper functions. In this file use these
+ wrappers, they will terminate the process if there is
+@@ -1471,6 +1473,17 @@
+ /* Don't switch charsets for 4.1 and earlier. (bug#34192). */
+ server_supports_switching_charsets= FALSE;
+ }
++
++ /* Check to see if we support SQL_NO_FCACHE on this server. */
++ if (mysql_query(mysql, "SELECT SQL_NO_FCACHE NOW()") == 0)
++ {
++ MYSQL_RES *res = mysql_store_result(mysql);
++ if (res)
++ {
++ mysql_free_result(res);
++ }
++ server_supports_sql_no_fcache= TRUE;
++ }
+ /*
+ As we're going to set SQL_MODE, it would be lost on reconnect, so we
+ cannot reconnect.
+@@ -3143,7 +3156,12 @@
+
+ /* now build the query string */
+
+- dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * INTO OUTFILE '");
++ dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ ");
++ if (server_supports_sql_no_fcache)
++ {
++ dynstr_append_checked(&query_string, "/*!50084 SQL_NO_FCACHE */ ");
++ }
++ dynstr_append_checked(&query_string, "* INTO OUTFILE '");
+ dynstr_append_checked(&query_string, filename);
+ dynstr_append_checked(&query_string, "'");
+
+@@ -3193,7 +3211,12 @@
+ check_io(md_result_file);
+ }
+
+- dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM ");
++ dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ ");
++ if (server_supports_sql_no_fcache)
++ {
++ dynstr_append_checked(&query_string, "/*!50084 SQL_NO_FCACHE */ ");
++ }
++ dynstr_append_checked(&query_string, "* FROM ");
+ dynstr_append_checked(&query_string, result_table);
+
+ if (where)
+diff -ruN a/include/flashcache_ioctl.h b/include/flashcache_ioctl.h
+--- a/include/flashcache_ioctl.h 1970-01-01 03:00:00.000000000 +0300
++++ b/include/flashcache_ioctl.h 2010-07-28 16:47:59.744079911 +0400
+@@ -0,0 +1,53 @@
++/****************************************************************************
++ * flashcache_ioctl.h
++ * FlashCache: Device mapper target for block-level disk caching
++ *
++ * Copyright 2010 Facebook, Inc.
++ * Author: Mohan Srinivasan (mohan@facebook.com)
++ *
++ * Based on DM-Cache:
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Ming Zhao (mingzhao@ufl.edu)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program. If not, see <http://www.gnu.org/licenses/>.
++ ****************************************************************************/
++
++#ifndef FLASHCACHE_IOCTL_H
++#define FLASHCACHE_IOCTL_H
++
++#include <linux/types.h>
++
++#define FLASHCACHE_IOCTL 0xfe
++
++enum {
++ FLASHCACHEADDNCPID_CMD=200,
++ FLASHCACHEDELNCPID_CMD,
++ FLASHCACHEDELNCALL_CMD,
++ FLASHCACHEADDWHITELIST_CMD,
++ FLASHCACHEDELWHITELIST_CMD,
++ FLASHCACHEDELWHITELISTALL_CMD,
++};
++
++#define FLASHCACHEADDNCPID _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDNCPID_CMD, pid_t)
++#define FLASHCACHEDELNCPID _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCPID_CMD, pid_t)
++#define FLASHCACHEDELNCALL _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELNCALL_CMD, pid_t)
++
++#define FLASHCACHEADDBLACKLIST FLASHCACHEADDNCPID
++#define FLASHCACHEDELBLACKLIST FLASHCACHEDELNCPID
++#define FLASHCACHEDELALLBLACKLIST FLASHCACHEDELNCALL
++
++#define FLASHCACHEADDWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEADDWHITELIST_CMD, pid_t)
++#define FLASHCACHEDELWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELIST_CMD, pid_t)
++#define FLASHCACHEDELALLWHITELIST _IOW(FLASHCACHE_IOCTL, FLASHCACHEDELWHITELISTALL_CMD, pid_t)
++
++#endif
+diff -ruN a/patch_info/sql_no_fcache.info b/patch_info/sql_no_fcache.info
+--- a/patch_info/sql_no_fcache.info 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/sql_no_fcache.info 2010-07-28 16:47:59.915439165 +0400
+@@ -0,0 +1,6 @@
++File=sql_no_fcache.patch
++Name=Support for flashcache including the SQL_NO_FCACHE option that prevents blocks from being cached during a query.
++Version=1.0
++Author=Facebook
++License=GPL
++Comment=
+diff -ruN a/sql/lex.h b/sql/lex.h
+--- a/sql/lex.h 2010-07-28 16:47:58.575318748 +0400
++++ b/sql/lex.h 2010-07-28 16:48:00.134078469 +0400
+@@ -516,6 +516,7 @@
+ { "SQL_CACHE", SYM(SQL_CACHE_SYM)},
+ { "SQL_CALC_FOUND_ROWS", SYM(SQL_CALC_FOUND_ROWS)},
+ { "SQL_NO_CACHE", SYM(SQL_NO_CACHE_SYM)},
++ { "SQL_NO_FCACHE", SYM(SQL_NO_FCACHE_SYM)},
+ { "SQL_SMALL_RESULT", SYM(SQL_SMALL_RESULT)},
+ { "SQL_THREAD", SYM(SQL_THREAD)},
+ { "SQL_TSI_SECOND", SYM(SECOND_SYM)},
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2010-07-28 16:47:58.565318871 +0400
++++ b/sql/mysqld.h 2010-07-28 16:48:03.004544367 +0400
+@@ -190,6 +190,8 @@
+ extern ulong back_log;
+ extern char language[FN_REFLEN];
+ extern ulong server_id, concurrency;
++/* flashcache */
++extern int cachedev_fd;
+ extern time_t server_start_time, flush_status_time;
+ extern char *opt_mysql_tmpdir, mysql_charsets_dir[];
+ extern int mysql_unpacked_real_data_home_len;
+diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
+--- a/sql/mysqld.cc 2010-07-28 16:47:58.565318871 +0400
++++ b/sql/mysqld.cc 2010-07-28 16:48:03.004544367 +0400
+@@ -86,6 +86,11 @@
+ #ifdef HAVE_SYS_PRCTL_H
+ #include <sys/prctl.h>
+ #endif
++#if defined(__linux__)
++#include <mntent.h>
++#include <sys/statfs.h>
++#include "flashcache_ioctl.h"
++#endif//__linux__
+
+ #include <thr_alarm.h>
+ #include <ft_global.h>
+@@ -481,6 +486,11 @@
+ ulong specialflag=0;
+ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
+ ulong max_connections, max_connect_errors;
++
++/* flashcache */
++int cachedev_fd;
++my_bool cachedev_enabled= FALSE;
++
+ /**
+ Limit of the total number of prepared statements in the server.
+ Is necessary to protect the server against out-of-memory attacks.
+@@ -4172,6 +4182,97 @@
+ #define decrement_handler_count()
+ #endif /* defined(_WIN32) || defined(HAVE_SMEM) */
+
++#if defined(__linux__)
++/*
++ * Auto detect if we support flash cache on the host system.
++ * This needs to be called before we setuid away from root
++ * to avoid permission problems on opening the device node.
++ */
++static void init_cachedev(void)
++{
++ struct statfs stfs_data_home_dir;
++ struct statfs stfs;
++ struct mntent *ent;
++ pid_t pid = getpid();
++ FILE *mounts;
++ const char *error_message= NULL;
++
++ // disabled by default
++ cachedev_fd = -1;
++ cachedev_enabled= FALSE;
++
++ if (!mysql_data_home)
++ {
++ error_message= "mysql_data_home not set";
++ goto epilogue;
++ }
++
++ if (statfs(mysql_data_home, &stfs_data_home_dir) < 0)
++ {
++ error_message= "statfs failed";
++ goto epilogue;
++ }
++
++ mounts = setmntent("/etc/mtab", "r");
++ if (mounts == NULL)
++ {
++ error_message= "setmntent failed";
++ goto epilogue;
++ }
++
++ while ((ent = getmntent(mounts)) != NULL)
++ {
++ if (statfs(ent->mnt_dir, &stfs) < 0)
++ continue;
++ if (memcmp(&stfs.f_fsid, &stfs_data_home_dir.f_fsid, sizeof(fsid_t)) == 0)
++ break;
++ }
++ endmntent(mounts);
++
++ if (ent == NULL)
++ {
++ error_message= "getmntent loop failed";
++ goto epilogue;
++ }
++
++ cachedev_fd = open(ent->mnt_fsname, O_RDONLY);
++ if (cachedev_fd < 0)
++ {
++ error_message= "open flash device failed";
++ goto epilogue;
++ }
++
++ /* cleanup previous whitelistings */
++ if (ioctl(cachedev_fd, FLASHCACHEDELALLWHITELIST, &pid) < 0)
++ {
++ close(cachedev_fd);
++ cachedev_fd = -1;
++ error_message= "ioctl failed";
++ } else {
++ ioctl(cachedev_fd, FLASHCACHEADDWHITELIST, &pid);
++ }
++
++epilogue:
++ sql_print_information("Flashcache bypass: %s",
++ (cachedev_fd > 0) ? "enabled" : "disabled");
++ if (error_message)
++ sql_print_information("Flashcache setup error is : %s\n", error_message);
++ else
++ cachedev_enabled= TRUE;
++
++}
++
++static void cleanup_cachedev(void)
++{
++ pid_t pid = getpid();
++
++ if (cachedev_enabled) {
++ ioctl(cachedev_fd, FLASHCACHEDELWHITELIST, &pid);
++ close(cachedev_fd);
++ cachedev_fd = -1;
++ }
++}
++#endif//__linux__
+
+ #ifndef EMBEDDED_LIBRARY
+ #ifndef DBUG_OFF
+@@ -4426,6 +4527,10 @@
+ test_lc_time_sz();
+ #endif
+
++#if defined(__linux__)
++ init_cachedev();
++#endif//__linux__
++
+ /*
+ We have enough space for fiddling with the argv, continue
+ */
+@@ -4629,6 +4734,10 @@
+ }
+ #endif
+ clean_up(1);
++#if defined(__linux__)
++ cleanup_cachedev();
++#endif//__linux__
++
+ mysqld_exit(0);
+ }
+
+@@ -6448,6 +6557,7 @@
+ {"Delayed_errors", (char*) &delayed_insert_errors, SHOW_LONG},
+ {"Delayed_insert_threads", (char*) &delayed_insert_threads, SHOW_LONG_NOFLUSH},
+ {"Delayed_writes", (char*) &delayed_insert_writes, SHOW_LONG},
++ {"Flashcache_enabled", (char*) &cachedev_enabled, SHOW_BOOL },
+ {"Flush_commands", (char*) &refresh_version, SHOW_LONG_NOFLUSH},
+ {"Handler_commit", (char*) offsetof(STATUS_VAR, ha_commit_count), SHOW_LONG_STATUS},
+ {"Handler_delete", (char*) offsetof(STATUS_VAR, ha_delete_count), SHOW_LONG_STATUS},
+diff -ruN a/sql/sql_lex.cc b/sql/sql_lex.cc
+--- a/sql/sql_lex.cc 2010-07-28 16:47:58.555318714 +0400
++++ b/sql/sql_lex.cc 2010-07-28 16:48:07.794069239 +0400
+@@ -384,6 +384,7 @@
+ lex->describe= 0;
+ lex->subqueries= FALSE;
+ lex->view_prepare_mode= FALSE;
++ lex->disable_flashcache= FALSE;
+ lex->derived_tables= 0;
+ lex->safe_to_cache_query= 1;
+ lex->leaf_tables_insert= 0;
+diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
+--- a/sql/sql_lex.h 2010-07-28 16:47:58.575318748 +0400
++++ b/sql/sql_lex.h 2010-07-28 16:48:08.405691834 +0400
+@@ -2300,6 +2300,7 @@
+ */
+ bool view_prepare_mode;
+ bool safe_to_cache_query;
++ bool disable_flashcache;
+ bool subqueries, ignore;
+ st_parsing_options parsing_options;
+ Alter_info alter_info;
+diff -ruN a/sql/sql_select.cc b/sql/sql_select.cc
+--- a/sql/sql_select.cc 2010-07-28 16:47:58.555318714 +0400
++++ b/sql/sql_select.cc 2010-07-28 16:48:13.414069437 +0400
+@@ -55,6 +55,12 @@
+
+ #define PREV_BITS(type,A) ((type) (((type) 1 << (A)) -1))
+
++#include <sys/syscall.h>
++#include <sys/ioctl.h>
++#if defined(__linux__)
++#include "flashcache_ioctl.h"
++#endif//__linux__
++
+ const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
+ "MAYBE_REF","ALL","range","index","fulltext",
+ "ref_or_null","unique_subquery","index_subquery",
+@@ -266,8 +272,17 @@
+ ulong setup_tables_done_option)
+ {
+ bool res;
++ pid_t pid;
+ register SELECT_LEX *select_lex = &lex->select_lex;
+ DBUG_ENTER("handle_select");
++#if defined(__linux__)
++ if(lex->disable_flashcache && cachedev_fd > 0)
++ {
++ pid = syscall(SYS_gettid);
++ ioctl(cachedev_fd, FLASHCACHEADDNCPID, &pid);
++ }
++#endif//__linux__
++
+ MYSQL_SELECT_START(thd->query());
+
+ if (select_lex->master_unit()->is_union() ||
+@@ -302,6 +317,12 @@
+ if (unlikely(res))
+ result->abort_result_set();
+
++#if defined(__linux__)
++ if (lex->disable_flashcache && cachedev_fd > 0)
++ {
++ ioctl(cachedev_fd, FLASHCACHEDELNCPID, &pid);
++ }
++#endif//__linux__
+ MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
+ DBUG_RETURN(res);
+ }
+diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
+--- a/sql/sql_yacc.yy 2010-07-28 16:47:58.565318871 +0400
++++ b/sql/sql_yacc.yy 2010-07-28 16:48:14.205317990 +0400
+@@ -1283,6 +1283,7 @@
+ %token SQL_CACHE_SYM
+ %token SQL_CALC_FOUND_ROWS
+ %token SQL_NO_CACHE_SYM
++%token SQL_NO_FCACHE_SYM
+ %token SQL_SMALL_RESULT
+ %token SQL_SYM /* SQL-2003-R */
+ %token SQL_THREAD
+@@ -7349,6 +7350,10 @@
+ Lex->select_lex.sql_cache= SELECT_LEX::SQL_NO_CACHE;
+ }
+ }
++ | SQL_NO_FCACHE_SYM
++ {
++ Lex->disable_flashcache= TRUE;
++ }
+ | SQL_CACHE_SYM
+ {
+ /*
# name : userstat.patch
# introduced : 11 or before
-# maintainer : Yasufumi
+# maintainer : Oleg
#
#!!! notice !!!
# Any small change to this file in the main branch
# should be done or reviewed by the maintainer!
-diff -ruN a/configure b/configure
-diff -ruN a/configure.in b/configure.in
---- a/configure.in 2010-10-12 00:34:15.000000000 +0400
-+++ b/configure.in 2010-11-24 18:00:58.000000000 +0300
-@@ -2095,13 +2095,16 @@
- realpath rename rint rwlock_init setupterm \
- shmget shmat shmdt shmctl sigaction sigemptyset sigaddset \
- sighold sigset sigthreadmask port_create sleep \
-- snprintf socket stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
-+ snprintf socket strsep stpcpy strcasecmp strerror strsignal strnlen strpbrk strstr \
- strtol strtoll strtoul strtoull tell tempnam thr_setconcurrency vidattr \
- posix_fallocate backtrace backtrace_symbols backtrace_symbols_fd printstack)
-
- #
- #
- #
-+# The following change can be safely null-merged to 5.5
-+# since configure.cmake in 5.5 does the same check
-+AC_CHECK_LIB(rt, clock_gettime)
- case "$target" in
- *-*-aix4* | *-*-sco*)
- # (grr) aix 4.3 has a stub for clock_gettime, (returning ENOSYS)
-diff -ruN a/include/config.h.in b/include/config.h.in
---- a/include/config.h.in 2010-10-12 00:39:59.000000000 +0400
-+++ b/include/config.h.in 2010-11-24 17:53:34.000000000 +0300
-@@ -498,6 +498,9 @@
- /* Define to 1 if you have the `pthread' library (-lpthread). */
- #undef HAVE_LIBPTHREAD
-
-+/* Define to 1 if you have the `rt' library (-lrt). */
-+#undef HAVE_LIBRT
-+
- /* Define if have -lwrap */
- #undef HAVE_LIBWRAP
-
-@@ -842,6 +845,9 @@
- /* Define to 1 if you have the `strpbrk' function. */
- #undef HAVE_STRPBRK
-
-+/* Define to 1 if you have the `strsep' function. */
-+#undef HAVE_STRSEP
-+
- /* Define to 1 if you have the `strsignal' function. */
- #undef HAVE_STRSIGNAL
-
-@@ -863,7 +869,7 @@
- /* Define to 1 if you have the `strtoull' function. */
- #undef HAVE_STRTOULL
-
--/* Define to 1 if `st_rdev' is member of `struct stat'. */
-+/* Define to 1 if `st_rdev' is a member of `struct stat'. */
- #undef HAVE_STRUCT_STAT_ST_RDEV
-
- /* Define to 1 if your `struct stat' has `st_rdev'. Deprecated, use
-@@ -1158,6 +1164,9 @@
- /* Define to the one symbol short name of this package. */
- #undef PACKAGE_TARNAME
-
-+/* Define to the home page for this package. */
-+#undef PACKAGE_URL
-+
- /* Define to the version of this package. */
- #undef PACKAGE_VERSION
-
diff -ruN a/include/mysql/plugin.h b/include/mysql/plugin.h
---- a/include/mysql/plugin.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/include/mysql/plugin.h 2010-11-24 17:24:52.000000000 +0300
-@@ -705,6 +705,9 @@
+--- a/include/mysql/plugin.h 2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql/plugin.h 2010-12-31 06:06:43.000000000 +0300
+@@ -547,6 +547,9 @@
unsigned long thd_log_slow_verbosity(const MYSQL_THD thd);
int thd_opt_slow_log();
#define EXTENDED_SLOWLOG
Create a temporary file.
diff -ruN a/include/mysql_com.h b/include/mysql_com.h
---- a/include/mysql_com.h 2010-10-12 00:34:28.000000000 +0400
-+++ b/include/mysql_com.h 2010-11-24 17:28:26.000000000 +0300
-@@ -29,6 +29,7 @@
+--- a/include/mysql_com.h 2010-12-03 20:58:24.000000000 +0300
++++ b/include/mysql_com.h 2010-12-31 06:12:05.000000000 +0300
+@@ -31,6 +31,7 @@
#define SERVER_VERSION_LENGTH 60
#define SQLSTATE_LENGTH 5
+#define LIST_PROCESS_HOST_LEN 64
/*
- USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
-@@ -115,6 +116,12 @@
- thread */
- #define REFRESH_MASTER 128 /* Remove all bin logs in the index
- and truncate the index */
-+#define REFRESH_TABLE_STATS 256 /* Refresh table stats hash table */
-+#define REFRESH_INDEX_STATS 512 /* Refresh index stats hash table */
-+#define REFRESH_USER_STATS 1024 /* Refresh user stats hash table */
-+#define REFRESH_SLOW_QUERY_LOG 2048 /* Flush slow query log and rotate*/
-+#define REFRESH_CLIENT_STATS 4096 /* Refresh client stats hash table */
-+#define REFRESH_THREAD_STATS 8192 /* Refresh thread stats hash table */
-
- /* The following can't be set with mysql_refresh() */
- #define REFRESH_READ_LOCK 16384 /* Lock tables for read */
-diff -ruN /dev/null b/patch_info/userstats.info
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ b/patch_info/userstats.info 2010-11-24 17:24:52.000000000 +0300
-@@ -0,0 +1,11 @@
+ Maximum length of comments
+@@ -142,6 +143,11 @@
+ #define REFRESH_DES_KEY_FILE 0x40000L
+ #define REFRESH_USER_RESOURCES 0x80000L
+ #define REFRESH_QUERY_RESPONSE_TIME 0x100000L /* response time distibution */
++#define REFRESH_TABLE_STATS 0x200000L /* Refresh table stats my_hash table */
++#define REFRESH_INDEX_STATS 0x400000L /* Refresh index stats my_hash table */
++#define REFRESH_USER_STATS 0x800000L /* Refresh user stats my_hash table */
++#define REFRESH_CLIENT_STATS 0x1000000L /* Refresh client stats my_hash table */
++#define REFRESH_THREAD_STATS 0x2000000L /* Refresh thread stats my_hash table */
+
+ #define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */
+ #define CLIENT_FOUND_ROWS 2 /* Found instead of affected rows */
+diff -ruN a/patch_info/userstats.patch b/patch_info/userstats.patch
+--- a/patch_info/userstats.patch 1970-01-01 03:00:00.000000000 +0300
++++ b/patch_info/userstats.patch 2010-12-30 00:45:46.000000000 +0300
+@@ -0,0 +1,15 @@
+File=userstats.patch
+Name=SHOW USER/TABLE/INDEX statistics
+Version=V2
+
+2008-11-26
+YK: add switch variable "userstat_running" to control INFORMATION_SCHEMA.*_STATISTICS (default:OFF)
++2010-12-31
++Ported to 5.5.8
++2011-1-5
++Fix porting
diff -ruN a/sql/handler.cc b/sql/handler.cc
---- a/sql/handler.cc 2010-10-12 00:34:25.000000000 +0400
-+++ b/sql/handler.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -1194,6 +1194,8 @@
+--- a/sql/handler.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/handler.cc 2010-12-30 00:59:23.000000000 +0300
+@@ -1239,6 +1239,8 @@
if (cookie)
tc_log->unlog(cookie, xid);
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+ if (is_real_trans)
+ thd->diff_commit_trans++;
+ RUN_HOOK(transaction, after_commit, (thd, FALSE));
end:
- if (rw_trans)
- start_waiting_global_read_lock(thd);
-@@ -1324,6 +1326,8 @@
- /* Always cleanup. Even if there nht==0. There may be savepoints. */
+ if (rw_trans && mdl_request.ticket)
+@@ -1393,6 +1395,8 @@
+ /* Always cleanup. Even if nht==0. There may be savepoints. */
if (is_real_trans)
thd->transaction.cleanup();
+
+ thd->diff_rollback_trans++;
- #endif /* USING_TRANSACTIONS */
if (all)
thd->transaction_rollback_request= FALSE;
-@@ -1762,6 +1766,7 @@
+
+@@ -1796,6 +1800,7 @@
ha_info->reset(); /* keep it conveniently zero-filled */
}
trans->ha_list= sv->ha_list;
DBUG_RETURN(error);
}
-@@ -2122,6 +2127,8 @@
+@@ -2165,6 +2170,8 @@
dup_ref=ref+ALIGN_SIZE(ref_length);
cached_table_flags= table_flags();
}
-+ rows_read = rows_changed = 0;
++ rows_read= rows_changed= 0;
+ memset(index_rows_read, 0, sizeof(index_rows_read));
DBUG_RETURN(error);
}
-@@ -3571,6 +3578,111 @@
+@@ -3596,6 +3603,127 @@
return;
}
+// Updates the global table stats with the TABLE this handler represents.
-+void handler::update_global_table_stats() {
-+ if (!opt_userstat_running) {
-+ rows_read = rows_changed = 0;
++void handler::update_global_table_stats()
++{
++ if (!opt_userstat_running)
++ {
++ rows_read= rows_changed= 0;
+ return;
+ }
+
-+ if (!rows_read && !rows_changed) return; // Nothing to update.
++ if (!rows_read && !rows_changed)
++ return; // Nothing to update.
+ // table_cache_key is db_name + '\0' + table_name + '\0'.
-+ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
++ return;
+
+ TABLE_STATS* table_stats;
+ char key[NAME_LEN * 2 + 2];
+ // [db] + '.' + [table]
+ sprintf(key, "%s.%s", table->s->table_cache_key.str, table->s->table_name.str);
+
-+ pthread_mutex_lock(&LOCK_global_table_stats);
++ mysql_mutex_lock(&LOCK_global_table_stats);
+ // Gets the global table stats, creating one if necessary.
-+ if (!(table_stats = (TABLE_STATS*)hash_search(&global_table_stats,
-+ (uchar*)key,
-+ strlen(key)))) {
-+ if (!(table_stats = ((TABLE_STATS*)
-+ my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++ if (!(table_stats = (TABLE_STATS *) my_hash_search(&global_table_stats,
++ (uchar*)key,
++ strlen(key))))
++ {
++ if (!(table_stats = ((TABLE_STATS *)
++ my_malloc(sizeof(TABLE_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++ {
+ // Out of memory.
+ sql_print_error("Allocating table stats failed.");
+ goto end;
+ }
+ strncpy(table_stats->table, key, sizeof(table_stats->table));
-+ table_stats->rows_read = 0;
-+ table_stats->rows_changed = 0;
-+ table_stats->rows_changed_x_indexes = 0;
-+ table_stats->engine_type = (int) ht->db_type;
++ table_stats->rows_read= 0;
++ table_stats->rows_changed= 0;
++ table_stats->rows_changed_x_indexes= 0;
++ table_stats->engine_type= (int) ht->db_type;
+
-+ if (my_hash_insert(&global_table_stats, (uchar*)table_stats)) {
++ if (my_hash_insert(&global_table_stats, (uchar *) table_stats))
++ {
+ // Out of memory.
+ sql_print_error("Inserting table stats failed.");
-+ my_free((char*)table_stats, 0);
++ my_free((char *) table_stats);
+ goto end;
+ }
+ }
+ // Updates the global table stats.
-+ table_stats->rows_read += rows_read;
-+ table_stats->rows_changed += rows_changed;
-+ table_stats->rows_changed_x_indexes +=
-+ rows_changed * (table->s->keys ? table->s->keys : 1);
-+ current_thd->diff_total_read_rows += rows_read;
-+ rows_read = rows_changed = 0;
++ table_stats->rows_read+= rows_read;
++ table_stats->rows_changed+= rows_changed;
++ table_stats->rows_changed_x_indexes+=
++ rows_changed * (table->s->keys ? table->s->keys : 1);
++ current_thd->diff_total_read_rows+= rows_read;
++ rows_read= rows_changed= 0;
+end:
-+ pthread_mutex_unlock(&LOCK_global_table_stats);
++ mysql_mutex_unlock(&LOCK_global_table_stats);
+}
+
+// Updates the global index stats with this handler's accumulated index reads.
-+void handler::update_global_index_stats() {
++void handler::update_global_index_stats()
++{
+ // table_cache_key is db_name + '\0' + table_name + '\0'.
-+ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str) return;
++ if (!table->s || !table->s->table_cache_key.str || !table->s->table_name.str)
++ return;
+
-+ if (!opt_userstat_running) {
-+ for (uint x = 0; x < table->s->keys; x++) {
-+ index_rows_read[x] = 0;
++ if (!opt_userstat_running)
++ {
++ for (uint x= 0; x < table->s->keys; ++x)
++ {
++ index_rows_read[x]= 0;
+ }
+ return;
+ }
+
-+ for (uint x = 0; x < table->s->keys; x++) {
-+ if (index_rows_read[x]) {
++ for (uint x = 0; x < table->s->keys; ++x)
++ {
++ if (index_rows_read[x])
++ {
+ // Rows were read using this index.
+ KEY* key_info = &table->key_info[x];
+
+ sprintf(key, "%s.%s.%s", table->s->table_cache_key.str,
+ table->s->table_name.str, key_info->name);
+
-+ pthread_mutex_lock(&LOCK_global_index_stats);
++ mysql_mutex_lock(&LOCK_global_index_stats);
+ // Gets the global index stats, creating one if necessary.
-+ if (!(index_stats = (INDEX_STATS*)hash_search(&global_index_stats,
-+ (uchar*)key,
-+ strlen(key)))) {
-+ if (!(index_stats = ((INDEX_STATS*)
-+ my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL))))) {
++ if (!(index_stats = (INDEX_STATS *) my_hash_search(&global_index_stats,
++ (uchar *) key,
++ strlen(key))))
++ {
++ if (!(index_stats = ((INDEX_STATS *)
++ my_malloc(sizeof(INDEX_STATS), MYF(MY_WME | MY_ZEROFILL)))))
++ {
+ // Out of memory.
+ sql_print_error("Allocating index stats failed.");
+ goto end;
+ }
+ strncpy(index_stats->index, key, sizeof(index_stats->index));
-+ index_stats->rows_read = 0;
++ index_stats->rows_read= 0;
+
-+ if (my_hash_insert(&global_index_stats, (uchar*)index_stats)) {
++ if (my_hash_insert(&global_index_stats, (uchar *) index_stats))
++ {
+ // Out of memory.
+ sql_print_error("Inserting index stats failed.");
-+ my_free((char*)index_stats, 0);
++ my_free((char *) index_stats);
+ goto end;
+ }
+ }
+ // Updates the global index stats.
-+ index_stats->rows_read += index_rows_read[x];
-+ index_rows_read[x] = 0;
-+end:
-+ pthread_mutex_unlock(&LOCK_global_index_stats);
++ index_stats->rows_read+= index_rows_read[x];
++ index_rows_read[x]= 0;
++ end:
++ mysql_mutex_unlock(&LOCK_global_index_stats);
+ }
+ }
+}
/****************************************************************************
** Some general functions that isn't in the handler class
diff -ruN a/sql/handler.h b/sql/handler.h
---- a/sql/handler.h 2010-10-12 00:34:25.000000000 +0400
-+++ b/sql/handler.h 2010-11-24 17:28:49.000000000 +0300
-@@ -30,6 +30,10 @@
-
- #define USING_TRANSACTIONS
+--- a/sql/handler.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/handler.h 2010-12-31 05:10:00.000000000 +0300
+@@ -33,6 +33,10 @@
+ #include <ft_global.h>
+ #include <keycache.h>
+#if MAX_KEY > 128
+#error MAX_KEY is too large. Values up to 128 are supported.
// the following is for checking tables
#define HA_ADMIN_ALREADY_DONE 1
-@@ -1121,6 +1125,9 @@
+@@ -561,10 +565,12 @@
+ enum enum_schema_tables
+ {
+ SCH_CHARSETS= 0,
++ SCH_CLIENT_STATS,
+ SCH_COLLATIONS,
+ SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
+ SCH_COLUMNS,
+ SCH_COLUMN_PRIVILEGES,
++ SCH_INDEX_STATS,
+ SCH_ENGINES,
+ SCH_EVENTS,
+ SCH_FILES,
+@@ -592,9 +598,12 @@
+ SCH_TABLE_CONSTRAINTS,
+ SCH_TABLE_NAMES,
+ SCH_TABLE_PRIVILEGES,
++ SCH_TABLE_STATS,
+ SCH_TEMPORARY_TABLES,
++ SCH_THREAD_STATS,
+ SCH_TRIGGERS,
+ SCH_USER_PRIVILEGES,
++ SCH_USER_STATS,
+ SCH_VARIABLES,
+ SCH_VIEWS
+ };
+@@ -1209,6 +1218,9 @@
bool locked;
bool implicit_emptied; /* Can be !=0 only if HEAP */
const COND *pushed_cond;
/**
next_insert_id is the next value which should be inserted into the
auto_increment column: in a inserting-multi-row statement (like INSERT
-@@ -1158,9 +1165,11 @@
+@@ -1260,10 +1272,12 @@
ref_length(sizeof(my_off_t)),
ft_handler(0), inited(NONE),
locked(FALSE), implicit_emptied(0),
- pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
+ pushed_cond(0), rows_read(0), rows_changed(0), next_insert_id(0), insert_id_for_cur_row(0),
- auto_inc_intervals_count(0)
+ auto_inc_intervals_count(0),
+ m_psi(NULL)
- {}
+ {
+ memset(index_rows_read, 0, sizeof(index_rows_read));
virtual ~handler(void)
{
DBUG_ASSERT(locked == FALSE);
-@@ -1284,6 +1293,8 @@
+@@ -1386,6 +1400,8 @@
{
table= table_arg;
table_share= share;
-+ rows_read = rows_changed = 0;
++ rows_read = rows_changed= 0;
+ memset(index_rows_read, 0, sizeof(index_rows_read));
}
virtual double scan_time()
{ return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; }
-@@ -1628,6 +1639,8 @@
+@@ -1753,6 +1769,8 @@
virtual bool is_crashed() const { return 0; }
virtual bool auto_repair() const { return 0; }
#define CHF_CREATE_FLAG 0
#define CHF_DELETE_FLAG 1
diff -ruN a/sql/lex.h b/sql/lex.h
---- a/sql/lex.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/lex.h 2010-11-24 17:28:26.000000000 +0300
-@@ -106,6 +106,7 @@
- { "CHECKSUM", SYM(CHECKSUM_SYM)},
+--- a/sql/lex.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/lex.h 2010-12-30 01:25:40.000000000 +0300
+@@ -111,6 +111,7 @@
{ "CIPHER", SYM(CIPHER_SYM)},
+ { "CLASS_ORIGIN", SYM(CLASS_ORIGIN_SYM)},
{ "CLIENT", SYM(CLIENT_SYM)},
+ { "CLIENT_STATISTICS", SYM(CLIENT_STATS_SYM)},
{ "CLOSE", SYM(CLOSE_SYM)},
{ "COALESCE", SYM(COALESCE)},
{ "CODE", SYM(CODE_SYM)},
-@@ -245,6 +246,7 @@
+@@ -257,6 +258,7 @@
{ "IN", SYM(IN_SYM)},
{ "INDEX", SYM(INDEX_SYM)},
{ "INDEXES", SYM(INDEXES)},
{ "INFILE", SYM(INFILE)},
{ "INITIAL_SIZE", SYM(INITIAL_SIZE_SYM)},
{ "INNER", SYM(INNER_SYM)},
-@@ -478,6 +480,7 @@
- { "SIGNED", SYM(SIGNED_SYM)},
- { "SIMPLE", SYM(SIMPLE_SYM)},
- { "SLAVE", SYM(SLAVE)},
-+ { "SLOW", SYM(SLOW_SYM)},
- { "SNAPSHOT", SYM(SNAPSHOT_SYM)},
- { "SMALLINT", SYM(SMALLINT)},
- { "SOCKET", SYM(SOCKET_SYM)},
-@@ -527,12 +530,14 @@
+@@ -550,12 +552,14 @@
{ "TABLES", SYM(TABLES)},
{ "TABLESPACE", SYM(TABLESPACE)},
{ "TABLE_CHECKSUM", SYM(TABLE_CHECKSUM_SYM)},
{ "TIME", SYM(TIME_SYM)},
{ "TIMESTAMP", SYM(TIMESTAMP)},
{ "TIMESTAMPADD", SYM(TIMESTAMP_ADD)},
-@@ -568,6 +573,7 @@
+@@ -591,6 +595,7 @@
{ "USE", SYM(USE_SYM)},
{ "USER", SYM(USER)},
{ "USER_RESOURCES", SYM(RESOURCES)},
{ "USING", SYM(USING)},
{ "UTC_DATE", SYM(UTC_DATE_SYM)},
diff -ruN a/sql/log.cc b/sql/log.cc
---- a/sql/log.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/log.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -826,6 +826,13 @@
+--- a/sql/log.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/log.cc 2010-12-30 01:55:35.000000000 +0300
+@@ -922,6 +922,13 @@
mysql_slow_log.reopen_file();
}
/*
Log error with all enabled log event handlers
-@@ -937,6 +944,21 @@
- return rc;
- }
-
-+bool LOGGER::flush_slow_log(THD *thd)
-+{
-+ /*
-+ Now we lock logger, as nobody should be able to use logging routines while
-+ log tables are closed
-+ */
-+ logger.lock_exclusive();
-+
-+ /* reopen log files */
-+ file_log_handler->flush_slow_log();
-+
-+ /* end of log flush */
-+ logger.unlock();
-+ return 0;
-+}
-
- /*
- Log slow query with all enabled log event handlers
-@@ -4495,6 +4517,8 @@
+@@ -4843,6 +4850,8 @@
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
if (e.write(file))
goto err;
+ if (file == &log_file)
-+ thd->binlog_bytes_written += e.data_written;
++ thd->binlog_bytes_written+= e.data_written;
}
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
{
-@@ -4506,12 +4530,16 @@
+@@ -4854,12 +4863,16 @@
minimum());
if (e.write(file))
goto err;
+ if (file == &log_file)
-+ thd->binlog_bytes_written += e.data_written;
++ thd->binlog_bytes_written+= e.data_written;
}
if (thd->rand_used)
{
if (e.write(file))
goto err;
+ if (file == &log_file)
-+ thd->binlog_bytes_written += e.data_written;
++ thd->binlog_bytes_written+= e.data_written;
}
if (thd->user_var_events.elements)
{
-@@ -4527,6 +4555,8 @@
- user_var_event->charset_number);
+@@ -4882,6 +4895,8 @@
+ flags);
if (e.write(file))
goto err;
+ if (file == &log_file)
-+ thd->binlog_bytes_written += e.data_written;
++ thd->binlog_bytes_written+= e.data_written;
}
}
}
-@@ -4539,6 +4569,8 @@
- if (event_info->write(file) ||
+@@ -4893,6 +4908,8 @@
+ if (event_info->write(file) ||
DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
goto err;
+ if (file == &log_file)
-+ thd->binlog_bytes_written += event_info->data_written;
++ thd->binlog_bytes_written+= event_info->data_written;
- if (file == &log_file) // we are writing to the real log (disk)
- {
-@@ -4684,7 +4716,7 @@
+ error= 0;
+
+@@ -5056,7 +5073,8 @@
be reset as a READ_CACHE to be able to read the contents from it.
*/
-int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
-+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool sync_log)
++int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache,
++ bool lock_log, bool sync_log)
{
Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
-@@ -4732,6 +4764,7 @@
+@@ -5103,6 +5121,7 @@
/* write the first half of the split header */
if (my_b_write(&log_file, header, carry))
return ER_ERROR_ON_WRITE;
-+ thd->binlog_bytes_written += carry;
++ thd->binlog_bytes_written+= carry;
/*
copy fixed second half of header to cache so the correct
-@@ -4800,6 +4833,7 @@
+@@ -5171,6 +5190,7 @@
/* Write data to the binary log file */
if (my_b_write(&log_file, cache->read_pos, length))
return ER_ERROR_ON_WRITE;
-+ thd->binlog_bytes_written += length;
++ thd->binlog_bytes_written+= length;
cache->read_pos=cache->read_end; // Mark buffer used up
} while ((length= my_b_fill(cache)));
-@@ -4922,21 +4956,24 @@
- */
+@@ -5281,20 +5301,23 @@
+ Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0);
if (qinfo.write(&log_file))
goto err;
-+ thd->binlog_bytes_written += qinfo.data_written;
-
++ thd->binlog_bytes_written+= qinfo.data_written;
DBUG_EXECUTE_IF("crash_before_writing_xid",
{
- if ((write_error= write_cache(cache, false, true)))
if (commit_event && commit_event->write(&log_file))
goto err;
+ if (commit_event)
-+ thd->binlog_bytes_written += commit_event->data_written;
++ thd->binlog_bytes_written+= commit_event->data_written;
if (incident && write_incident(thd, FALSE))
goto err;
diff -ruN a/sql/log.h b/sql/log.h
---- a/sql/log.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/log.h 2010-11-24 17:24:52.000000000 +0300
-@@ -361,7 +361,7 @@
+--- a/sql/log.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/log.h 2010-12-30 01:56:04.000000000 +0300
+@@ -414,7 +414,8 @@
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
-
bool write_incident(THD *thd, bool lock);
+
- int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
-+ int write_cache(THD *thd, IO_CACHE *cache, bool lock_log, bool flush_and_sync);
++ int write_cache(THD *thd, IO_CACHE *cache,
++ bool lock_log, bool flush_and_sync);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
-@@ -499,6 +499,7 @@
+@@ -566,6 +567,7 @@
const char *sql_text, uint sql_text_len,
CHARSET_INFO *client_cs);
void flush();
void init_pthread_objects();
MYSQL_QUERY_LOG *get_mysql_slow_log() { return &mysql_slow_log; }
MYSQL_QUERY_LOG *get_mysql_log() { return &mysql_log; }
-@@ -543,6 +544,7 @@
- void init_base();
- void init_log_tables();
- bool flush_logs(THD *thd);
-+ bool flush_slow_log(THD *thd);
- /* Perform basic logger cleanup. this will leave e.g. error log open. */
- void cleanup_base();
- /* Free memory. Nothing could be logged after this function is called */
-diff -ruN a/sql/mysql_priv.h b/sql/mysql_priv.h
---- a/sql/mysql_priv.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/mysql_priv.h 2010-11-24 17:31:34.000000000 +0300
-@@ -1139,7 +1139,17 @@
- bool multi_delete_set_locks_and_link_aux_tables(LEX *lex);
- void init_max_user_conn(void);
- void init_update_queries(void);
-+void init_global_user_stats(void);
-+void init_global_table_stats(void);
-+void init_global_index_stats(void);
-+void init_global_client_stats(void);
-+void init_global_thread_stats(void);
- void free_max_user_conn(void);
-+void free_global_user_stats(void);
-+void free_global_table_stats(void);
-+void free_global_index_stats(void);
-+void free_global_client_stats(void);
-+void free_global_thread_stats(void);
- pthread_handler_t handle_bootstrap(void *arg);
- int mysql_execute_command(THD *thd);
- bool do_command(THD *thd);
-@@ -2015,6 +2025,7 @@
- extern ulong max_connect_errors, connect_timeout;
- extern ulong slave_net_timeout, slave_trans_retries;
- extern uint max_user_connections;
-+extern ulonglong denied_connections;
- extern ulong what_to_log,flush_time;
- extern ulong query_buff_size;
- extern ulong max_prepared_stmt_count, prepared_stmt_count;
-@@ -2068,6 +2079,7 @@
- extern my_bool opt_slave_compressed_protocol, use_temp_pool;
- extern ulong slave_exec_mode_options;
- extern my_bool opt_readonly, lower_case_file_system;
-+extern my_bool opt_userstat_running, opt_thread_statistics;
- extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
- extern my_bool opt_secure_auth;
- extern char* opt_secure_file_priv;
-@@ -2132,6 +2144,15 @@
- extern struct system_variables max_system_variables;
- extern struct system_status_var global_status_var;
- extern struct rand_struct sql_rand;
-+extern HASH global_user_stats;
-+extern HASH global_client_stats;
-+extern HASH global_thread_stats;
-+extern pthread_mutex_t LOCK_global_user_client_stats;
-+extern HASH global_table_stats;
-+extern pthread_mutex_t LOCK_global_table_stats;
-+extern HASH global_index_stats;
-+extern pthread_mutex_t LOCK_global_index_stats;
-+extern pthread_mutex_t LOCK_stats;
-
- extern const char *opt_date_time_formats[];
- extern KNOWN_DATE_TIME_FORMAT known_date_time_formats[];
diff -ruN a/sql/mysqld.cc b/sql/mysqld.cc
---- a/sql/mysqld.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/mysqld.cc 2010-11-24 17:31:34.000000000 +0300
-@@ -533,6 +533,7 @@
+--- a/sql/mysqld.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/mysqld.cc 2010-12-30 02:04:50.000000000 +0300
+@@ -438,6 +438,7 @@
uint opt_debug_sync_timeout= 0;
#endif /* defined(ENABLED_DEBUG_SYNC) */
my_bool opt_old_style_user_limits= 0, trust_function_creators= 0;
+my_bool opt_userstat_running= 0, opt_thread_statistics= 0;
+ my_bool opt_optimizer_fix= 0;
/*
True if there is at least one per-hour limit for some user, so we should
- check them before each query (and possibly reset counters when hour is
-@@ -581,6 +582,7 @@
+@@ -486,6 +487,7 @@
+ ulong specialflag=0;
ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
ulong max_connections, max_connect_errors;
- uint max_user_connections= 0;
-+ulonglong denied_connections = 0;
- /**
- Limit of the total number of prepared statements in the server.
- Is necessary to protect the server against out-of-memory attacks.
-@@ -682,6 +684,10 @@
- LOCK_global_system_variables,
- LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
- LOCK_connection_count;
-+pthread_mutex_t LOCK_stats;
-+pthread_mutex_t LOCK_global_user_client_stats;
-+pthread_mutex_t LOCK_global_table_stats;
-+pthread_mutex_t LOCK_global_index_stats;
++ulonglong denied_connections= 0;
+
+ /* flashcache */
+ int cachedev_fd;
+@@ -630,7 +632,9 @@
+ LOCK_crypt,
+ LOCK_global_system_variables,
+ LOCK_user_conn, LOCK_slave_list, LOCK_active_mi,
+- LOCK_connection_count, LOCK_error_messages;
++ LOCK_connection_count, LOCK_error_messages,
++ LOCK_stats, LOCK_global_user_client_stats,
++ LOCK_global_table_stats, LOCK_global_index_stats;
/**
The below lock protects access to two global server variables:
max_prepared_stmt_count and prepared_stmt_count. These variables
-@@ -1367,6 +1373,11 @@
- x_free(opt_secure_file_priv);
- bitmap_free(&temp_pool);
- free_max_user_conn();
+@@ -1490,6 +1494,11 @@
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
+ query_response_time_free();
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ free_global_user_stats();
+ free_global_client_stats();
+ free_global_thread_stats();
#ifdef HAVE_REPLICATION
end_slave_list();
#endif
-@@ -1483,6 +1494,10 @@
- (void) pthread_cond_destroy(&COND_thread_cache);
- (void) pthread_cond_destroy(&COND_flush_thread_cache);
- (void) pthread_cond_destroy(&COND_manager);
-+ (void) pthread_mutex_destroy(&LOCK_stats);
-+ (void) pthread_mutex_destroy(&LOCK_global_user_client_stats);
-+ (void) pthread_mutex_destroy(&LOCK_global_table_stats);
-+ (void) pthread_mutex_destroy(&LOCK_global_index_stats);
+@@ -1593,6 +1602,10 @@
+ mysql_cond_destroy(&COND_thread_cache);
+ mysql_cond_destroy(&COND_flush_thread_cache);
+ mysql_cond_destroy(&COND_manager);
++ mysql_mutex_destroy(&LOCK_stats);
++ mysql_mutex_destroy(&LOCK_global_user_client_stats);
++ mysql_mutex_destroy(&LOCK_global_table_stats);
++ mysql_mutex_destroy(&LOCK_global_index_stats);
}
-
#endif /*EMBEDDED_LIBRARY*/
-@@ -3172,6 +3187,7 @@
+
+@@ -3024,6 +3037,7 @@
{"show_binlog_events", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOG_EVENTS]), SHOW_LONG_STATUS},
{"show_binlogs", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_BINLOGS]), SHOW_LONG_STATUS},
{"show_charsets", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CHARSETS]), SHOW_LONG_STATUS},
+ {"show_client_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CLIENT_STATS]), SHOW_LONG_STATUS},
{"show_collations", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLLATIONS]), SHOW_LONG_STATUS},
- {"show_column_types", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_COLUMN_TYPES]), SHOW_LONG_STATUS},
{"show_contributors", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CONTRIBUTORS]), SHOW_LONG_STATUS},
-@@ -3193,6 +3209,7 @@
+ {"show_create_db", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_CREATE_DB]), SHOW_LONG_STATUS},
+@@ -3044,6 +3058,7 @@
#endif
{"show_function_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS_FUNC]), SHOW_LONG_STATUS},
{"show_grants", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_GRANTS]), SHOW_LONG_STATUS},
{"show_keys", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_KEYS]), SHOW_LONG_STATUS},
{"show_master_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_MASTER_STAT]), SHOW_LONG_STATUS},
{"show_new_master", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_NEW_MASTER]), SHOW_LONG_STATUS},
-@@ -3211,9 +3228,12 @@
- {"show_slave_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_STAT]), SHOW_LONG_STATUS},
+@@ -3063,10 +3078,13 @@
+ {"show_slave_status_nolock", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_SLAVE_NOLOCK_STAT]), SHOW_LONG_STATUS},
{"show_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STATUS]), SHOW_LONG_STATUS},
{"show_storage_engines", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_STORAGE_ENGINES]), SHOW_LONG_STATUS},
+ {"show_table_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATS]), SHOW_LONG_STATUS},
{"show_table_status", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLE_STATUS]), SHOW_LONG_STATUS},
{"show_tables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TABLES]), SHOW_LONG_STATUS},
+ {"show_temporary_tables",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TEMPORARY_TABLES]), SHOW_LONG_STATUS},
+ {"show_thread_statistics",(char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_THREAD_STATS]), SHOW_LONG_STATUS},
{"show_triggers", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_TRIGGERS]), SHOW_LONG_STATUS},
+ {"show_user_statistics", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_USER_STATS]), SHOW_LONG_STATUS},
{"show_variables", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_VARIABLES]), SHOW_LONG_STATUS},
{"show_warnings", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SHOW_WARNS]), SHOW_LONG_STATUS},
{"slave_start", (char*) offsetof(STATUS_VAR, com_stat[(uint) SQLCOM_SLAVE_START]), SHOW_LONG_STATUS},
-@@ -3652,6 +3672,10 @@
- #endif
- (void) pthread_mutex_init(&LOCK_server_started, MY_MUTEX_INIT_FAST);
- (void) pthread_cond_init(&COND_server_started,NULL);
-+ (void) pthread_mutex_init(&LOCK_stats, MY_MUTEX_INIT_FAST);
-+ (void) pthread_mutex_init(&LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
-+ (void) pthread_mutex_init(&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
-+ (void) pthread_mutex_init(&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
+@@ -3587,6 +3605,13 @@
+ mysql_mutex_init(key_LOCK_server_started,
+ &LOCK_server_started, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_COND_server_started, &COND_server_started, NULL);
++ mysql_mutex_init(key_LOCK_stats, &LOCK_stats, MY_MUTEX_INIT_FAST);
++ mysql_mutex_init(key_LOCK_global_user_client_stats,
++ &LOCK_global_user_client_stats, MY_MUTEX_INIT_FAST);
++ mysql_mutex_init(key_LOCK_global_table_stats,
++ &LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
++ mysql_mutex_init(key_LOCK_global_index_stats,
++ &LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
sp_cache_init();
#ifdef HAVE_EVENT_SCHEDULER
Events::init_mutexes();
-@@ -4053,6 +4077,9 @@
- if (!errmesg[0][0])
- unireg_abort(1);
-
+@@ -3956,6 +3981,9 @@
+ query_response_time_init();
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ /* We have to initialize the storage engines before CSV logging */
+ init_global_table_stats();
+ init_global_index_stats();
+
- /* We have to initialize the storage engines before CSV logging */
if (ha_init())
{
-@@ -4199,6 +4226,9 @@
+ sql_print_error("Can't init databases");
+@@ -4092,6 +4120,9 @@
init_max_user_conn();
init_update_queries();
DBUG_RETURN(0);
}
-@@ -5016,6 +5046,7 @@
-
- DBUG_PRINT("error",("Too many connections"));
- close_connection(thd, ER_CON_COUNT_ERROR, 1);
+@@ -5123,6 +5154,7 @@
+ {
+ sql_print_warning("%s", ER_DEFAULT(ER_CON_COUNT_ERROR));
+ }
+ statistic_increment(denied_connections, &LOCK_status);
delete thd;
DBUG_VOID_RETURN;
}
-@@ -5800,6 +5831,8 @@
- OPT_SLAVE_EXEC_MODE,
- OPT_GENERAL_LOG_FILE,
- OPT_SLOW_QUERY_LOG_FILE,
-+ OPT_USERSTAT_RUNNING,
-+ OPT_THREAD_STATISTICS,
- OPT_USE_GLOBAL_LONG_QUERY_TIME,
- OPT_USE_GLOBAL_LOG_SLOW_CONTROL,
- OPT_SLOW_QUERY_LOG_MICROSECONDS_TIMESTAMP,
-@@ -7292,6 +7325,14 @@
- &max_system_variables.net_wait_timeout, 0, GET_ULONG,
- REQUIRED_ARG, NET_WAIT_TIMEOUT, 1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT),
- 0, 1, 0},
-+ {"userstat_running", OPT_USERSTAT_RUNNING,
-+ "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, INDEX_STATISTICS and TABLE_STATISTICS running",
-+ (uchar**) &opt_userstat_running, (uchar**) &opt_userstat_running,
-+ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
-+ {"thread_statistics", OPT_THREAD_STATISTICS,
-+ "Control TABLE_STATISTICS running, when userstat_running is enabled",
-+ (uchar**) &opt_thread_statistics, (uchar**) &opt_thread_statistics,
-+ 0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
- {"binlog-direct-non-transactional-updates", OPT_BINLOG_DIRECT_NON_TRANS_UPDATE,
- "Causes updates to non-transactional engines using statement format to be "
- "written directly to binary log. Before using this option, make sure that "
-diff -ruN a/sql/set_var.cc b/sql/set_var.cc
---- a/sql/set_var.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/set_var.cc 2010-11-24 17:31:34.000000000 +0300
-@@ -556,6 +556,10 @@
- static sys_var_thd_ulong sys_read_buff_size(&vars, "read_buffer_size",
- &SV::read_buff_size);
- static sys_var_opt_readonly sys_readonly(&vars, "read_only", &opt_readonly);
-+static sys_var_bool_ptr sys_userstat_running(&vars, "userstat_running",
-+ &opt_userstat_running);
-+static sys_var_bool_ptr sys_thread_statistics(&vars, "thread_statistics",
-+ &opt_thread_statistics);
- static sys_var_thd_ulong sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size",
- &SV::read_rnd_buff_size);
- static sys_var_thd_ulong sys_div_precincrement(&vars, "div_precision_increment",
+@@ -7820,6 +7852,8 @@
+ key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
+ key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
+ key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
++ key_LOCK_stats, key_LOCK_global_user_client_stats,
++ key_LOCK_global_table_stats, key_LOCK_global_index_stats,
+ key_LOCK_gdl, key_LOCK_global_system_variables,
+ key_LOCK_manager,
+ key_LOCK_prepared_stmt_count,
+@@ -7857,6 +7891,13 @@
+ { &key_LOCK_delayed_insert, "LOCK_delayed_insert", PSI_FLAG_GLOBAL},
+ { &key_LOCK_delayed_status, "LOCK_delayed_status", PSI_FLAG_GLOBAL},
+ { &key_LOCK_error_log, "LOCK_error_log", PSI_FLAG_GLOBAL},
++ { &key_LOCK_stats, "LOCK_stats", PSI_FLAG_GLOBAL},
++ { &key_LOCK_global_user_client_stats,
++ "LOCK_global_user_client_stats", PSI_FLAG_GLOBAL},
++ { &key_LOCK_global_table_stats,
++ "LOCK_global_table_stats", PSI_FLAG_GLOBAL},
++ { &key_LOCK_global_index_stats,
++ "LOCK_global_index_stats", PSI_FLAG_GLOBAL},
+ { &key_LOCK_gdl, "LOCK_gdl", PSI_FLAG_GLOBAL},
+ { &key_LOCK_global_system_variables, "LOCK_global_system_variables", PSI_FLAG_GLOBAL},
+ { &key_LOCK_manager, "LOCK_manager", PSI_FLAG_GLOBAL},
+diff -ruN a/sql/mysqld.h b/sql/mysqld.h
+--- a/sql/mysqld.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/mysqld.h 2010-12-31 06:04:59.000000000 +0300
+@@ -23,6 +23,7 @@
+ #include "my_atomic.h" /* my_atomic_rwlock_t */
+ #include "mysql/psi/mysql_file.h" /* MYSQL_FILE */
+ #include "sql_list.h" /* I_List */
++#include "hash.h"
+
+ class THD;
+ struct handlerton;
+@@ -114,6 +115,7 @@
+ extern ulonglong slave_type_conversions_options;
+ extern my_bool read_only, opt_readonly;
+ extern my_bool lower_case_file_system;
++extern my_bool opt_userstat_running, opt_thread_statistics;
+ extern my_bool opt_optimizer_fix;
+ extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
+ extern my_bool opt_secure_auth;
+@@ -180,6 +182,7 @@
+ extern ulong slave_trans_retries;
+ extern uint slave_net_timeout;
+ extern uint max_user_connections;
++extern ulonglong denied_connections;
+ extern ulong what_to_log,flush_time;
+ extern ulong max_prepared_stmt_count, prepared_stmt_count;
+ extern ulong binlog_cache_size, open_files_limit;
+@@ -205,6 +208,11 @@
+ extern struct system_variables max_system_variables;
+ extern struct system_status_var global_status_var;
+ extern struct rand_struct sql_rand;
++extern HASH global_user_stats;
++extern HASH global_client_stats;
++extern HASH global_thread_stats;
++extern HASH global_table_stats;
++extern HASH global_index_stats;
+ extern const char *opt_date_time_formats[];
+ extern handlerton *partition_hton;
+ extern handlerton *myisam_hton;
+@@ -242,6 +250,8 @@
+ key_delayed_insert_mutex, key_hash_filo_lock, key_LOCK_active_mi,
+ key_LOCK_connection_count, key_LOCK_crypt, key_LOCK_delayed_create,
+ key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
++ key_LOCK_stats, key_LOCK_global_user_client_stats,
++ key_LOCK_global_table_stats, key_LOCK_global_index_stats,
+ key_LOCK_gdl, key_LOCK_global_system_variables,
+ key_LOCK_logger, key_LOCK_manager,
+ key_LOCK_prepared_stmt_count,
+@@ -338,7 +348,9 @@
+ LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
+ LOCK_slave_list, LOCK_active_mi, LOCK_manager,
+ LOCK_global_system_variables, LOCK_user_conn,
+- LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count;
++ LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count,
++ LOCK_stats, LOCK_global_user_client_stats,
++ LOCK_global_table_stats, LOCK_global_index_stats;
+ extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_thread_count;
+ #ifdef HAVE_OPENSSL
+ extern mysql_mutex_t LOCK_des_key_file;
+@@ -450,6 +462,16 @@
+ return id;
+ }
+
++void init_global_user_stats(void);
++void init_global_table_stats(void);
++void init_global_index_stats(void);
++void init_global_client_stats(void);
++void init_global_thread_stats(void);
++void free_global_user_stats(void);
++void free_global_table_stats(void);
++void free_global_index_stats(void);
++void free_global_client_stats(void);
++void free_global_thread_stats(void);
+
+ /*
+ TODO: Replace this with an inline function.
diff -ruN a/sql/sql_base.cc b/sql/sql_base.cc
---- a/sql/sql_base.cc 2010-10-12 00:34:33.000000000 +0400
-+++ b/sql/sql_base.cc 2010-11-24 17:29:05.000000000 +0300
-@@ -1382,6 +1382,12 @@
- DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx", table->s->db.str,
- table->s->table_name.str, (long) table));
+--- a/sql/sql_base.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_base.cc 2010-12-30 02:33:17.000000000 +0300
+@@ -1524,6 +1524,11 @@
+ table->mdl_ticket= NULL;
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ if(table->file)
+ {
+ table->file->update_global_table_stats();
+ table->file->update_global_index_stats();
+ }
-+
*table_ptr=table->next;
- /*
- When closing a MERGE parent or child table, detach the children first.
-@@ -1922,6 +1928,8 @@
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+
+@@ -2149,6 +2154,8 @@
DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
table->s->db.str, table->s->table_name.str));
closefrm(table, 0);
if (delete_table)
diff -ruN a/sql/sql_class.cc b/sql/sql_class.cc
---- a/sql/sql_class.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_class.cc 2010-11-24 17:31:33.000000000 +0300
-@@ -706,6 +706,13 @@
+--- a/sql/sql_class.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_class.cc 2010-12-30 02:41:40.000000000 +0300
+@@ -601,6 +601,13 @@
mysys_var=0;
binlog_evt_union.do_union= FALSE;
enable_slow_log= 0;
-+ busy_time = 0;
-+ cpu_time = 0;
-+ bytes_received = 0;
-+ bytes_sent = 0;
-+ binlog_bytes_written = 0;
-+ updated_row_count = 0;
-+ sent_row_count_2 = 0;
++ busy_time= 0;
++ cpu_time= 0;
++ bytes_received= 0;
++ bytes_sent= 0;
++ binlog_bytes_written= 0;
++ updated_row_count= 0;
++ sent_row_count_2= 0;
#ifndef DBUG_OFF
dbug_sentry=THD_SENTRY_MAGIC;
#endif
-@@ -909,6 +916,7 @@
- reset_current_stmt_binlog_row_based();
- bzero((char *) &status_var, sizeof(status_var));
- sql_log_bin_toplevel= options & OPTION_BIN_LOG;
+@@ -981,6 +988,7 @@
+ variables.option_bits|= OPTION_BIN_LOG;
+ else
+ variables.option_bits&= ~OPTION_BIN_LOG;
+ reset_stats();
#if defined(ENABLED_DEBUG_SYNC)
/* Initialize the Debug Sync Facility. See debug_sync.cc. */
-@@ -916,6 +924,84 @@
+@@ -988,6 +996,94 @@
#endif /* defined(ENABLED_DEBUG_SYNC) */
}
+// Resets stats in a THD.
-+void THD::reset_stats(void) {
-+ current_connect_time = time(NULL);
-+ last_global_update_time = current_connect_time;
++void THD::reset_stats(void)
++{
++ current_connect_time= time(NULL);
++ last_global_update_time= current_connect_time;
+ reset_diff_stats();
+}
+
+// Resets the 'diff' stats, which are used to update global stats.
-+void THD::reset_diff_stats(void) {
-+ diff_total_busy_time = 0;
-+ diff_total_cpu_time = 0;
-+ diff_total_bytes_received = 0;
-+ diff_total_bytes_sent = 0;
-+ diff_total_binlog_bytes_written = 0;
-+ diff_total_sent_rows = 0;
-+ diff_total_updated_rows = 0;
-+ diff_total_read_rows = 0;
-+ diff_select_commands = 0;
-+ diff_update_commands = 0;
-+ diff_other_commands = 0;
-+ diff_commit_trans = 0;
-+ diff_rollback_trans = 0;
-+ diff_denied_connections = 0;
-+ diff_lost_connections = 0;
-+ diff_access_denied_errors = 0;
-+ diff_empty_queries = 0;
++void THD::reset_diff_stats(void)
++{
++ diff_total_busy_time= 0;
++ diff_total_cpu_time= 0;
++ diff_total_bytes_received= 0;
++ diff_total_bytes_sent= 0;
++ diff_total_binlog_bytes_written= 0;
++ diff_total_sent_rows= 0;
++ diff_total_updated_rows= 0;
++ diff_total_read_rows= 0;
++ diff_select_commands= 0;
++ diff_update_commands= 0;
++ diff_other_commands= 0;
++ diff_commit_trans= 0;
++ diff_rollback_trans= 0;
++ diff_denied_connections= 0;
++ diff_lost_connections= 0;
++ diff_access_denied_errors= 0;
++ diff_empty_queries= 0;
+}
+
+// Updates 'diff' stats of a THD.
-+void THD::update_stats(bool ran_command) {
-+ if (opt_userstat_running) {
-+ diff_total_busy_time += busy_time;
-+ diff_total_cpu_time += cpu_time;
-+ diff_total_bytes_received += bytes_received;
-+ diff_total_bytes_sent += bytes_sent;
-+ diff_total_binlog_bytes_written += binlog_bytes_written;
-+ diff_total_sent_rows += sent_row_count_2;
-+ diff_total_updated_rows += updated_row_count;
++void THD::update_stats(bool ran_command)
++{
++ if (opt_userstat_running)
++ {
++ diff_total_busy_time+= busy_time;
++ diff_total_cpu_time+= cpu_time;
++ diff_total_bytes_received+= bytes_received;
++ diff_total_bytes_sent+= bytes_sent;
++ diff_total_binlog_bytes_written+= binlog_bytes_written;
++ diff_total_sent_rows+= sent_row_count_2;
++ diff_total_updated_rows+= updated_row_count;
+ // diff_total_read_rows is updated in handler.cc.
+
-+ if (ran_command) {
++ if (ran_command)
++ {
+ // The replication thread has the COM_CONNECT command.
+ if ((old_command == COM_QUERY || command == COM_CONNECT) &&
-+ (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END)) {
++ (lex->sql_command >= 0 && lex->sql_command < SQLCOM_END))
++ {
+ // A SQL query.
-+ if (lex->sql_command == SQLCOM_SELECT) {
++ if (lex->sql_command == SQLCOM_SELECT)
++ {
+ diff_select_commands++;
+ if (!sent_row_count_2)
+ diff_empty_queries++;
-+ } else if (! sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND) {
++ }
++ else if (!sql_command_flags[lex->sql_command] & CF_STATUS_COMMAND)
++ {
+ // 'SHOW ' commands become SQLCOM_SELECT.
+ diff_other_commands++;
+ // 'SHOW ' commands shouldn't inflate total sent row count.
-+ diff_total_sent_rows -= sent_row_count_2;
++ diff_total_sent_rows-= sent_row_count_2;
+ } else if (is_update_query(lex->sql_command)) {
+ diff_update_commands++;
+ } else {
+ // diff_access_denied_errors is updated in sql_parse.cc.
+
+ /* reset counters to zero to avoid double-counting since values
-+ are already store in diff_total_*. */
++ are already stored in diff_total_*.
++ */
+ }
-+ busy_time = 0;
-+ cpu_time = 0;
-+ bytes_received = 0;
-+ bytes_sent = 0;
-+ binlog_bytes_written = 0;
-+ updated_row_count = 0;
-+ sent_row_count_2 = 0;
++ busy_time= 0;
++ cpu_time= 0;
++ bytes_received= 0;
++ bytes_sent= 0;
++ binlog_bytes_written= 0;
++ updated_row_count= 0;
++ sent_row_count_2= 0;
+}
/*
Init THD for query processing.
-@@ -1547,6 +1633,32 @@
+@@ -1688,6 +1784,32 @@
}
#endif
struct Item_change_record: public ilink
{
-@@ -1734,6 +1846,7 @@
- buffer.set(buff, sizeof(buff), &my_charset_bin);
+@@ -1864,6 +1986,7 @@
}
+
thd->sent_row_count++;
+ thd->sent_row_count_2++;
- if (thd->is_error())
- {
- protocol->remove_last_row();
-@@ -1838,6 +1951,7 @@
+
+ if (thd->vio_ok())
+ DBUG_RETURN(protocol->write());
+@@ -1956,6 +2079,7 @@
select_export::~select_export()
{
thd->sent_row_count=row_count;
-+ thd->sent_row_count_2=row_count;
++ thd->sent_row_count_2= row_count;
}
-@@ -2870,6 +2984,7 @@
+@@ -2979,6 +3103,7 @@
if (likely(thd != 0))
{ /* current_thd==0 when close_connection() calls net_send_error() */
thd->status_var.bytes_sent+= length;
}
}
-@@ -2877,6 +2992,7 @@
+@@ -2986,6 +3111,7 @@
void thd_increment_bytes_received(ulong length)
{
current_thd->status_var.bytes_received+= length;
diff -ruN a/sql/sql_class.h b/sql/sql_class.h
---- a/sql/sql_class.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_class.h 2010-11-24 17:28:57.000000000 +0300
-@@ -1435,6 +1435,8 @@
- first byte of the packet in do_command()
+--- a/sql/sql_class.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_class.h 2010-12-31 05:15:57.000000000 +0300
+@@ -1610,6 +1610,8 @@
*/
enum enum_server_command command;
+ uint32 server_id;
+ // Used to save the command, before it is set to COM_SLEEP.
+ enum enum_server_command old_command;
- uint32 server_id;
uint32 file_id; // for LOAD DATA INFILE
/* remote (peer) port */
-@@ -1828,6 +1830,8 @@
- /* variables.transaction_isolation is reset to this after each commit */
- enum_tx_isolation session_tx_isolation;
+ uint16 peer_port;
+@@ -2081,6 +2083,8 @@
+ */
+ enum_tx_isolation tx_isolation;
enum_check_fields count_cuted_fields;
+ ha_rows updated_row_count;
+ ha_rows sent_row_count_2; /* for userstat */
DYNAMIC_ARRAY user_var_events; /* For user variables replication */
MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
-@@ -1916,6 +1920,49 @@
+@@ -2176,6 +2180,49 @@
*/
LOG_INFO* current_linfo;
NET* slave_net; // network connection from slave -> m.
+ Used to update global user stats. The global user stats are updated
+ occasionally with the 'diff' variables. After the update, the 'diff'
+ variables are reset to 0.
-+ */
++ */
+ // Time when the current thread connected to MySQL.
+ time_t current_connect_time;
+ // Last time when THD stats were updated in global_user_stats.
/* Used by the sys_var class to store temporary values */
union
{
-@@ -1981,6 +2028,11 @@
+@@ -2256,6 +2303,11 @@
alloc_root.
*/
void init_for_queries();
void change_user(void);
void cleanup(void);
void cleanup_after_query();
-@@ -2351,9 +2403,15 @@
- *p_db= strmake(db, db_length);
- *p_db_length= db_length;
- return FALSE;
-+
-+ // Returns string as 'IP:port' for the client-side of the connnection represented
-+ // by 'client' as displayed by SHOW PROCESSLIST. Allocates memory from the heap of
-+ // this THD and that is not reclaimed immediately, so use sparingly. May return NULL.
+@@ -2726,6 +2778,15 @@
}
thd_scheduler scheduler;
++ /* Returns string as 'IP:port' for the client-side
++ of the connection represented
++ by 'client' as displayed by SHOW PROCESSLIST.
++ Allocates memory from the heap of
++ this THD and that is not reclaimed
++ immediately, so use sparingly. May return NULL.
++ */
+ char *get_client_host_port(THD *client);
+
public:
inline Internal_error_handler *get_internal_handler()
{ return m_internal_handler; }
-@@ -2437,6 +2495,9 @@
+@@ -2913,6 +2974,10 @@
LEX_STRING invoker_host;
};
-+// Returns string as 'IP' for the client-side of the connection represented by
-+// 'client'. Does not allocate memory. May return "".
++/* Returns string as 'IP' for the client-side of the connection represented by
++ 'client'. Does not allocate memory. May return "".
++*/
+const char *get_client_host(THD *client);
- /** A short cut for thd->main_da.set_ok_status(). */
+ /** A short cut for thd->stmt_da->set_ok_status(). */
diff -ruN a/sql/sql_connect.cc b/sql/sql_connect.cc
---- a/sql/sql_connect.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_connect.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -42,6 +42,24 @@
- extern void win_install_sigabrt_handler();
- #endif
+--- a/sql/sql_connect.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_connect.cc 2010-12-31 03:53:28.000000000 +0300
+@@ -55,6 +55,24 @@
+ #define MIN_HANDSHAKE_SIZE 6
+ #endif /* HAVE_OPENSSL && !EMBEDDED_LIBRARY */
+// Increments connection count for user.
+static int increment_connection_count(THD* thd, bool use_lock);
+HASH global_client_stats;
+HASH global_thread_stats;
+// Protects global_user_stats and global_client_stats
-+extern pthread_mutex_t LOCK_global_user_client_stats;
++extern mysql_mutex_t LOCK_global_user_client_stats;
+
+HASH global_table_stats;
-+extern pthread_mutex_t LOCK_global_table_stats;
++extern mysql_mutex_t LOCK_global_table_stats;
+
+HASH global_index_stats;
-+extern pthread_mutex_t LOCK_global_index_stats;
++extern mysql_mutex_t LOCK_global_index_stats;
+
/*
Get structure for logging connection data for the current user
*/
-@@ -99,6 +117,563 @@
+@@ -112,6 +130,586 @@
}
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
-+ *length = strlen(user_stats->user);
-+ return (uchar*)user_stats->user;
++ *length= strlen(user_stats->user);
++ return (uchar*) user_stats->user;
+}
+
+extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
-+ *length = sizeof(my_thread_id);
-+ return (uchar*)&(thread_stats->id);
++ *length= sizeof(my_thread_id);
++ return (uchar *) &(thread_stats->id);
+}
+
+void free_user_stats(USER_STATS* user_stats)
+{
-+ my_free((char*)user_stats, MYF(0));
++ my_free((char *) user_stats);
+}
+
+void free_thread_stats(THREAD_STATS* thread_stats)
+{
-+ my_free((char*)thread_stats, MYF(0));
++ my_free((char *) thread_stats);
+}
+
+void init_user_stats(USER_STATS *user_stats,
+ strncpy(user_stats->user, user, sizeof(user_stats->user));
+ strncpy(user_stats->priv_user, priv_user, sizeof(user_stats->priv_user));
+
-+ user_stats->total_connections = total_connections;
-+ user_stats->concurrent_connections = concurrent_connections;
-+ user_stats->connected_time = connected_time;
-+ user_stats->busy_time = busy_time;
-+ user_stats->cpu_time = cpu_time;
-+ user_stats->bytes_received = bytes_received;
-+ user_stats->bytes_sent = bytes_sent;
-+ user_stats->binlog_bytes_written = binlog_bytes_written;
-+ user_stats->rows_fetched = rows_fetched;
-+ user_stats->rows_updated = rows_updated;
-+ user_stats->rows_read = rows_read;
-+ user_stats->select_commands = select_commands;
-+ user_stats->update_commands = update_commands;
-+ user_stats->other_commands = other_commands;
-+ user_stats->commit_trans = commit_trans;
-+ user_stats->rollback_trans = rollback_trans;
-+ user_stats->denied_connections = denied_connections;
-+ user_stats->lost_connections = lost_connections;
-+ user_stats->access_denied_errors = access_denied_errors;
-+ user_stats->empty_queries = empty_queries;
++ user_stats->total_connections= total_connections;
++ user_stats->concurrent_connections= concurrent_connections;
++ user_stats->connected_time= connected_time;
++ user_stats->busy_time= busy_time;
++ user_stats->cpu_time= cpu_time;
++ user_stats->bytes_received= bytes_received;
++ user_stats->bytes_sent= bytes_sent;
++ user_stats->binlog_bytes_written= binlog_bytes_written;
++ user_stats->rows_fetched= rows_fetched;
++ user_stats->rows_updated= rows_updated;
++ user_stats->rows_read= rows_read;
++ user_stats->select_commands= select_commands;
++ user_stats->update_commands= update_commands;
++ user_stats->other_commands= other_commands;
++ user_stats->commit_trans= commit_trans;
++ user_stats->rollback_trans= rollback_trans;
++ user_stats->denied_connections= denied_connections;
++ user_stats->lost_connections= lost_connections;
++ user_stats->access_denied_errors= access_denied_errors;
++ user_stats->empty_queries= empty_queries;
+ DBUG_VOID_RETURN;
+}
+
+ DBUG_PRINT("info",
+ ("Add thread_stats entry for thread %lu",
+ id));
-+ thread_stats->id = id;
-+
-+ thread_stats->total_connections = total_connections;
-+ thread_stats->concurrent_connections = concurrent_connections;
-+ thread_stats->connected_time = connected_time;
-+ thread_stats->busy_time = busy_time;
-+ thread_stats->cpu_time = cpu_time;
-+ thread_stats->bytes_received = bytes_received;
-+ thread_stats->bytes_sent = bytes_sent;
-+ thread_stats->binlog_bytes_written = binlog_bytes_written;
-+ thread_stats->rows_fetched = rows_fetched;
-+ thread_stats->rows_updated = rows_updated;
-+ thread_stats->rows_read = rows_read;
-+ thread_stats->select_commands = select_commands;
-+ thread_stats->update_commands = update_commands;
-+ thread_stats->other_commands = other_commands;
-+ thread_stats->commit_trans = commit_trans;
-+ thread_stats->rollback_trans = rollback_trans;
-+ thread_stats->denied_connections = denied_connections;
-+ thread_stats->lost_connections = lost_connections;
-+ thread_stats->access_denied_errors = access_denied_errors;
-+ thread_stats->empty_queries = empty_queries;
++ thread_stats->id= id;
++
++ thread_stats->total_connections= total_connections;
++ thread_stats->concurrent_connections= concurrent_connections;
++ thread_stats->connected_time= connected_time;
++ thread_stats->busy_time= busy_time;
++ thread_stats->cpu_time= cpu_time;
++ thread_stats->bytes_received= bytes_received;
++ thread_stats->bytes_sent= bytes_sent;
++ thread_stats->binlog_bytes_written= binlog_bytes_written;
++ thread_stats->rows_fetched= rows_fetched;
++ thread_stats->rows_updated= rows_updated;
++ thread_stats->rows_read= rows_read;
++ thread_stats->select_commands= select_commands;
++ thread_stats->update_commands= update_commands;
++ thread_stats->other_commands= other_commands;
++ thread_stats->commit_trans= commit_trans;
++ thread_stats->rollback_trans= rollback_trans;
++ thread_stats->denied_connections= denied_connections;
++ thread_stats->lost_connections= lost_connections;
++ thread_stats->access_denied_errors= access_denied_errors;
++ thread_stats->empty_queries= empty_queries;
+ DBUG_VOID_RETURN;
+}
+
+ ulonglong access_denied_errors,
+ ulonglong empty_queries)
+{
-+ user_stats->total_connections += total_connections;
-+ user_stats->concurrent_connections += concurrent_connections;
-+ user_stats->connected_time += connected_time;
-+ user_stats->busy_time += busy_time;
-+ user_stats->cpu_time += cpu_time;
-+ user_stats->bytes_received += bytes_received;
-+ user_stats->bytes_sent += bytes_sent;
-+ user_stats->binlog_bytes_written += binlog_bytes_written;
-+ user_stats->rows_fetched += rows_fetched;
-+ user_stats->rows_updated += rows_updated;
-+ user_stats->rows_read += rows_read;
-+ user_stats->select_commands += select_commands;
-+ user_stats->update_commands += update_commands;
-+ user_stats->other_commands += other_commands;
-+ user_stats->commit_trans += commit_trans;
-+ user_stats->rollback_trans += rollback_trans;
-+ user_stats->denied_connections += denied_connections;
-+ user_stats->lost_connections += lost_connections;
-+ user_stats->access_denied_errors += access_denied_errors;
-+ user_stats->empty_queries += empty_queries;
++ user_stats->total_connections+= total_connections;
++ user_stats->concurrent_connections+= concurrent_connections;
++ user_stats->connected_time+= connected_time;
++ user_stats->busy_time+= busy_time;
++ user_stats->cpu_time+= cpu_time;
++ user_stats->bytes_received+= bytes_received;
++ user_stats->bytes_sent+= bytes_sent;
++ user_stats->binlog_bytes_written+= binlog_bytes_written;
++ user_stats->rows_fetched+= rows_fetched;
++ user_stats->rows_updated+= rows_updated;
++ user_stats->rows_read+= rows_read;
++ user_stats->select_commands+= select_commands;
++ user_stats->update_commands+= update_commands;
++ user_stats->other_commands+= other_commands;
++ user_stats->commit_trans+= commit_trans;
++ user_stats->rollback_trans+= rollback_trans;
++ user_stats->denied_connections+= denied_connections;
++ user_stats->lost_connections+= lost_connections;
++ user_stats->access_denied_errors+= access_denied_errors;
++ user_stats->empty_queries+= empty_queries;
+}
+
+void add_thread_stats(THREAD_STATS *thread_stats,
+ ulonglong access_denied_errors,
+ ulonglong empty_queries)
+{
-+ thread_stats->total_connections += total_connections;
-+ thread_stats->concurrent_connections += concurrent_connections;
-+ thread_stats->connected_time += connected_time;
-+ thread_stats->busy_time += busy_time;
-+ thread_stats->cpu_time += cpu_time;
-+ thread_stats->bytes_received += bytes_received;
-+ thread_stats->bytes_sent += bytes_sent;
-+ thread_stats->binlog_bytes_written += binlog_bytes_written;
-+ thread_stats->rows_fetched += rows_fetched;
-+ thread_stats->rows_updated += rows_updated;
-+ thread_stats->rows_read += rows_read;
-+ thread_stats->select_commands += select_commands;
-+ thread_stats->update_commands += update_commands;
-+ thread_stats->other_commands += other_commands;
-+ thread_stats->commit_trans += commit_trans;
-+ thread_stats->rollback_trans += rollback_trans;
-+ thread_stats->denied_connections += denied_connections;
-+ thread_stats->lost_connections += lost_connections;
-+ thread_stats->access_denied_errors += access_denied_errors;
-+ thread_stats->empty_queries += empty_queries;
++ thread_stats->total_connections+= total_connections;
++ thread_stats->concurrent_connections+= concurrent_connections;
++ thread_stats->connected_time+= connected_time;
++ thread_stats->busy_time+= busy_time;
++ thread_stats->cpu_time+= cpu_time;
++ thread_stats->bytes_received+= bytes_received;
++ thread_stats->bytes_sent+= bytes_sent;
++ thread_stats->binlog_bytes_written+= binlog_bytes_written;
++ thread_stats->rows_fetched+= rows_fetched;
++ thread_stats->rows_updated+= rows_updated;
++ thread_stats->rows_read+= rows_read;
++ thread_stats->select_commands+= select_commands;
++ thread_stats->update_commands+= update_commands;
++ thread_stats->other_commands+= other_commands;
++ thread_stats->commit_trans+= commit_trans;
++ thread_stats->rollback_trans+= rollback_trans;
++ thread_stats->denied_connections+= denied_connections;
++ thread_stats->lost_connections+= lost_connections;
++ thread_stats->access_denied_errors+= access_denied_errors;
++ thread_stats->empty_queries+= empty_queries;
+}
+
+void init_global_user_stats(void)
+{
-+ if (hash_init(&global_user_stats, system_charset_info, max_connections,
-+ 0, 0, (hash_get_key)get_key_user_stats,
-+ (hash_free_key)free_user_stats, 0)) {
++ if (my_hash_init(&global_user_stats, system_charset_info, max_connections,
++ 0, 0, (my_hash_get_key)get_key_user_stats,
++ (my_hash_free_key)free_user_stats, 0)) {
+ sql_print_error("Initializing global_user_stats failed.");
+ exit(1);
+ }
+
+void init_global_client_stats(void)
+{
-+ if (hash_init(&global_client_stats, system_charset_info, max_connections,
-+ 0, 0, (hash_get_key)get_key_user_stats,
-+ (hash_free_key)free_user_stats, 0)) {
++ if (my_hash_init(&global_client_stats, system_charset_info, max_connections,
++ 0, 0, (my_hash_get_key)get_key_user_stats,
++ (my_hash_free_key)free_user_stats, 0)) {
+ sql_print_error("Initializing global_client_stats failed.");
+ exit(1);
+ }
+
+void init_global_thread_stats(void)
+{
-+ if (hash_init(&global_thread_stats, &my_charset_bin, max_connections,
-+ 0, 0, (hash_get_key)get_key_thread_stats,
-+ (hash_free_key)free_thread_stats, 0)) {
++ if (my_hash_init(&global_thread_stats, &my_charset_bin, max_connections,
++ 0, 0, (my_hash_get_key) get_key_thread_stats,
++ (my_hash_free_key) free_thread_stats, 0))
++ {
+ sql_print_error("Initializing global_client_stats failed.");
+ exit(1);
+ }
+extern "C" uchar *get_key_table_stats(TABLE_STATS *table_stats, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
-+ *length = strlen(table_stats->table);
-+ return (uchar*)table_stats->table;
++ *length= strlen(table_stats->table);
++ return (uchar*) table_stats->table;
+}
+
+extern "C" void free_table_stats(TABLE_STATS* table_stats)
+{
-+ my_free((char*)table_stats, MYF(0));
++ my_free((char*) table_stats);
+}
+
+void init_global_table_stats(void)
+{
-+ if (hash_init(&global_table_stats, system_charset_info, max_connections,
-+ 0, 0, (hash_get_key)get_key_table_stats,
-+ (hash_free_key)free_table_stats, 0)) {
++ if (my_hash_init(&global_table_stats, system_charset_info, max_connections,
++ 0, 0, (my_hash_get_key)get_key_table_stats,
++ (my_hash_free_key)free_table_stats, 0)) {
+ sql_print_error("Initializing global_table_stats failed.");
+ exit(1);
+ }
+extern "C" uchar *get_key_index_stats(INDEX_STATS *index_stats, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
-+ *length = strlen(index_stats->index);
-+ return (uchar*)index_stats->index;
++ *length= strlen(index_stats->index);
++ return (uchar*) index_stats->index;
+}
+
+extern "C" void free_index_stats(INDEX_STATS* index_stats)
+{
-+ my_free((char*)index_stats, MYF(0));
++ my_free((char*) index_stats);
+}
+
+void init_global_index_stats(void)
+{
-+ if (hash_init(&global_index_stats, system_charset_info, max_connections,
-+ 0, 0, (hash_get_key)get_key_index_stats,
-+ (hash_free_key)free_index_stats, 0)) {
++ if (my_hash_init(&global_index_stats, system_charset_info, max_connections,
++ 0, 0, (my_hash_get_key)get_key_index_stats,
++ (my_hash_free_key)free_index_stats, 0)) {
+ sql_print_error("Initializing global_index_stats failed.");
+ exit(1);
+ }
+
+void free_global_user_stats(void)
+{
-+ hash_free(&global_user_stats);
++ my_hash_free(&global_user_stats);
+}
+
+void free_global_thread_stats(void)
+{
-+ hash_free(&global_thread_stats);
++ my_hash_free(&global_thread_stats);
+}
+
+void free_global_table_stats(void)
+{
-+ hash_free(&global_table_stats);
++ my_hash_free(&global_table_stats);
+}
+
+void free_global_index_stats(void)
+{
-+ hash_free(&global_index_stats);
++ my_hash_free(&global_index_stats);
+}
+
+void free_global_client_stats(void)
+{
-+ hash_free(&global_client_stats);
++ my_hash_free(&global_client_stats);
+}
+
+// 'mysql_system_user' is used for when the user is not defined for a THD.
+{
+ USER_STATS* user_stats;
+
-+ if (!(user_stats = (USER_STATS*)hash_search(users_or_clients, (uchar*) name,
-+ strlen(name))))
++ if (!(user_stats = (USER_STATS *) my_hash_search(users_or_clients,
++ (uchar*) name,
++ strlen(name))))
+ {
+ // First connection for this user or client
-+ if (!(user_stats = ((USER_STATS*)
++ if (!(user_stats = ((USER_STATS *)
+ my_malloc(sizeof(USER_STATS), MYF(MY_WME | MY_ZEROFILL)))))
+ {
+ return 1; // Out of memory
+ 0, // access denied errors
+ 0); // empty queries
+
-+ if (my_hash_insert(users_or_clients, (uchar*)user_stats))
++ if (my_hash_insert(users_or_clients, (uchar *) user_stats))
+ {
-+ my_free((char*)user_stats, 0);
++ my_free((char *) user_stats);
+ return 1; // Out of memory
+ }
+ }
+{
+ THREAD_STATS* thread_stats;
+
-+ if (!(thread_stats = (THREAD_STATS*)hash_search(users_or_clients, (uchar*) &id,
-+ sizeof(my_thread_id))))
++ if (!(thread_stats = (THREAD_STATS *) my_hash_search(users_or_clients,
++ (uchar*) &id,
++ sizeof(my_thread_id))))
+ {
+ // First connection for this user or client
-+ if (!(thread_stats = ((THREAD_STATS*)
++ if (!(thread_stats = ((THREAD_STATS *)
+ my_malloc(sizeof(THREAD_STATS), MYF(MY_WME | MY_ZEROFILL)))))
+ {
+ return 1; // Out of memory
+ 0, // access denied errors
+ 0); // empty queries
+
-+ if (my_hash_insert(users_or_clients, (uchar*)thread_stats))
++ if (my_hash_insert(users_or_clients, (uchar *) thread_stats))
+ {
-+ my_free((char*)thread_stats, 0);
++ my_free((char *) thread_stats);
+ return 1; // Out of memory
+ }
+ }
+ return 0;
+}
+
-+// Increments the global user and client stats connection count. If 'use_lock'
-+// is true, LOCK_global_user_client_stats will be locked/unlocked. Returns
-+// 0 on success, 1 on error.
++/* Increments the global user and client stats connection count. If 'use_lock'
++ is true, LOCK_global_user_client_stats will be locked/unlocked. Returns
++ 0 on success, 1 on error.
++*/
+static int increment_connection_count(THD* thd, bool use_lock)
+{
-+ char* user_string = get_valid_user_string(thd->main_security_ctx.user);
-+ const char* client_string = get_client_host(thd);
-+ int return_value = 0;
++ char* user_string= get_valid_user_string(thd->main_security_ctx.user);
++ const char* client_string= get_client_host(thd);
++ int return_value= 0;
+
+ if (!opt_userstat_running)
+ return return_value;
+
-+ if (use_lock) pthread_mutex_lock(&LOCK_global_user_client_stats);
++ if (use_lock)
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
+
+ if (increment_count_by_name(user_string, user_string,
+ &global_user_stats, thd))
+ {
-+ return_value = 1;
++ return_value= 1;
+ goto end;
+ }
+ if (increment_count_by_name(client_string,
+ user_string,
+ &global_client_stats, thd))
+ {
-+ return_value = 1;
++ return_value= 1;
+ goto end;
+ }
-+ if (opt_thread_statistics) {
++ if (opt_thread_statistics)
++ {
+ if (increment_count_by_id(thd->thread_id, &global_thread_stats, thd))
+ {
-+ return_value = 1;
++ return_value= 1;
+ goto end;
+ }
-+ }
++ }
+
+end:
-+ if (use_lock) pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ if (use_lock)
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
+ return return_value;
+}
+
+ USER_STATS* user_stats,
+ time_t now)
+{
-+ user_stats->connected_time += now - thd->last_global_update_time;
-+// thd->last_global_update_time = now;
-+ user_stats->busy_time += thd->diff_total_busy_time;
-+ user_stats->cpu_time += thd->diff_total_cpu_time;
-+ user_stats->bytes_received += thd->diff_total_bytes_received;
-+ user_stats->bytes_sent += thd->diff_total_bytes_sent;
-+ user_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
-+ user_stats->rows_fetched += thd->diff_total_sent_rows;
-+ user_stats->rows_updated += thd->diff_total_updated_rows;
-+ user_stats->rows_read += thd->diff_total_read_rows;
-+ user_stats->select_commands += thd->diff_select_commands;
-+ user_stats->update_commands += thd->diff_update_commands;
-+ user_stats->other_commands += thd->diff_other_commands;
-+ user_stats->commit_trans += thd->diff_commit_trans;
-+ user_stats->rollback_trans += thd->diff_rollback_trans;
-+ user_stats->denied_connections += thd->diff_denied_connections;
-+ user_stats->lost_connections += thd->diff_lost_connections;
-+ user_stats->access_denied_errors += thd->diff_access_denied_errors;
-+ user_stats->empty_queries += thd->diff_empty_queries;
++ user_stats->connected_time+= now - thd->last_global_update_time;
++//thd->last_global_update_time= now;
++ user_stats->busy_time+= thd->diff_total_busy_time;
++ user_stats->cpu_time+= thd->diff_total_cpu_time;
++ user_stats->bytes_received+= thd->diff_total_bytes_received;
++ user_stats->bytes_sent+= thd->diff_total_bytes_sent;
++ user_stats->binlog_bytes_written+= thd->diff_total_binlog_bytes_written;
++ user_stats->rows_fetched+= thd->diff_total_sent_rows;
++ user_stats->rows_updated+= thd->diff_total_updated_rows;
++ user_stats->rows_read+= thd->diff_total_read_rows;
++ user_stats->select_commands+= thd->diff_select_commands;
++ user_stats->update_commands+= thd->diff_update_commands;
++ user_stats->other_commands+= thd->diff_other_commands;
++ user_stats->commit_trans+= thd->diff_commit_trans;
++ user_stats->rollback_trans+= thd->diff_rollback_trans;
++ user_stats->denied_connections+= thd->diff_denied_connections;
++ user_stats->lost_connections+= thd->diff_lost_connections;
++ user_stats->access_denied_errors+= thd->diff_access_denied_errors;
++ user_stats->empty_queries+= thd->diff_empty_queries;
+}
+
+static void update_global_thread_stats_with_thread(THD* thd,
+ THREAD_STATS* thread_stats,
+ time_t now)
+{
-+ thread_stats->connected_time += now - thd->last_global_update_time;
-+// thd->last_global_update_time = now;
-+ thread_stats->busy_time += thd->diff_total_busy_time;
-+ thread_stats->cpu_time += thd->diff_total_cpu_time;
-+ thread_stats->bytes_received += thd->diff_total_bytes_received;
-+ thread_stats->bytes_sent += thd->diff_total_bytes_sent;
-+ thread_stats->binlog_bytes_written += thd->diff_total_binlog_bytes_written;
-+ thread_stats->rows_fetched += thd->diff_total_sent_rows;
-+ thread_stats->rows_updated += thd->diff_total_updated_rows;
-+ thread_stats->rows_read += thd->diff_total_read_rows;
-+ thread_stats->select_commands += thd->diff_select_commands;
-+ thread_stats->update_commands += thd->diff_update_commands;
-+ thread_stats->other_commands += thd->diff_other_commands;
-+ thread_stats->commit_trans += thd->diff_commit_trans;
-+ thread_stats->rollback_trans += thd->diff_rollback_trans;
-+ thread_stats->denied_connections += thd->diff_denied_connections;
-+ thread_stats->lost_connections += thd->diff_lost_connections;
-+ thread_stats->access_denied_errors += thd->diff_access_denied_errors;
-+ thread_stats->empty_queries += thd->diff_empty_queries;
++ thread_stats->connected_time+= now - thd->last_global_update_time;
++//thd->last_global_update_time= now;
++ thread_stats->busy_time+= thd->diff_total_busy_time;
++ thread_stats->cpu_time+= thd->diff_total_cpu_time;
++ thread_stats->bytes_received+= thd->diff_total_bytes_received;
++ thread_stats->bytes_sent+= thd->diff_total_bytes_sent;
++ thread_stats->binlog_bytes_written+= thd->diff_total_binlog_bytes_written;
++ thread_stats->rows_fetched+= thd->diff_total_sent_rows;
++ thread_stats->rows_updated+= thd->diff_total_updated_rows;
++ thread_stats->rows_read+= thd->diff_total_read_rows;
++ thread_stats->select_commands+= thd->diff_select_commands;
++ thread_stats->update_commands+= thd->diff_update_commands;
++ thread_stats->other_commands+= thd->diff_other_commands;
++ thread_stats->commit_trans+= thd->diff_commit_trans;
++ thread_stats->rollback_trans+= thd->diff_rollback_trans;
++ thread_stats->denied_connections+= thd->diff_denied_connections;
++ thread_stats->lost_connections+= thd->diff_lost_connections;
++ thread_stats->access_denied_errors+= thd->diff_access_denied_errors;
++ thread_stats->empty_queries+= thd->diff_empty_queries;
+}
+
+// Updates the global stats of a user or client
+void update_global_user_stats(THD* thd, bool create_user, time_t now)
+{
-+ if (opt_userstat_running) {
-+ char* user_string = get_valid_user_string(thd->main_security_ctx.user);
-+ const char* client_string = get_client_host(thd);
++ if (opt_userstat_running)
++ {
++ char* user_string= get_valid_user_string(thd->main_security_ctx.user);
++ const char* client_string= get_client_host(thd);
+
-+ USER_STATS* user_stats;
-+ THREAD_STATS* thread_stats;
-+ pthread_mutex_lock(&LOCK_global_user_client_stats);
-+
-+ // Update by user name
-+ if ((user_stats = (USER_STATS*)hash_search(&global_user_stats,
-+ (uchar*)user_string,
-+ strlen(user_string)))) {
-+ // Found user.
-+ update_global_user_stats_with_user(thd, user_stats, now);
-+ } else {
-+ // Create the entry
-+ if (create_user) {
-+ increment_count_by_name(user_string, user_string,
-+ &global_user_stats, thd);
-+ }
-+ }
++ USER_STATS* user_stats;
++ THREAD_STATS* thread_stats;
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
+
-+ // Update by client IP
-+ if ((user_stats = (USER_STATS*)hash_search(&global_client_stats,
-+ (uchar*)client_string,
-+ strlen(client_string)))) {
-+ // Found by client IP
-+ update_global_user_stats_with_user(thd, user_stats, now);
-+ } else {
-+ // Create the entry
-+ if (create_user) {
-+ increment_count_by_name(client_string,
-+ user_string,
-+ &global_client_stats, thd);
++ // Update by user name
++ if ((user_stats = (USER_STATS *) my_hash_search(&global_user_stats,
++ (uchar *) user_string,
++ strlen(user_string))))
++ {
++ // Found user.
++ update_global_user_stats_with_user(thd, user_stats, now);
++ }
++ else
++ {
++ // Create the entry
++ if (create_user)
++ {
++ increment_count_by_name(user_string, user_string,
++ &global_user_stats, thd);
++ }
+ }
-+ }
+
-+ if (opt_thread_statistics) {
-+ // Update by thread ID
-+ if ((thread_stats = (THREAD_STATS*)hash_search(&global_thread_stats,
-+ (uchar*) &(thd->thread_id),
-+ sizeof(my_thread_id)))) {
-+ // Found by thread ID
-+ update_global_thread_stats_with_thread(thd, thread_stats, now);
-+ } else {
++ // Update by client IP
++ if ((user_stats = (USER_STATS *) my_hash_search(&global_client_stats,
++ (uchar *) client_string,
++ strlen(client_string))))
++ {
++ // Found by client IP
++ update_global_user_stats_with_user(thd, user_stats, now);
++ }
++ else
++ {
+ // Create the entry
-+ if (create_user) {
-+ increment_count_by_id(thd->thread_id,
-+ &global_thread_stats, thd);
++ if (create_user)
++ {
++ increment_count_by_name(client_string,
++ user_string,
++ &global_client_stats, thd);
+ }
+ }
-+ }
+
-+ thd->last_global_update_time = now;
-+ thd->reset_diff_stats();
++ if (opt_thread_statistics)
++ {
++ // Update by thread ID
++ if ((thread_stats = (THREAD_STATS *) my_hash_search(&global_thread_stats,
++ (uchar *) &(thd->thread_id),
++ sizeof(my_thread_id))))
++ {
++ // Found by thread ID
++ update_global_thread_stats_with_thread(thd, thread_stats, now);
++ }
++ else
++ {
++ // Create the entry
++ if (create_user)
++ {
++ increment_count_by_id(thd->thread_id,
++ &global_thread_stats, thd);
++ }
++ }
++ }
+
-+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
-+ } else {
-+ thd->reset_diff_stats();
++ thd->last_global_update_time = now;
++ thd->reset_diff_stats();
++
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
++ }
++ else
++ {
++ thd->reset_diff_stats();
+ }
+}
/*
check if user has already too many connections
-@@ -154,7 +729,10 @@
-
- end:
+@@ -169,6 +767,7 @@
if (error)
-+ {
+ {
uc->connections--; // no need for decrease_user_connections() here
+ statistic_increment(denied_connections, &LOCK_status);
-+ }
- (void) pthread_mutex_unlock(&LOCK_user_conn);
- DBUG_RETURN(error);
- }
-@@ -490,6 +1068,7 @@
- general_log_print(thd, COM_CONNECT, ER(ER_NOT_SUPPORTED_AUTH_MODE));
- DBUG_RETURN(1);
- }
-+ thd->diff_access_denied_errors++;
- my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
- thd->main_security_ctx.user,
- thd->main_security_ctx.host_or_ip,
-@@ -971,11 +1550,20 @@
+ /*
+ The thread may returned back to the pool and assigned to a user
+ that doesn't have a limit. Ensure the user is not using resources
+@@ -565,11 +1164,18 @@
my_sleep(1000); /* must wait after eof() */
#endif
statistic_increment(aborted_connects,&LOCK_status);
+
+ thd->reset_stats();
+ // Updates global user connection stats.
-+ if (increment_connection_count(thd, true)) {
-+ net_send_error(thd, ER_OUTOFMEMORY); // Out of memory
++ if (increment_connection_count(thd, true))
+ DBUG_RETURN(1);
-+ }
+
DBUG_RETURN(0);
}
-@@ -997,6 +1585,7 @@
+@@ -599,6 +1205,7 @@
if (thd->killed || (net->error && net->vio != 0))
{
statistic_increment(aborted_threads,&LOCK_status);
}
if (net->error && net->vio != 0)
-@@ -1123,10 +1712,14 @@
+@@ -728,10 +1335,14 @@
for (;;)
{
NET *net= &thd->net;
goto end_thread;
+ }
- prepare_new_connection_state(thd);
-
-@@ -1149,6 +1742,8 @@
+ MYSQL_CONNECTION_START(thd->thread_id, thd->security_ctx->priv_user,
+ (char *) thd->security_ctx->host_or_ip);
+@@ -758,6 +1369,8 @@
end_thread:
close_connection(thd, 0, 1);
+ thd->update_stats(false);
+ update_global_user_stats(thd, create_user, time(NULL));
- if (thread_scheduler.end_thread(thd,1))
- return 0; // Probably no-threads
+ if (MYSQL_CALLBACK_ELSE(thread_scheduler, end_thread, (thd, 1), 0))
+ return; // Probably no-threads
diff -ruN a/sql/sql_delete.cc b/sql/sql_delete.cc
---- a/sql/sql_delete.cc 2010-10-12 00:34:33.000000000 +0400
-+++ b/sql/sql_delete.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -452,6 +452,7 @@
- my_ok(thd, (ha_rows) thd->row_count_func);
+--- a/sql/sql_delete.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_delete.cc 2010-12-31 03:58:22.000000000 +0300
+@@ -411,6 +411,7 @@
+ my_ok(thd, deleted);
DBUG_PRINT("info",("%ld records deleted",(long) deleted));
}
-+ thd->updated_row_count += deleted;
++ thd->updated_row_count+= deleted;
DBUG_RETURN(error >= 0 || thd->is_error());
}
-@@ -1059,6 +1060,7 @@
- thd->row_count_func= deleted;
- ::my_ok(thd, (ha_rows) thd->row_count_func);
+@@ -1005,6 +1006,7 @@
+ {
+ ::my_ok(thd, deleted);
}
-+ thd->updated_row_count += deleted;
++ thd->updated_row_count+= deleted;
return 0;
}
diff -ruN a/sql/sql_insert.cc b/sql/sql_insert.cc
---- a/sql/sql_insert.cc 2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/sql_insert.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -981,6 +981,7 @@
- thd->row_count_func= info.copied + info.deleted + updated;
- ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
+--- a/sql/sql_insert.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_insert.cc 2010-12-31 04:12:35.000000000 +0300
+@@ -1073,13 +1073,14 @@
+
+ if (error)
+ goto abort;
++ ha_rows row_count;
+ if (values_list.elements == 1 && (!(thd->variables.option_bits & OPTION_WARNINGS) ||
+ !thd->cuted_fields))
+ {
+- my_ok(thd, info.copied + info.deleted +
++ row_count= info.copied + info.deleted +
+ ((thd->client_capabilities & CLIENT_FOUND_ROWS) ?
+- info.touched : info.updated),
+- id);
++ info.touched : info.updated);
++ my_ok(thd, row_count, id);
}
-+ thd->updated_row_count += thd->row_count_func;
+ else
+ {
+@@ -1095,8 +1096,10 @@
+ sprintf(buff, ER(ER_INSERT_INFO), (ulong) info.records,
+ (ulong) (info.deleted + updated),
+ (ulong) thd->warning_info->statement_warn_count());
+- ::my_ok(thd, info.copied + info.deleted + updated, id, buff);
++ row_count= info.copied + info.deleted + updated;
++ ::my_ok(thd, row_count, id, buff);
+ }
++ thd->updated_row_count+= row_count;
thd->abort_on_warning= 0;
DBUG_RETURN(FALSE);
-@@ -3309,6 +3310,7 @@
+@@ -3585,6 +3588,7 @@
thd->first_successful_insert_id_in_prev_stmt :
(info.copied ? autoinc_value_of_last_inserted_row : 0));
- ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+ thd->updated_row_count += thd->row_count_func;
+ ::my_ok(thd, row_count, id, buff);
++ thd->updated_row_count+= row_count;
DBUG_RETURN(0);
}
diff -ruN a/sql/sql_lex.h b/sql/sql_lex.h
---- a/sql/sql_lex.h 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_lex.h 2010-11-24 17:31:33.000000000 +0300
-@@ -124,6 +124,9 @@
+--- a/sql/sql_lex.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_lex.h 2010-12-31 05:07:18.000000000 +0300
+@@ -196,6 +196,9 @@
When a command is added here, be sure it's also added in mysqld.cc
in "struct show_var_st status_vars[]= {" ...
*/
SQLCOM_END
};
diff -ruN a/sql/sql_parse.cc b/sql/sql_parse.cc
---- a/sql/sql_parse.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_parse.cc 2010-11-24 17:45:19.000000000 +0300
-@@ -47,6 +47,9 @@
+--- a/sql/sql_parse.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_parse.cc 2010-12-31 04:57:45.000000000 +0300
+@@ -116,6 +116,9 @@
static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables);
- static bool check_show_create_table_access(THD *thd, TABLE_LIST *table);
+ static void sql_kill(THD *thd, ulong id, bool only_kill_query);
+// Uses the THD to update the global stats by user name and client IP
+void update_global_user_stats(THD* thd, bool create_user, time_t now);
const char *any_db="*any*"; // Special symbol for check_access
const LEX_STRING command_name[]={
-@@ -825,6 +828,12 @@
+@@ -701,6 +704,12 @@
*/
thd->clear_error(); // Clear error message
- thd->main_da.reset_diagnostics_area();
-+ thd->updated_row_count=0;
-+ thd->busy_time=0;
-+ thd->cpu_time=0;
-+ thd->bytes_received=0;
-+ thd->bytes_sent=0;
-+ thd->binlog_bytes_written=0;
+ thd->stmt_da->reset_diagnostics_area();
++ thd->updated_row_count= 0;
++ thd->busy_time= 0;
++ thd->cpu_time= 0;
++ thd->bytes_received= 0;
++ thd->bytes_sent= 0;
++ thd->binlog_bytes_written= 0;
net_new_transaction(net);
-@@ -994,6 +1003,9 @@
- DBUG_PRINT("info",("packet: '%*.s'; command: %d", packet_length, packet, command));
-
+@@ -886,6 +895,10 @@
+ (char *) thd->security_ctx->host_or_ip);
+
thd->command=command;
-+ // To increment the corrent command counter for user stats, 'command' must
-+ // be saved because it is set to COM_SLEEP at the end of this function.
-+ thd->old_command = command;
++ /* To increment the correct command counter for user stats, 'command' must
++ be saved because it is set to COM_SLEEP at the end of this function.
++ */
++ thd->old_command= command;
/*
Commands which always take a long time are logged into
the slow log only if opt_log_slow_admin_statements is set.
-@@ -1865,6 +1877,13 @@
+@@ -1619,6 +1632,13 @@
thd->profiling.discard_current_query();
#endif
break;
case SCH_OPEN_TABLES:
case SCH_VARIABLES:
case SCH_STATUS:
-@@ -2021,6 +2040,7 @@
+@@ -1776,6 +1796,7 @@
thd->security_ctx->priv_host)) &&
check_global_access(thd, SUPER_ACL))
{
my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), "SUPER");
DBUG_RETURN(TRUE);
}
-@@ -5348,6 +5368,7 @@
- if (!no_errors)
- {
- const char *db_name= db ? db : thd->db;
-+ thd->diff_access_denied_errors++;
- my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
- sctx->priv_user, sctx->priv_host, db_name);
- }
-@@ -5380,12 +5401,15 @@
- { // We can never grant this
+@@ -4705,6 +4726,7 @@
+ case ACL_INTERNAL_ACCESS_DENIED:
+ if (! no_errors)
+ {
++ thd->diff_access_denied_errors++;
+ my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
+ sctx->priv_user, sctx->priv_host, db);
+ }
+@@ -4755,6 +4777,7 @@
DBUG_PRINT("error",("No possible access"));
if (!no_errors)
-+ {
+ {
+ thd->diff_access_denied_errors++;
- my_error(ER_ACCESS_DENIED_ERROR, MYF(0),
- sctx->priv_user,
- sctx->priv_host,
- (thd->password ?
- ER(ER_YES) :
- ER(ER_NO))); /* purecov: tested */
-+ }
- DBUG_RETURN(TRUE); /* purecov: tested */
- }
-
-@@ -5411,11 +5435,15 @@
-
- DBUG_PRINT("error",("Access denied"));
- if (!no_errors)
-+ {
-+ // increment needs !no_errors condition, otherwise double counting.
-+ thd->diff_access_denied_errors++;
- my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
- sctx->priv_user, sctx->priv_host,
- (db ? db : (thd->db ?
- thd->db :
- "unknown"))); /* purecov: tested */
-+ }
- DBUG_RETURN(TRUE); /* purecov: tested */
- }
-
-@@ -5444,6 +5472,7 @@
+ if (thd->password == 2)
+ my_error(ER_ACCESS_DENIED_NO_PASSWORD_ERROR, MYF(0),
+ sctx->priv_user,
+@@ -4871,6 +4894,7 @@
if (!thd->col_access && check_grant_db(thd, dst_db_name))
{
my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
thd->security_ctx->priv_user,
thd->security_ctx->priv_host,
-@@ -5525,9 +5554,12 @@
- (want_access & ~(SELECT_ACL | EXTRA_ACL | FILE_ACL)))
- {
- if (!no_errors)
-+ {
-+ thd->diff_access_denied_errors++;
- my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
- sctx->priv_user, sctx->priv_host,
- INFORMATION_SCHEMA_NAME.str);
-+ }
- return TRUE;
- }
- /*
-@@ -5690,6 +5722,7 @@
+@@ -5141,6 +5165,7 @@
if ((thd->security_ctx->master_access & want_access))
return 0;
get_privilege_desc(command, sizeof(command), want_access);
my_error(ER_SPECIFIC_ACCESS_DENIED_ERROR, MYF(0), command);
return 1;
#else
-@@ -6071,6 +6104,30 @@
+@@ -5529,6 +5554,32 @@
lex_start(thd);
mysql_reset_thd_for_next_command(thd);
-+ int start_time_error = 0;
-+ int end_time_error = 0;
++ int start_time_error= 0;
++ int end_time_error= 0;
+ struct timeval start_time, end_time;
-+ double start_usecs = 0;
-+ double end_usecs = 0;
++ double start_usecs= 0;
++ double end_usecs= 0;
+ /* cpu time */
-+ int cputime_error = 0;
++ int cputime_error= 0;
+ struct timespec tp;
-+ double start_cpu_nsecs = 0;
-+ double end_cpu_nsecs = 0;
++ double start_cpu_nsecs= 0;
++ double end_cpu_nsecs= 0;
+
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+#ifdef HAVE_CLOCK_GETTIME
+ /* get start cputime */
+ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
+#endif
+
+ // Gets the start time, in order to measure how long this command takes.
-+ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ if (!(start_time_error = gettimeofday(&start_time, NULL)))
++ {
+ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
+ }
+ }
if (query_cache_send_result_to_client(thd, rawbuf, length) <= 0)
{
LEX *lex= thd->lex;
-@@ -6151,6 +6208,43 @@
- *found_semicolon= NULL;
+@@ -5597,6 +5648,52 @@
+ DBUG_ASSERT(thd->change_list.is_empty());
}
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+ // Gets the end time.
-+ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ if (!(end_time_error= gettimeofday(&end_time, NULL)))
++ {
++ end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
+ }
+
+ // Calculates the difference between the end and start times.
-+ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++ {
++ thd->busy_time= (end_usecs - start_usecs) / 1000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->busy_time > 2629743) {
-+ thd->busy_time = 0;
++ if (thd->busy_time > 2629743)
++ {
++ thd->busy_time= 0;
+ }
-+ } else {
++ }
++ else
++ {
+ // end time went back in time, or gettimeofday() failed.
-+ thd->busy_time = 0;
++ thd->busy_time= 0;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
+ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
-+ if (start_cpu_nsecs && !cputime_error) {
++ if (start_cpu_nsecs && !cputime_error)
++ {
+ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->cpu_time > 2629743) {
++ if (thd->cpu_time > 2629743)
++ {
+ thd->cpu_time = 0;
+ }
-+ } else
++ }
++ else
+ thd->cpu_time = 0;
+ }
+ // Updates THD stats and the global user stats.
DBUG_VOID_RETURN;
}
-@@ -7016,6 +7110,13 @@
- if (flush_error_log())
- result=1;
- }
-+ if (((options & (REFRESH_SLOW_QUERY_LOG | REFRESH_LOG)) ==
-+ REFRESH_SLOW_QUERY_LOG))
-+ {
-+ /* We are only flushing slow query log */
-+ logger.flush_slow_log(thd);
-+ }
-+
- #ifdef HAVE_QUERY_CACHE
- if (options & REFRESH_QUERY_CACHE_FREE)
- {
-@@ -7116,6 +7217,40 @@
- #endif
- if (options & REFRESH_USER_RESOURCES)
- reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
-+ if (options & REFRESH_TABLE_STATS)
-+ {
-+ pthread_mutex_lock(&LOCK_global_table_stats);
-+ free_global_table_stats();
-+ init_global_table_stats();
-+ pthread_mutex_unlock(&LOCK_global_table_stats);
-+ }
-+ if (options & REFRESH_INDEX_STATS)
-+ {
-+ pthread_mutex_lock(&LOCK_global_index_stats);
-+ free_global_index_stats();
-+ init_global_index_stats();
-+ pthread_mutex_unlock(&LOCK_global_index_stats);
-+ }
-+ if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
-+ {
-+ pthread_mutex_lock(&LOCK_global_user_client_stats);
-+ if (options & REFRESH_USER_STATS)
-+ {
-+ free_global_user_stats();
-+ init_global_user_stats();
-+ }
-+ if (options & REFRESH_CLIENT_STATS)
-+ {
-+ free_global_client_stats();
-+ init_global_client_stats();
-+ }
-+ if (options & REFRESH_THREAD_STATS)
-+ {
-+ free_global_thread_stats();
-+ init_global_thread_stats();
-+ }
-+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
-+ }
- *write_to_binlog= tmp_write_to_binlog;
- /*
- If the query was killed then this function must fail.
diff -ruN a/sql/sql_prepare.cc b/sql/sql_prepare.cc
---- a/sql/sql_prepare.cc 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_prepare.cc 2010-11-24 17:45:09.000000000 +0300
-@@ -96,6 +96,9 @@
- #include <mysql_com.h>
+--- a/sql/sql_prepare.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_prepare.cc 2010-12-31 04:25:04.000000000 +0300
+@@ -114,6 +114,9 @@
#endif
+ #include "lock.h" // MYSQL_OPEN_FORCE_SHARED_MDL
+// Uses the THD to update the global stats by user name and client IP
+void update_global_user_stats(THD* thd, bool create_user, time_t now);
/**
A result class used to send cursor rows using the binary protocol.
*/
-@@ -2103,8 +2106,32 @@
+@@ -2173,8 +2176,34 @@
/* First of all clear possible warnings from the previous command */
mysql_reset_thd_for_next_command(thd);
-+ int start_time_error = 0;
-+ int end_time_error = 0;
++ int start_time_error= 0;
++ int end_time_error= 0;
+ struct timeval start_time, end_time;
-+ double start_usecs = 0;
-+ double end_usecs = 0;
++ double start_usecs= 0;
++ double end_usecs= 0;
+ /* cpu time */
-+ int cputime_error = 0;
++ int cputime_error= 0;
+ struct timespec tp;
-+ double start_cpu_nsecs = 0;
-+ double end_cpu_nsecs = 0;
++ double start_cpu_nsecs= 0;
++ double end_cpu_nsecs= 0;
+
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+#ifdef HAVE_CLOCK_GETTIME
+ /* get start cputime */
-+ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs= tp.tv_sec * 1000000000.0 + tp.tv_nsec;
+#endif
+
+ // Gets the start time, in order to measure how long this command takes.
-+ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ if (!(start_time_error= gettimeofday(&start_time, NULL)))
++ {
++ start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
+ }
+ }
+
if (thd->stmt_map.insert(thd, stmt))
{
-@@ -2112,7 +2139,7 @@
+@@ -2182,7 +2211,7 @@
The error is set in the insert. The statement itself
will be also deleted there (this is how the hash works).
*/
+ goto end;
}
- /* Reset warnings from previous command */
-@@ -2139,6 +2166,44 @@
+ thd->protocol= &thd->protocol_binary;
+@@ -2196,6 +2225,53 @@
thd->protocol= save_protocol;
/* check_prepared_statemnt sends the metadata packet in case of success */
+end:
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+ // Gets the end time.
-+ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ if (!(end_time_error= gettimeofday(&end_time, NULL)))
++ {
++ end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
+ }
+
+ // Calculates the difference between the end and start times.
-+ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++ {
++ thd->busy_time= (end_usecs - start_usecs) / 1000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->busy_time > 2629743) {
-+ thd->busy_time = 0;
++ if (thd->busy_time > 2629743)
++ {
++ thd->busy_time= 0;
+ }
-+ } else {
++ }
++ else
++ {
+ // end time went back in time, or gettimeofday() failed.
-+ thd->busy_time = 0;
++ thd->busy_time= 0;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ /* get end cputime */
+ if (!cputime_error &&
-+ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
-+ if (start_cpu_nsecs && !cputime_error) {
-+ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ if (start_cpu_nsecs && !cputime_error)
++ {
++ thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->cpu_time > 2629743) {
-+ thd->cpu_time = 0;
++ if (thd->cpu_time > 2629743)
++ {
++ thd->cpu_time= 0;
+ }
-+ } else
++ }
++ else
+ thd->cpu_time = 0;
+ }
+ // Updates THD stats and the global user stats.
DBUG_VOID_RETURN;
}
-@@ -2489,12 +2554,36 @@
+@@ -2553,12 +2629,38 @@
/* First of all clear possible warnings from the previous command */
mysql_reset_thd_for_next_command(thd);
-+ int start_time_error = 0;
-+ int end_time_error = 0;
++ int start_time_error= 0;
++ int end_time_error= 0;
+ struct timeval start_time, end_time;
-+ double start_usecs = 0;
-+ double end_usecs = 0;
++ double start_usecs= 0;
++ double end_usecs= 0;
+ /* cpu time */
-+ int cputime_error = 0;
++ int cputime_error= 0;
+ struct timespec tp;
-+ double start_cpu_nsecs = 0;
-+ double end_cpu_nsecs = 0;
++ double start_cpu_nsecs= 0;
++ double end_cpu_nsecs= 0;
+
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+#ifdef HAVE_CLOCK_GETTIME
+ /* get start cputime */
+ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
+#endif
+
+ // Gets the start time, in order to measure how long this command takes.
-+ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
++ if (!(start_time_error = gettimeofday(&start_time, NULL)))
++ {
+ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
+ }
+ }
+ goto end;
}
- #if defined(ENABLED_PROFILING) && defined(COMMUNITY_SERVER)
-@@ -2515,6 +2604,44 @@
+ #if defined(ENABLED_PROFILING)
+@@ -2576,6 +2678,53 @@
/* Close connection socket; for use with client testing (Bug#43560). */
DBUG_EXECUTE_IF("close_conn_after_stmt_execute", vio_close(thd->net.vio););
+end:
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+ // Gets the end time.
-+ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
-+ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
++ if (!(end_time_error= gettimeofday(&end_time, NULL)))
++ {
++ end_usecs= end_time.tv_sec * 1000000.0 + end_time.tv_usec;
+ }
+
+ // Calculates the difference between the end and start times.
-+ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++ {
++ thd->busy_time= (end_usecs - start_usecs) / 1000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->busy_time > 2629743) {
-+ thd->busy_time = 0;
++ if (thd->busy_time > 2629743)
++ {
++ thd->busy_time= 0;
+ }
-+ } else {
++ }
++ else
++ {
+ // end time went back in time, or gettimeofday() failed.
-+ thd->busy_time = 0;
++ thd->busy_time= 0;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ /* get end cputime */
+ if (!cputime_error &&
-+ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
-+ if (start_cpu_nsecs && !cputime_error) {
-+ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ if (start_cpu_nsecs && !cputime_error)
++ {
++ thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->cpu_time > 2629743) {
-+ thd->cpu_time = 0;
++ if (thd->cpu_time > 2629743)
++ {
++ thd->cpu_time= 0;
+ }
-+ } else
++ }
++ else
+ thd->cpu_time = 0;
+ }
+ // Updates THD stats and the global user stats.
+ update_global_user_stats(thd, true, time(NULL));
+
DBUG_VOID_RETURN;
-
}
-@@ -2588,20 +2715,45 @@
+
+@@ -2648,20 +2797,47 @@
/* First of all clear possible warnings from the previous command */
mysql_reset_thd_for_next_command(thd);
+
-+ int start_time_error = 0;
-+ int end_time_error = 0;
++ int start_time_error= 0;
++ int end_time_error= 0;
+ struct timeval start_time, end_time;
-+ double start_usecs = 0;
-+ double end_usecs = 0;
++ double start_usecs= 0;
++ double end_usecs= 0;
+ /* cpu time */
-+ int cputime_error = 0;
++ int cputime_error= 0;
+ struct timespec tp;
-+ double start_cpu_nsecs = 0;
-+ double end_cpu_nsecs = 0;
++ double start_cpu_nsecs= 0;
++ double end_cpu_nsecs= 0;
+
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+#ifdef HAVE_CLOCK_GETTIME
+ /* get start cputime */
-+ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
+
+ // Gets the start time, in order to measure how long this command takes.
-+ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ if (!(start_time_error= gettimeofday(&start_time, NULL)))
++ {
++ start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
+ }
+ }
+
}
thd->stmt_arena= stmt;
-@@ -2625,6 +2777,44 @@
+@@ -2678,6 +2854,52 @@
thd->restore_backup_statement(stmt, &stmt_backup);
thd->stmt_arena= thd;
+end:
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+ // Gets the end time.
-+ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ if (!(end_time_error = gettimeofday(&end_time, NULL)))
++ {
+ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
+ }
+
+ // Calculates the difference between the end and start times.
-+ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++ {
++ thd->busy_time= (end_usecs - start_usecs) / 1000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->busy_time > 2629743) {
-+ thd->busy_time = 0;
++ if (thd->busy_time > 2629743)
++ {
++ thd->busy_time= 0;
+ }
-+ } else {
++ }
++ else
++ {
+ // end time went back in time, or gettimeofday() failed.
-+ thd->busy_time = 0;
++ thd->busy_time= 0;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ /* get end cputime */
+ if (!cputime_error &&
-+ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ !(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ end_cpu_nsecs= tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
-+ if (start_cpu_nsecs && !cputime_error) {
-+ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
++ if (start_cpu_nsecs && !cputime_error)
++ {
++ thd->cpu_time= (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->cpu_time > 2629743) {
-+ thd->cpu_time = 0;
++ if (thd->cpu_time > 2629743)
++ {
++ thd->cpu_time= 0;
+ }
+ } else
-+ thd->cpu_time = 0;
++ thd->cpu_time= 0;
+ }
+ // Updates THD stats and the global user stats.
+ thd->update_stats(true);
DBUG_VOID_RETURN;
}
-@@ -2655,13 +2845,37 @@
+@@ -2708,13 +2930,39 @@
/* First of all clear possible warnings from the previous command */
mysql_reset_thd_for_next_command(thd);
-+ int start_time_error = 0;
-+ int end_time_error = 0;
++ int start_time_error= 0;
++ int end_time_error= 0;
+ struct timeval start_time, end_time;
-+ double start_usecs = 0;
-+ double end_usecs = 0;
++ double start_usecs= 0;
++ double end_usecs= 0;
+ /* cpu time */
-+ int cputime_error = 0;
++ int cputime_error= 0;
+ struct timespec tp;
-+ double start_cpu_nsecs = 0;
-+ double end_cpu_nsecs = 0;
++ double start_cpu_nsecs= 0;
++ double end_cpu_nsecs= 0;
+
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+#ifdef HAVE_CLOCK_GETTIME
+ /* get start cputime */
-+ if (!(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
-+ start_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
++ if (!(cputime_error= clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
++ start_cpu_nsecs= tp.tv_sec * 1000000000.0+tp.tv_nsec;
+#endif
+
+ // Gets the start time, in order to measure how long this command takes.
-+ if (!(start_time_error = gettimeofday(&start_time, NULL))) {
-+ start_usecs = start_time.tv_sec * 1000000.0 + start_time.tv_usec;
++ if (!(start_time_error= gettimeofday(&start_time, NULL)))
++ {
++ start_usecs= start_time.tv_sec * 1000000.0 + start_time.tv_usec;
+ }
+ }
+
}
stmt->close_cursor();
-@@ -2678,6 +2892,44 @@
+@@ -2731,6 +2979,53 @@
my_ok(thd);
+end:
-+ if (opt_userstat_running) {
++ if (opt_userstat_running)
++ {
+ // Gets the end time.
-+ if (!(end_time_error = gettimeofday(&end_time, NULL))) {
++ if (!(end_time_error = gettimeofday(&end_time, NULL)))
++ {
+ end_usecs = end_time.tv_sec * 1000000.0 + end_time.tv_usec;
+ }
+
+ // Calculates the difference between the end and start times.
-+ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error) {
-+ thd->busy_time = (end_usecs - start_usecs) / 1000000;
++ if (start_usecs && end_usecs >= start_usecs && !start_time_error && !end_time_error)
++ {
++ thd->busy_time= (end_usecs - start_usecs) / 1000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->busy_time > 2629743) {
-+ thd->busy_time = 0;
++ if (thd->busy_time > 2629743)
++ {
++ thd->busy_time= 0;
+ }
-+ } else {
++ }
++ else
++ {
+ // end time went back in time, or gettimeofday() failed.
-+ thd->busy_time = 0;
++ thd->busy_time= 0;
+ }
+
+#ifdef HAVE_CLOCK_GETTIME
+ !(cputime_error = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tp)))
+ end_cpu_nsecs = tp.tv_sec*1000000000.0+tp.tv_nsec;
+#endif
-+ if (start_cpu_nsecs && !cputime_error) {
++ if (start_cpu_nsecs && !cputime_error)
++ {
+ thd->cpu_time = (end_cpu_nsecs - start_cpu_nsecs) / 1000000000;
+ // In case there are bad values, 2629743 is the #seconds in a month.
-+ if (thd->cpu_time > 2629743) {
-+ thd->cpu_time = 0;
++ if (thd->cpu_time > 2629743)
++ {
++ thd->cpu_time= 0;
+ }
-+ } else
-+ thd->cpu_time = 0;
++ }
++ else
++ thd->cpu_time= 0;
+ }
+ // Updates THD stats and the global user stats.
+ thd->update_stats(true);
DBUG_VOID_RETURN;
}
+diff -ruN a/sql/sql_reload.cc b/sql/sql_reload.cc
+--- a/sql/sql_reload.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_reload.cc 2010-12-31 05:00:59.000000000 +0300
+@@ -272,14 +272,48 @@
+ mysql_mutex_unlock(&LOCK_active_mi);
+ }
+ #endif
+- if (options & REFRESH_USER_RESOURCES)
+- reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
+ #ifdef HAVE_RESPONSE_TIME_DISTRIBUTION
+ if (options & REFRESH_QUERY_RESPONSE_TIME)
+ {
+ query_response_time_flush();
+ }
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
++ if (options & REFRESH_USER_RESOURCES)
++ reset_mqh((LEX_USER *) NULL, 0); /* purecov: inspected */
++ if (options & REFRESH_TABLE_STATS)
++ {
++ mysql_mutex_lock(&LOCK_global_table_stats);
++ free_global_table_stats();
++ init_global_table_stats();
++ mysql_mutex_unlock(&LOCK_global_table_stats);
++ }
++ if (options & REFRESH_INDEX_STATS)
++ {
++ mysql_mutex_lock(&LOCK_global_index_stats);
++ free_global_index_stats();
++ init_global_index_stats();
++ mysql_mutex_unlock(&LOCK_global_index_stats);
++ }
++ if (options & (REFRESH_USER_STATS | REFRESH_CLIENT_STATS | REFRESH_THREAD_STATS))
++ {
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
++ if (options & REFRESH_USER_STATS)
++ {
++ free_global_user_stats();
++ init_global_user_stats();
++ }
++ if (options & REFRESH_CLIENT_STATS)
++ {
++ free_global_client_stats();
++ init_global_client_stats();
++ }
++ if (options & REFRESH_THREAD_STATS)
++ {
++ free_global_thread_stats();
++ init_global_thread_stats();
++ }
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
++ }
+ *write_to_binlog= tmp_write_to_binlog;
+ /*
+ If the query was killed then this function must fail.
diff -ruN a/sql/sql_show.cc b/sql/sql_show.cc
---- a/sql/sql_show.cc 2010-11-24 17:24:52.000000000 +0300
-+++ b/sql/sql_show.cc 2010-11-24 17:31:33.000000000 +0300
-@@ -84,6 +84,40 @@
+--- a/sql/sql_show.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_show.cc 2010-12-31 04:39:23.000000000 +0300
+@@ -114,6 +114,43 @@
static COND * make_cond_for_info_schema(COND *cond, TABLE_LIST *table);
+/*
-+ * Solaris 10 does not have strsep().
-+ *
++ * Solaris 10 does not have strsep().
++ *
+ * based on getToken from http://www.winehq.org/pipermail/wine-patches/2001-November/001322.html
+ *
-+ */
++*/
+
+#ifndef HAVE_STRSEP
+static char* strsep(char** str, const char* delims)
+{
+ char *token;
+
-+ if (*str == NULL) {
++ if (*str == NULL)
++ {
+ /* No more tokens */
+ return NULL;
+ }
+
-+ token = *str;
-+ while (**str != '\0') {
-+ if (strchr(delims, **str) != NULL) {
-+ **str = '\0';
++ token= *str;
++ while (**str != '\0')
++ {
++ if (strchr(delims, **str) != NULL)
++ {
++ **str= '\0';
+ (*str)++;
+ return token;
+ }
+ }
+
+ /* There is not another token */
-+ *str = NULL;
++ *str= NULL;
+
+ return token;
+}
/***************************************************************************
** List all table types supported
***************************************************************************/
-@@ -832,6 +866,7 @@
+@@ -799,6 +836,7 @@
sctx->master_access);
if (!(db_access & DB_ACLS) && check_grant_db(thd,dbname))
{
my_error(ER_DBACCESS_DENIED_ERROR, MYF(0),
sctx->priv_user, sctx->host_or_ip, dbname);
general_log_print(thd,COM_INIT_DB,ER(ER_DBACCESS_DENIED_ERROR),
-@@ -2386,6 +2421,279 @@
+@@ -2351,6 +2389,284 @@
DBUG_RETURN(res);
}
+ RETURN
+ 0 - OK
+ 1 - error
-+ */
++*/
+int send_user_stats(THD* thd, HASH *all_user_stats, TABLE *table)
+{
+ DBUG_ENTER("send_user_stats");
-+ for (uint i = 0; i < all_user_stats->records; ++i) {
++ for (uint i = 0; i < all_user_stats->records; ++i)
++ {
+ restore_record(table, s->default_values);
-+ USER_STATS *user_stats = (USER_STATS*)hash_element(all_user_stats, i);
++ USER_STATS *user_stats = (USER_STATS *) my_hash_element(all_user_stats, i);
+ table->field[0]->store(user_stats->user, strlen(user_stats->user), system_charset_info);
+ table->field[1]->store((longlong)user_stats->total_connections);
+ table->field[2]->store((longlong)user_stats->concurrent_connections);
+int send_thread_stats(THD* thd, HASH *all_thread_stats, TABLE *table)
+{
+ DBUG_ENTER("send_thread_stats");
-+ for (uint i = 0; i < all_thread_stats->records; ++i) {
++ for (uint i = 0; i < all_thread_stats->records; ++i)
++ {
+ restore_record(table, s->default_values);
-+ THREAD_STATS *user_stats = (THREAD_STATS*)hash_element(all_thread_stats, i);
++ THREAD_STATS *user_stats = (THREAD_STATS *) my_hash_element(all_thread_stats, i);
+ table->field[0]->store((longlong)user_stats->id);
+ table->field[1]->store((longlong)user_stats->total_connections);
+ table->field[2]->store((longlong)user_stats->concurrent_connections);
+ RETURN
+ 0 - OK
+ 1 - error
-+ */
++*/
+
+
+int fill_schema_user_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+ // Iterates through all the global stats and sends them to the client.
+ // Pattern matching on the client IP is supported.
+
-+ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
+ int result= send_user_stats(thd, &global_user_stats, table);
-+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
+ if (result)
+ goto err;
+
+ RETURN
+ 0 - OK
+ 1 - error
-+ */
++*/
+
+
+int fill_schema_client_stats(THD* thd, TABLE_LIST* tables, COND* cond)
+ // Iterates through all the global stats and sends them to the client.
+ // Pattern matching on the client IP is supported.
+
-+ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
+ int result= send_user_stats(thd, &global_client_stats, table);
-+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
+ if (result)
+ goto err;
+
+ // Iterates through all the global stats and sends them to the client.
+ // Pattern matching on the client IP is supported.
+
-+ pthread_mutex_lock(&LOCK_global_user_client_stats);
++ mysql_mutex_lock(&LOCK_global_user_client_stats);
+ int result= send_thread_stats(thd, &global_thread_stats, table);
-+ pthread_mutex_unlock(&LOCK_global_user_client_stats);
++ mysql_mutex_unlock(&LOCK_global_user_client_stats);
+ if (result)
+ goto err;
+
+ DBUG_ENTER("fill_schema_table_stats");
+ char *table_full_name, *table_schema;
+
-+ pthread_mutex_lock(&LOCK_global_table_stats);
-+ for (uint i = 0; i < global_table_stats.records; ++i) {
++ mysql_mutex_lock(&LOCK_global_table_stats);
++ for (uint i = 0; i < global_table_stats.records; ++i)
++ {
+ restore_record(table, s->default_values);
-+ TABLE_STATS *table_stats =
-+ (TABLE_STATS*)hash_element(&global_table_stats, i);
++ TABLE_STATS *table_stats =
++ (TABLE_STATS *) my_hash_element(&global_table_stats, i);
+
+ table_full_name= thd->strdup(table_stats->table);
+ table_schema= strsep(&table_full_name, ".");
+
+ TABLE_LIST tmp_table;
-+ bzero((char*) &tmp_table,sizeof(tmp_table));
++ bzero((char *) &tmp_table,sizeof(tmp_table));
+ tmp_table.table_name= table_full_name;
+ tmp_table.db= table_schema;
+ tmp_table.grant.privilege= 0;
-+ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++ if (check_access(thd, SELECT_ACL, tmp_table.db,
+ &tmp_table.grant.privilege, 0, 0,
-+ is_schema_db(table_schema)) ||
++ is_infoschema_db(table_schema)) ||
+ check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
+ continue;
+
+
+ if (schema_table_store_record(thd, table))
+ {
-+ VOID(pthread_mutex_unlock(&LOCK_global_table_stats));
++ mysql_mutex_unlock(&LOCK_global_table_stats);
+ DBUG_RETURN(1);
+ }
+ }
-+ pthread_mutex_unlock(&LOCK_global_table_stats);
++ mysql_mutex_unlock(&LOCK_global_table_stats);
+ DBUG_RETURN(0);
+}
+
+ DBUG_ENTER("fill_schema_index_stats");
+ char *index_full_name, *table_schema, *table_name;
+
-+ pthread_mutex_lock(&LOCK_global_index_stats);
-+ for (uint i = 0; i < global_index_stats.records; ++i) {
++ mysql_mutex_lock(&LOCK_global_index_stats);
++ for (uint i = 0; i < global_index_stats.records; ++i)
++ {
+ restore_record(table, s->default_values);
+ INDEX_STATS *index_stats =
-+ (INDEX_STATS*)hash_element(&global_index_stats, i);
++ (INDEX_STATS *) my_hash_element(&global_index_stats, i);
+
+ index_full_name= thd->strdup(index_stats->index);
+ table_schema= strsep(&index_full_name, ".");
+ table_name= strsep(&index_full_name, ".");
+
+ TABLE_LIST tmp_table;
-+ bzero((char*) &tmp_table,sizeof(tmp_table));
++ bzero((char *) &tmp_table,sizeof(tmp_table));
+ tmp_table.table_name= table_name;
+ tmp_table.db= table_schema;
+ tmp_table.grant.privilege= 0;
-+ if (check_access(thd, SELECT_ACL | EXTRA_ACL, tmp_table.db,
++ if (check_access(thd, SELECT_ACL, tmp_table.db,
+ &tmp_table.grant.privilege, 0, 0,
-+ is_schema_db(table_schema)) ||
++ is_infoschema_db(table_schema)) ||
+ check_grant(thd, SELECT_ACL, &tmp_table, 1, UINT_MAX, 1))
+ continue;
+
+ table->field[3]->store((longlong)index_stats->rows_read, TRUE);
+
+ if (schema_table_store_record(thd, table))
-+ {
-+ VOID(pthread_mutex_unlock(&LOCK_global_index_stats));
++ {
++ mysql_mutex_unlock(&LOCK_global_index_stats);
+ DBUG_RETURN(1);
+ }
+ }
-+ pthread_mutex_unlock(&LOCK_global_index_stats);
++ mysql_mutex_unlock(&LOCK_global_index_stats);
+ DBUG_RETURN(0);
+}
++
/* collect status for all running threads */
-@@ -6688,6 +6996,104 @@
+@@ -7465,6 +7781,104 @@
};
ST_FIELD_INFO processlist_fields_info[]=
{
{"ID", 4, MYSQL_TYPE_LONGLONG, 0, 0, "Id", SKIP_OPEN_TABLE},
-@@ -6823,6 +7229,8 @@
+@@ -7654,6 +8068,8 @@
{
{"CHARACTER_SETS", charsets_fields_info, create_schema_table,
fill_schema_charsets, make_character_sets_old_format, 0, -1, -1, 0, 0},
-+ {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table,
++ {"CLIENT_STATISTICS", client_stats_fields_info, create_schema_table,
+ fill_schema_client_stats, make_old_format, 0, -1, -1, 0, 0},
{"COLLATIONS", collation_fields_info, create_schema_table,
fill_schema_collation, make_old_format, 0, -1, -1, 0, 0},
{"COLLATION_CHARACTER_SET_APPLICABILITY", coll_charset_app_fields_info,
-@@ -6832,6 +7240,8 @@
+@@ -7663,6 +8079,8 @@
OPTIMIZE_I_S_TABLE|OPEN_VIEW_FULL},
{"COLUMN_PRIVILEGES", column_privileges_fields_info, create_schema_table,
fill_schema_column_privileges, 0, 0, -1, -1, 0, 0},
{"ENGINES", engines_fields_info, create_schema_table,
fill_schema_engines, make_old_format, 0, -1, -1, 0, 0},
#ifdef HAVE_EVENT_SCHEDULER
-@@ -6888,11 +7298,17 @@
+@@ -7735,14 +8153,20 @@
get_all_tables, make_table_names_old_format, 0, 1, 2, 1, 0},
{"TABLE_PRIVILEGES", table_privileges_fields_info, create_schema_table,
fill_schema_table_privileges, 0, 0, -1, -1, 0, 0},
+ {"TABLE_STATISTICS", table_stats_fields_info, create_schema_table,
+ fill_schema_table_stats, make_old_format, 0, -1, -1, 0, 0},
+ {"TEMPORARY_TABLES", temporary_table_fields_info, create_schema_table,
+ fill_temporary_tables, make_temporary_tables_old_format, 0, 2, 3, 0,
+ OPEN_TABLE_ONLY|OPTIMIZE_I_S_TABLE},
+ {"THREAD_STATISTICS", thread_stats_fields_info, create_schema_table,
+ fill_schema_thread_stats, make_old_format, 0, -1, -1, 0, 0},
{"TRIGGERS", triggers_fields_info, create_schema_table,
get_all_tables, make_old_format, get_schema_triggers_record, 5, 6, 0,
- OPEN_TABLE_ONLY},
+ OPEN_TRIGGER_ONLY|OPTIMIZE_I_S_TABLE},
{"USER_PRIVILEGES", user_privileges_fields_info, create_schema_table,
fill_schema_user_privileges, 0, 0, -1, -1, 0, 0},
-+ {"USER_STATISTICS", user_stats_fields_info, create_schema_table,
++ {"USER_STATISTICS", user_stats_fields_info, create_schema_table,
+ fill_schema_user_stats, make_old_format, 0, -1, -1, 0, 0},
{"VARIABLES", variables_fields_info, create_schema_table, fill_variables,
make_old_format, 0, 0, -1, 1, 0},
{"VIEWS", view_fields_info, create_schema_table,
diff -ruN a/sql/sql_update.cc b/sql/sql_update.cc
---- a/sql/sql_update.cc 2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/sql_update.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -890,6 +890,7 @@
- thd->row_count_func=
- (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
- my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+ thd->updated_row_count += thd->row_count_func;
+--- a/sql/sql_update.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_update.cc 2010-12-31 04:08:17.000000000 +0300
+@@ -894,8 +894,10 @@
+ my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO), (ulong) found,
+ (ulong) updated,
+ (ulong) thd->warning_info->statement_warn_count());
+- my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated,
+- id, buff);
++ ha_rows row_count=
++ (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
++ my_ok(thd, row_count, id, buff);
++ thd->updated_row_count += row_count;
DBUG_PRINT("info",("%ld records updated", (long) updated));
}
thd->count_cuted_fields= CHECK_FIELD_IGNORE; /* calc cuted fields */
-@@ -2176,5 +2177,6 @@
- thd->row_count_func=
- (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
- ::my_ok(thd, (ulong) thd->row_count_func, id, buff);
-+ thd->updated_row_count += thd->row_count_func;
+@@ -2136,7 +2138,9 @@
+ thd->first_successful_insert_id_in_prev_stmt : 0;
+ my_snprintf(buff, sizeof(buff), ER(ER_UPDATE_INFO),
+ (ulong) found, (ulong) updated, (ulong) thd->cuted_fields);
+- ::my_ok(thd, (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated,
+- id, buff);
++ ha_rows row_count=
++ (thd->client_capabilities & CLIENT_FOUND_ROWS) ? found : updated;
++ ::my_ok(thd, row_count, id, buff);
++ thd->updated_row_count+= row_count;
DBUG_RETURN(FALSE);
}
diff -ruN a/sql/sql_yacc.yy b/sql/sql_yacc.yy
---- a/sql/sql_yacc.yy 2010-11-24 17:24:51.000000000 +0300
-+++ b/sql/sql_yacc.yy 2010-11-24 17:31:33.000000000 +0300
-@@ -757,6 +757,7 @@
- %token CHECK_SYM /* SQL-2003-R */
+--- a/sql/sql_yacc.yy 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sql_yacc.yy 2010-12-31 05:06:16.000000000 +0300
+@@ -864,6 +864,7 @@
%token CIPHER_SYM
+ %token CLASS_ORIGIN_SYM /* SQL-2003-N */
%token CLIENT_SYM
+%token CLIENT_STATS_SYM
%token CLOSE_SYM /* SQL-2003-R */
%token COALESCE /* SQL-2003-N */
%token CODE_SYM
-@@ -903,6 +904,7 @@
+@@ -1017,6 +1018,7 @@
%token IMPORT
%token INDEXES
%token INDEX_SYM
%token INFILE
%token INITIAL_SIZE_SYM
%token INNER_SYM /* SQL-2003-R */
-@@ -1144,6 +1146,7 @@
- %token SIGNED_SYM
- %token SIMPLE_SYM /* SQL-2003-N */
- %token SLAVE
-+%token SLOW_SYM
- %token SMALLINT /* SQL-2003-R */
- %token SNAPSHOT_SYM
- %token SOCKET_SYM
-@@ -1189,6 +1192,7 @@
+@@ -1315,6 +1317,7 @@
%token TABLESPACE
%token TABLE_REF_PRIORITY
%token TABLE_SYM /* SQL-2003-R */
+%token TABLE_STATS_SYM
%token TABLE_CHECKSUM_SYM
+ %token TABLE_NAME_SYM /* SQL-2003-N */
%token TEMPORARY /* SQL-2003-N */
- %token TEMPTABLE_SYM
-@@ -1197,6 +1201,7 @@
+@@ -1324,6 +1327,7 @@
%token TEXT_SYM
%token THAN_SYM
%token THEN_SYM /* SQL-2003-R */
%token TIMESTAMP /* SQL-2003-R */
%token TIMESTAMP_ADD
%token TIMESTAMP_DIFF
-@@ -1234,6 +1239,7 @@
+@@ -1361,6 +1365,7 @@
%token UPGRADE_SYM
%token USAGE /* SQL-2003-N */
%token USER /* SQL-2003-R */
%token USE_FRM
%token USE_SYM
%token USING /* SQL-2003-R */
-@@ -10346,6 +10352,41 @@
- {
- Lex->sql_command = SQLCOM_SHOW_SLAVE_STAT;
- }
-+ | CLIENT_STATS_SYM wild_and_where
+@@ -11109,6 +11114,41 @@
+ MYSQL_YYABORT;
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ }
++ | CLIENT_STATS_SYM wild_and_where
+ {
+ LEX *lex= Lex;
-+ Lex->sql_command = SQLCOM_SELECT;
++ Lex->sql_command= SQLCOM_SELECT;
+ if (prepare_schema_table(YYTHD, lex, 0, SCH_CLIENT_STATS))
+ MYSQL_YYABORT;
+ }
-+ | USER_STATS_SYM wild_and_where
++ | USER_STATS_SYM wild_and_where
+ {
+ LEX *lex= Lex;
-+ lex->sql_command = SQLCOM_SELECT;
++ lex->sql_command= SQLCOM_SELECT;
+ if (prepare_schema_table(YYTHD, lex, 0, SCH_USER_STATS))
+ MYSQL_YYABORT;
+ }
+ | THREAD_STATS_SYM wild_and_where
+ {
+ LEX *lex= Lex;
-+ Lex->sql_command = SQLCOM_SELECT;
++ Lex->sql_command= SQLCOM_SELECT;
+ if (prepare_schema_table(YYTHD, lex, 0, SCH_THREAD_STATS))
+ MYSQL_YYABORT;
+ }
+ if (prepare_schema_table(YYTHD, lex, 0, SCH_INDEX_STATS))
+ MYSQL_YYABORT;
+ }
- | CREATE PROCEDURE sp_name
+ | CREATE PROCEDURE_SYM sp_name
{
LEX *lex= Lex;
-@@ -10554,6 +10595,18 @@
- { Lex->type|= REFRESH_STATUS; }
- | SLAVE
- { Lex->type|= REFRESH_SLAVE; }
-+ | SLOW_SYM QUERY_SYM LOGS_SYM
-+ { Lex->type |= REFRESH_SLOW_QUERY_LOG; }
+@@ -11351,6 +11391,16 @@
+ Lex->type|= REFRESH_QUERY_RESPONSE_TIME;
+ #endif // HAVE_RESPONSE_TIME_DISTRIBUTION
+ }
+ | CLIENT_STATS_SYM
+ { Lex->type|= REFRESH_CLIENT_STATS; }
+ | USER_STATS_SYM
| MASTER_SYM
{ Lex->type|= REFRESH_MASTER; }
| DES_KEY_FILE
-@@ -11677,6 +11730,7 @@
+@@ -12473,6 +12523,7 @@
| CHAIN_SYM {}
| CHANGED {}
| CIPHER_SYM {}
+ | CLIENT_STATS_SYM {}
| CLIENT_SYM {}
+ | CLASS_ORIGIN_SYM {}
| COALESCE {}
- | CODE_SYM {}
-@@ -11738,6 +11792,7 @@
+@@ -12541,6 +12592,7 @@
| HOSTS_SYM {}
| HOUR_SYM {}
| IDENTIFIED_SYM {}
+ | INDEX_STATS_SYM {}
+ | IGNORE_SERVER_IDS_SYM {}
| INVOKER_SYM {}
| IMPORT {}
- | INDEXES {}
-@@ -11862,6 +11917,7 @@
- | SIMPLE_SYM {}
- | SHARE_SYM {}
- | SHUTDOWN {}
-+ | SLOW_SYM {}
- | SNAPSHOT_SYM {}
- | SOUNDS_SYM {}
- | SOURCE_SYM {}
-@@ -11881,6 +11937,7 @@
+@@ -12692,6 +12744,7 @@
| SUSPEND_SYM {}
| SWAPS_SYM {}
| SWITCHES_SYM {}
+ | TABLE_STATS_SYM {}
+ | TABLE_NAME_SYM {}
| TABLES {}
| TABLE_CHECKSUM_SYM {}
- | TABLESPACE {}
-@@ -11888,6 +11945,7 @@
- | TEMPTABLE_SYM {}
- | TEXT_SYM {}
- | THAN_SYM {}
-+ | THREAD_STATS_SYM {}
- | TRANSACTION_SYM {}
- | TRIGGERS_SYM {}
- | TIMESTAMP {}
-@@ -11905,6 +11963,7 @@
+@@ -12717,6 +12770,7 @@
| UNKNOWN_SYM {}
| UNTIL_SYM {}
| USER {}
| VARIABLES {}
| VIEW_SYM {}
diff -ruN a/sql/structs.h b/sql/structs.h
---- a/sql/structs.h 2010-10-12 00:34:34.000000000 +0400
-+++ b/sql/structs.h 2010-11-24 17:24:52.000000000 +0300
-@@ -237,6 +237,171 @@
+--- a/sql/structs.h 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/structs.h 2010-12-31 05:12:04.000000000 +0300
+@@ -25,6 +25,7 @@
+ #include "my_time.h" /* enum_mysql_timestamp_type */
+ #include "thr_lock.h" /* thr_lock_type */
+ #include "my_base.h" /* ha_rows, ha_key_alg */
++#include "mysql_com.h"
+
+ struct TABLE;
+ class Field;
+@@ -218,6 +219,171 @@
USER_RESOURCES user_resources;
} USER_CONN;
+ ulonglong empty_queries;
+} USER_STATS;
+
-+/* Lookup function for hash tables with USER_STATS entries */
++/* Lookup function for my_hash tables with USER_STATS entries */
+extern "C" uchar *get_key_user_stats(USER_STATS *user_stats, size_t *length,
+ my_bool not_used __attribute__((unused)));
+
-+/* Free all memory for a hash table with USER_STATS entries */
++/* Free all memory for a my_hash table with USER_STATS entries */
+extern void free_user_stats(USER_STATS* user_stats);
+
+/* Intialize an instance of USER_STATS */
+ ulonglong empty_queries;
+} THREAD_STATS;
+
-+/* Lookup function for hash tables with THREAD_STATS entries */
++/* Lookup function for my_hash tables with THREAD_STATS entries */
+extern "C" uchar *get_key_thread_stats(THREAD_STATS *thread_stats, size_t *length,
+ my_bool not_used __attribute__((unused)));
+
-+/* Free all memory for a hash table with THREAD_STATS entries */
++/* Free all memory for a my_hash table with THREAD_STATS entries */
+extern void free_thread_stats(THREAD_STATS* thread_stats);
+
+/* Intialize an instance of THREAD_STATS */
/* Bits in form->update */
#define REG_MAKE_DUPP 1 /* Make a copy of record when read */
#define REG_NEW_RECORD 2 /* Write a new record if not found */
-diff -ruN a/sql/table.h b/sql/table.h
---- a/sql/table.h 2010-10-12 00:34:16.000000000 +0400
-+++ b/sql/table.h 2010-11-24 17:31:33.000000000 +0300
-@@ -944,10 +944,12 @@
- enum enum_schema_tables
- {
- SCH_CHARSETS= 0,
-+ SCH_CLIENT_STATS,
- SCH_COLLATIONS,
- SCH_COLLATION_CHARACTER_SET_APPLICABILITY,
- SCH_COLUMNS,
- SCH_COLUMN_PRIVILEGES,
-+ SCH_INDEX_STATS,
- SCH_ENGINES,
- SCH_EVENTS,
- SCH_FILES,
-@@ -971,8 +973,11 @@
- SCH_TABLE_CONSTRAINTS,
- SCH_TABLE_NAMES,
- SCH_TABLE_PRIVILEGES,
-+ SCH_TABLE_STATS,
-+ SCH_THREAD_STATS,
- SCH_TRIGGERS,
- SCH_USER_PRIVILEGES,
-+ SCH_USER_STATS,
- SCH_VARIABLES,
- SCH_VIEWS
- };
-diff -ruN a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
---- a/storage/innobase/handler/ha_innodb.cc 2010-10-12 00:34:15.000000000 +0400
-+++ b/storage/innobase/handler/ha_innodb.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -4055,6 +4055,8 @@
-
- error = row_insert_for_mysql((byte*) record, prebuilt);
-
-+ if (error == DB_SUCCESS) rows_changed++;
-+
- /* Handle duplicate key errors */
- if (auto_inc_used) {
- ulint err;
-@@ -4392,6 +4394,8 @@
- }
- }
-
-+ if (error == DB_SUCCESS) rows_changed++;
-+
- innodb_srv_conc_exit_innodb(trx);
-
- error = convert_error_code_to_mysql(error, user_thd);
-@@ -4444,6 +4448,8 @@
-
- error = row_update_for_mysql((byte*) record, prebuilt);
-
-+ if (error == DB_SUCCESS) rows_changed++;
-+
- innodb_srv_conc_exit_innodb(trx);
-
- error = convert_error_code_to_mysql(error, user_thd);
-@@ -4923,6 +4929,9 @@
- if (ret == DB_SUCCESS) {
- error = 0;
- table->status = 0;
-+ rows_read++;
-+ if (active_index >= 0 && active_index < MAX_KEY)
-+ index_rows_read[active_index]++;
-
- } else if (ret == DB_RECORD_NOT_FOUND) {
- error = HA_ERR_END_OF_FILE;
+diff -ruN a/sql/sys_vars.cc b/sql/sys_vars.cc
+--- a/sql/sys_vars.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/sql/sys_vars.cc 2010-12-30 02:22:25.000000000 +0300
+@@ -1547,6 +1547,17 @@
+ NO_MUTEX_GUARD, NOT_IN_BINLOG,
+ ON_CHECK(check_read_only), ON_UPDATE(fix_read_only));
+
++static Sys_var_mybool Sys_userstat_running( /* global boolean stored in opt_userstat_running; defaults to FALSE */
++ "userstat_running",
++ "Control USER_STATISTICS, CLIENT_STATISTICS, THREAD_STATISTICS, "
++ "INDEX_STATISTICS and TABLE_STATISTICS running",
++ GLOBAL_VAR(opt_userstat_running), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
++
++static Sys_var_mybool Sys_thread_statistics( /* global boolean stored in opt_thread_statistics; defaults to FALSE */
++ "thread_statistics",
++ "Control THREAD_STATISTICS running, when userstat_running is enabled",
++ GLOBAL_VAR(opt_thread_statistics), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
++
+ // Small lower limit to be able to test MRR
+ static Sys_var_ulong Sys_read_rnd_buff_size(
+ "read_rnd_buffer_size",
diff -ruN a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
---- a/storage/myisam/ha_myisam.cc 2010-10-12 00:34:25.000000000 +0400
-+++ b/storage/myisam/ha_myisam.cc 2010-11-24 17:24:52.000000000 +0300
-@@ -761,6 +761,7 @@
+--- a/storage/myisam/ha_myisam.cc 2010-12-03 20:58:26.000000000 +0300
++++ b/storage/myisam/ha_myisam.cc 2010-12-31 05:58:01.000000000 +0300
+@@ -769,6 +769,7 @@
int ha_myisam::write_row(uchar *buf)
{
ha_statistic_increment(&SSV::ha_write_count);
/* If we have a timestamp column, update it to the current time */
-@@ -773,11 +774,12 @@
+@@ -781,11 +782,13 @@
*/
if (table->next_number_field && buf == table->record[0])
{
}
- return mi_write(file,buf);
+ error=mi_write(file,buf);
-+ if (!error) rows_changed++;
++ if (!error)
++ rows_changed++;
+ return error;
}
int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt)
-@@ -1638,16 +1640,22 @@
+@@ -1536,16 +1539,24 @@
int ha_myisam::update_row(const uchar *old_data, uchar *new_data)
{
table->timestamp_field->set_time();
- return mi_update(file,old_data,new_data);
+ error=mi_update(file,old_data,new_data);
-+ if (!error) rows_changed++;
++ if (!error)
++ rows_changed++;
+ return error;
}
ha_statistic_increment(&SSV::ha_delete_count);
- return mi_delete(file,buf);
+ error=mi_delete(file,buf);
-+ if (!error) rows_changed++;
++ if (!error)
++ rows_changed++;
+ return error;
}
int ha_myisam::index_read_map(uchar *buf, const uchar *key,
-@@ -1658,6 +1666,13 @@
+@@ -1557,6 +1568,14 @@
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, active_index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1668,6 +1683,13 @@
+@@ -1569,6 +1588,14 @@
ha_statistic_increment(&SSV::ha_read_key_count);
int error=mi_rkey(file, buf, index, key, keypart_map, find_flag);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1680,6 +1702,13 @@
+@@ -1583,6 +1610,14 @@
int error=mi_rkey(file, buf, active_index, key, keypart_map,
HA_READ_PREFIX_LAST);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
DBUG_RETURN(error);
}
-
-@@ -1689,6 +1718,13 @@
+@@ -1594,6 +1629,13 @@
ha_statistic_increment(&SSV::ha_read_next_count);
int error=mi_rnext(file,buf,active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1698,6 +1734,13 @@
+@@ -1605,6 +1647,13 @@
ha_statistic_increment(&SSV::ha_read_prev_count);
int error=mi_rprev(file,buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1707,6 +1750,13 @@
+@@ -1616,6 +1665,14 @@
ha_statistic_increment(&SSV::ha_read_first_count);
int error=mi_rfirst(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1716,6 +1766,13 @@
+@@ -1627,6 +1684,14 @@
ha_statistic_increment(&SSV::ha_read_last_count);
int error=mi_rlast(file, buf, active_index);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1731,6 +1788,13 @@
+@@ -1644,6 +1709,14 @@
error= mi_rnext_same(file,buf);
} while (error == HA_ERR_RECORD_DELETED);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) {
++ if (!error)
++ {
+ rows_read++;
+
+ int inx = (active_index == MAX_KEY) ? file->lastinx : active_index;
+ if (inx >= 0 && inx < MAX_KEY)
+ index_rows_read[inx]++;
+ }
+ MYSQL_INDEX_READ_ROW_DONE(error);
return error;
}
-
-@@ -1747,6 +1811,7 @@
+@@ -1663,6 +1736,8 @@
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
int error=mi_scan(file, buf);
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) rows_read++;
++ if (!error)
++ rows_read++;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
-
-@@ -1760,6 +1825,7 @@
+@@ -1679,6 +1754,8 @@
ha_statistic_increment(&SSV::ha_read_rnd_count);
int error=mi_rrnd(file, buf, my_get_ptr(pos,ref_length));
table->status=error ? STATUS_NOT_FOUND: 0;
-+ if (!error) rows_read++;
++ if (!error)
++ rows_read++;
+ MYSQL_READ_ROW_DONE(error);
return error;
}
-